@audicle/sdk 0.1.0 → 0.1.2
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +11 -18
- package/dist/index.cjs +4 -0
- package/dist/index.d.cts +21 -14
- package/dist/index.d.ts +21 -14
- package/dist/index.js +4 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Audicle Node.js SDK
|
|
2
2
|
|
|
3
|
-
The official Node.js/TypeScript client for the [Audicle](https://audicle.ai) transcription API. Supports
|
|
3
|
+
The official Node.js/TypeScript client for the [Audicle](https://audicle.ai) transcription API. Supports pre-recorded transcription, real-time streaming over WebSocket, and full transcript management.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -22,7 +22,7 @@ const result = await client.transcribe({
|
|
|
22
22
|
console.log(result.result.text);
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
-
##
|
|
25
|
+
## Pre-Recorded Transcription
|
|
26
26
|
|
|
27
27
|
### From a file
|
|
28
28
|
|
|
@@ -49,7 +49,7 @@ const result = await client.transcribe({
|
|
|
49
49
|
const result = await client.transcribe(
|
|
50
50
|
{ file: audioBlob },
|
|
51
51
|
{
|
|
52
|
-
model: "
|
|
52
|
+
model: "audicle-o1",
|
|
53
53
|
language: "en",
|
|
54
54
|
speakerLabels: true,
|
|
55
55
|
wordTimestamps: true,
|
|
@@ -118,13 +118,13 @@ const result = await client.transcripts.wait("txn_abc123", {
|
|
|
118
118
|
});
|
|
119
119
|
```
|
|
120
120
|
|
|
121
|
-
##
|
|
121
|
+
## Streaming
|
|
122
122
|
|
|
123
123
|
Stream audio over WebSocket for live transcription:
|
|
124
124
|
|
|
125
125
|
```typescript
|
|
126
126
|
const stream = client.streaming.transcribe({
|
|
127
|
-
model: "
|
|
127
|
+
model: "audicle-o1-realtime",
|
|
128
128
|
sample_rate: 16000,
|
|
129
129
|
encoding: "pcm_s16le",
|
|
130
130
|
});
|
|
@@ -150,24 +150,17 @@ stream.sendAudio(pcmBuffer);
|
|
|
150
150
|
stream.finalize();
|
|
151
151
|
```
|
|
152
152
|
|
|
153
|
-
### Models
|
|
154
|
-
|
|
155
|
-
| Model | Description |
|
|
156
|
-
|---|---|
|
|
157
|
-
| `default` | Whisper-based streaming transcription |
|
|
158
|
-
| `turbo` | Faster Whisper variant |
|
|
159
|
-
| `deepgram-nova-3` | Deepgram Nova 3 (supports interim results + VAD) |
|
|
160
|
-
| `gpt-realtime-mini` | OpenAI Realtime |
|
|
161
|
-
|
|
162
153
|
### Observing a session
|
|
163
154
|
|
|
164
|
-
Watch a live
|
|
155
|
+
Watch a live streaming session from another client. Only active WebSocket sessions can be observed — pre-recorded transcriptions submitted via file upload cannot.
|
|
165
156
|
|
|
166
157
|
```typescript
|
|
167
158
|
const observer = client.streaming.observe("txn_abc123");
|
|
168
159
|
|
|
169
160
|
observer.on("transcript", (msg) => {
|
|
170
|
-
|
|
161
|
+
if (msg.is_final) {
|
|
162
|
+
console.log(msg.transcript.text);
|
|
163
|
+
}
|
|
171
164
|
});
|
|
172
165
|
```
|
|
173
166
|
|
|
@@ -175,8 +168,8 @@ observer.on("transcript", (msg) => {
|
|
|
175
168
|
|
|
176
169
|
```typescript
|
|
177
170
|
const usage = await client.usage.get({
|
|
178
|
-
startDate: "
|
|
179
|
-
endDate: "
|
|
171
|
+
startDate: "2026-01-01",
|
|
172
|
+
endDate: "2026-01-31",
|
|
180
173
|
granularity: "day", // "hour" | "day"
|
|
181
174
|
});
|
|
182
175
|
|
package/dist/index.cjs
CHANGED
|
@@ -348,6 +348,10 @@ var TranscriptionStream = class extends TypedEmitter {
|
|
|
348
348
|
}
|
|
349
349
|
this.#ws.send(data);
|
|
350
350
|
}
|
|
351
|
+
/** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
|
|
352
|
+
keepAlive() {
|
|
353
|
+
this.#send({ type: "keepalive" });
|
|
354
|
+
}
|
|
351
355
|
finalize() {
|
|
352
356
|
this.#send({ type: "finalize" });
|
|
353
357
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -34,16 +34,12 @@ export interface paths {
|
|
|
34
34
|
};
|
|
35
35
|
/**
|
|
36
36
|
* Stream audio for live transcription
|
|
37
|
-
* @description Initiates a WebSocket connection for live audio transcription.
|
|
38
|
-
*
|
|
39
|
-
* The `model` query parameter determines the transcription backend:
|
|
40
|
-
* - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
|
|
41
|
-
* - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
|
|
37
|
+
* @description Initiates a WebSocket connection for live audio transcription. Only streaming models (e.g. `audicle-o1-realtime`) are supported — prerecorded models must use `POST /v1/transcribe`.
|
|
42
38
|
*
|
|
43
39
|
* **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
|
|
44
40
|
* - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
|
|
45
41
|
* - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
|
|
46
|
-
* - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
|
|
42
|
+
* - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
|
|
47
43
|
* - `{"type": "error", "code": "...", "message": "..."}` — error occurred
|
|
48
44
|
* - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
|
|
49
45
|
*
|
|
@@ -240,6 +236,7 @@ export interface components {
|
|
|
240
236
|
end: number;
|
|
241
237
|
text: string;
|
|
242
238
|
confidence?: number;
|
|
239
|
+
speaker?: number;
|
|
243
240
|
words?: components["schemas"]["TranscriptionWord"][];
|
|
244
241
|
};
|
|
245
242
|
TranscriptionWord: {
|
|
@@ -247,6 +244,7 @@ export interface components {
|
|
|
247
244
|
start: number;
|
|
248
245
|
end: number;
|
|
249
246
|
confidence?: number;
|
|
247
|
+
speaker?: number;
|
|
250
248
|
};
|
|
251
249
|
/** @description Usage and cost info, populated when status is 'completed' */
|
|
252
250
|
TranscriptionUsage: {
|
|
@@ -258,6 +256,13 @@ export interface components {
|
|
|
258
256
|
total_requests: number;
|
|
259
257
|
total_duration_seconds: number;
|
|
260
258
|
total_cost_cents: number;
|
|
259
|
+
usage_by_model: {
|
|
260
|
+
[key: string]: {
|
|
261
|
+
requests: number;
|
|
262
|
+
duration_seconds: number;
|
|
263
|
+
cost_cents: number;
|
|
264
|
+
};
|
|
265
|
+
};
|
|
261
266
|
breakdown: {
|
|
262
267
|
period: string;
|
|
263
268
|
requests: number;
|
|
@@ -313,14 +318,14 @@ export interface operations {
|
|
|
313
318
|
/** @description API key for authentication */
|
|
314
319
|
token: string;
|
|
315
320
|
/** @description Transcription model */
|
|
316
|
-
model?: "
|
|
321
|
+
model?: "audicle-o1" | "audicle-o1-realtime";
|
|
317
322
|
/** @description ISO 639-1 language code */
|
|
318
323
|
language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
|
|
319
324
|
/** @description Audio sample rate in Hz (8000-48000) */
|
|
320
325
|
sample_rate?: number;
|
|
321
326
|
/** @description Audio encoding */
|
|
322
327
|
encoding?: "pcm_s16le";
|
|
323
|
-
/** @description Include interim (partial) transcripts (
|
|
328
|
+
/** @description Include interim (partial) transcripts (streaming models only) */
|
|
324
329
|
interim_results?: boolean | null;
|
|
325
330
|
};
|
|
326
331
|
header?: never;
|
|
@@ -387,10 +392,10 @@ export interface operations {
|
|
|
387
392
|
*/
|
|
388
393
|
audio_url?: string;
|
|
389
394
|
/**
|
|
390
|
-
* @default
|
|
395
|
+
* @default audicle-o1
|
|
391
396
|
* @enum {string}
|
|
392
397
|
*/
|
|
393
|
-
model?: "
|
|
398
|
+
model?: "audicle-o1";
|
|
394
399
|
language?: string;
|
|
395
400
|
/**
|
|
396
401
|
* @description Enable speaker diarization
|
|
@@ -741,7 +746,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
|
|
|
741
746
|
export type HealthResponse = components["schemas"]["HealthResponse"];
|
|
742
747
|
export type ErrorResponse = components["schemas"]["ErrorResponse"];
|
|
743
748
|
export type TranscriptionStatus = Transcription["status"];
|
|
744
|
-
export type TranscriptionModel = "
|
|
749
|
+
export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
|
|
745
750
|
export interface ListTranscriptsParams {
|
|
746
751
|
cursor?: string;
|
|
747
752
|
limit?: number;
|
|
@@ -783,7 +788,7 @@ export interface TranscribeFileInput {
|
|
|
783
788
|
audioUrl?: string;
|
|
784
789
|
}
|
|
785
790
|
export interface TranscribeOptions {
|
|
786
|
-
model?: "
|
|
791
|
+
model?: "audicle-o1";
|
|
787
792
|
language?: string;
|
|
788
793
|
speakerLabels?: boolean;
|
|
789
794
|
wordTimestamps?: boolean;
|
|
@@ -809,7 +814,7 @@ export interface StreamWord {
|
|
|
809
814
|
start: number;
|
|
810
815
|
end: number;
|
|
811
816
|
confidence?: number;
|
|
812
|
-
speaker?:
|
|
817
|
+
speaker?: number | null;
|
|
813
818
|
punctuated_word?: string;
|
|
814
819
|
}
|
|
815
820
|
export interface SessionBeginMessage {
|
|
@@ -859,7 +864,7 @@ export type StreamEvents = {
|
|
|
859
864
|
};
|
|
860
865
|
};
|
|
861
866
|
export interface TranscribeStreamOptions {
|
|
862
|
-
model?: "
|
|
867
|
+
model?: "audicle-o1-realtime";
|
|
863
868
|
language?: string;
|
|
864
869
|
sample_rate?: number;
|
|
865
870
|
encoding?: "pcm_s16le";
|
|
@@ -869,6 +874,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
|
|
|
869
874
|
#private;
|
|
870
875
|
constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
|
|
871
876
|
sendAudio(data: ArrayBuffer | ArrayBufferView): void;
|
|
877
|
+
/** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
|
|
878
|
+
keepAlive(): void;
|
|
872
879
|
finalize(): void;
|
|
873
880
|
stop(): void;
|
|
874
881
|
configure(config: Record<string, unknown>): void;
|
package/dist/index.d.ts
CHANGED
|
@@ -34,16 +34,12 @@ export interface paths {
|
|
|
34
34
|
};
|
|
35
35
|
/**
|
|
36
36
|
* Stream audio for live transcription
|
|
37
|
-
* @description Initiates a WebSocket connection for live audio transcription.
|
|
38
|
-
*
|
|
39
|
-
* The `model` query parameter determines the transcription backend:
|
|
40
|
-
* - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
|
|
41
|
-
* - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
|
|
37
|
+
* @description Initiates a WebSocket connection for live audio transcription. Only streaming models (e.g. `audicle-o1-realtime`) are supported — prerecorded models must use `POST /v1/transcribe`.
|
|
42
38
|
*
|
|
43
39
|
* **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
|
|
44
40
|
* - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
|
|
45
41
|
* - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
|
|
46
|
-
* - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
|
|
42
|
+
* - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
|
|
47
43
|
* - `{"type": "error", "code": "...", "message": "..."}` — error occurred
|
|
48
44
|
* - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
|
|
49
45
|
*
|
|
@@ -240,6 +236,7 @@ export interface components {
|
|
|
240
236
|
end: number;
|
|
241
237
|
text: string;
|
|
242
238
|
confidence?: number;
|
|
239
|
+
speaker?: number;
|
|
243
240
|
words?: components["schemas"]["TranscriptionWord"][];
|
|
244
241
|
};
|
|
245
242
|
TranscriptionWord: {
|
|
@@ -247,6 +244,7 @@ export interface components {
|
|
|
247
244
|
start: number;
|
|
248
245
|
end: number;
|
|
249
246
|
confidence?: number;
|
|
247
|
+
speaker?: number;
|
|
250
248
|
};
|
|
251
249
|
/** @description Usage and cost info, populated when status is 'completed' */
|
|
252
250
|
TranscriptionUsage: {
|
|
@@ -258,6 +256,13 @@ export interface components {
|
|
|
258
256
|
total_requests: number;
|
|
259
257
|
total_duration_seconds: number;
|
|
260
258
|
total_cost_cents: number;
|
|
259
|
+
usage_by_model: {
|
|
260
|
+
[key: string]: {
|
|
261
|
+
requests: number;
|
|
262
|
+
duration_seconds: number;
|
|
263
|
+
cost_cents: number;
|
|
264
|
+
};
|
|
265
|
+
};
|
|
261
266
|
breakdown: {
|
|
262
267
|
period: string;
|
|
263
268
|
requests: number;
|
|
@@ -313,14 +318,14 @@ export interface operations {
|
|
|
313
318
|
/** @description API key for authentication */
|
|
314
319
|
token: string;
|
|
315
320
|
/** @description Transcription model */
|
|
316
|
-
model?: "
|
|
321
|
+
model?: "audicle-o1" | "audicle-o1-realtime";
|
|
317
322
|
/** @description ISO 639-1 language code */
|
|
318
323
|
language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
|
|
319
324
|
/** @description Audio sample rate in Hz (8000-48000) */
|
|
320
325
|
sample_rate?: number;
|
|
321
326
|
/** @description Audio encoding */
|
|
322
327
|
encoding?: "pcm_s16le";
|
|
323
|
-
/** @description Include interim (partial) transcripts (
|
|
328
|
+
/** @description Include interim (partial) transcripts (streaming models only) */
|
|
324
329
|
interim_results?: boolean | null;
|
|
325
330
|
};
|
|
326
331
|
header?: never;
|
|
@@ -387,10 +392,10 @@ export interface operations {
|
|
|
387
392
|
*/
|
|
388
393
|
audio_url?: string;
|
|
389
394
|
/**
|
|
390
|
-
* @default
|
|
395
|
+
* @default audicle-o1
|
|
391
396
|
* @enum {string}
|
|
392
397
|
*/
|
|
393
|
-
model?: "
|
|
398
|
+
model?: "audicle-o1";
|
|
394
399
|
language?: string;
|
|
395
400
|
/**
|
|
396
401
|
* @description Enable speaker diarization
|
|
@@ -741,7 +746,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
|
|
|
741
746
|
export type HealthResponse = components["schemas"]["HealthResponse"];
|
|
742
747
|
export type ErrorResponse = components["schemas"]["ErrorResponse"];
|
|
743
748
|
export type TranscriptionStatus = Transcription["status"];
|
|
744
|
-
export type TranscriptionModel = "
|
|
749
|
+
export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
|
|
745
750
|
export interface ListTranscriptsParams {
|
|
746
751
|
cursor?: string;
|
|
747
752
|
limit?: number;
|
|
@@ -783,7 +788,7 @@ export interface TranscribeFileInput {
|
|
|
783
788
|
audioUrl?: string;
|
|
784
789
|
}
|
|
785
790
|
export interface TranscribeOptions {
|
|
786
|
-
model?: "
|
|
791
|
+
model?: "audicle-o1";
|
|
787
792
|
language?: string;
|
|
788
793
|
speakerLabels?: boolean;
|
|
789
794
|
wordTimestamps?: boolean;
|
|
@@ -809,7 +814,7 @@ export interface StreamWord {
|
|
|
809
814
|
start: number;
|
|
810
815
|
end: number;
|
|
811
816
|
confidence?: number;
|
|
812
|
-
speaker?:
|
|
817
|
+
speaker?: number | null;
|
|
813
818
|
punctuated_word?: string;
|
|
814
819
|
}
|
|
815
820
|
export interface SessionBeginMessage {
|
|
@@ -859,7 +864,7 @@ export type StreamEvents = {
|
|
|
859
864
|
};
|
|
860
865
|
};
|
|
861
866
|
export interface TranscribeStreamOptions {
|
|
862
|
-
model?: "
|
|
867
|
+
model?: "audicle-o1-realtime";
|
|
863
868
|
language?: string;
|
|
864
869
|
sample_rate?: number;
|
|
865
870
|
encoding?: "pcm_s16le";
|
|
@@ -869,6 +874,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
|
|
|
869
874
|
#private;
|
|
870
875
|
constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
|
|
871
876
|
sendAudio(data: ArrayBuffer | ArrayBufferView): void;
|
|
877
|
+
/** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
|
|
878
|
+
keepAlive(): void;
|
|
872
879
|
finalize(): void;
|
|
873
880
|
stop(): void;
|
|
874
881
|
configure(config: Record<string, unknown>): void;
|
package/dist/index.js
CHANGED
|
@@ -303,6 +303,10 @@ var TranscriptionStream = class extends TypedEmitter {
|
|
|
303
303
|
}
|
|
304
304
|
this.#ws.send(data);
|
|
305
305
|
}
|
|
306
|
+
/** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
|
|
307
|
+
keepAlive() {
|
|
308
|
+
this.#send({ type: "keepalive" });
|
|
309
|
+
}
|
|
306
310
|
finalize() {
|
|
307
311
|
this.#send({ type: "finalize" });
|
|
308
312
|
}
|