@audicle/sdk 0.1.0 → 0.1.2

This diff reflects the changes between publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -1,6 +1,6 @@
 # Audicle Node.js SDK
 
-The official Node.js/TypeScript client for the [Audicle](https://audicle.ai) transcription API. Supports batch transcription, real-time streaming over WebSocket, and full transcript management.
+The official Node.js/TypeScript client for the [Audicle](https://audicle.ai) transcription API. Supports pre-recorded transcription, real-time streaming over WebSocket, and full transcript management.
 
 ## Installation
 
@@ -22,7 +22,7 @@ const result = await client.transcribe({
 console.log(result.result.text);
 ```
 
-## Batch Transcription
+## Pre-Recorded Transcription
 
 ### From a file
 
@@ -49,7 +49,7 @@ const result = await client.transcribe({
 const result = await client.transcribe(
   { file: audioBlob },
   {
-    model: "default",
+    model: "audicle-o1",
     language: "en",
     speakerLabels: true,
     wordTimestamps: true,
@@ -118,13 +118,13 @@ const result = await client.transcripts.wait("txn_abc123", {
 });
 ```
 
-## Real-Time Streaming
+## Streaming
 
 Stream audio over WebSocket for live transcription:
 
 ```typescript
 const stream = client.streaming.transcribe({
-  model: "default",
+  model: "audicle-o1-realtime",
   sample_rate: 16000,
   encoding: "pcm_s16le",
 });
@@ -150,24 +150,17 @@ stream.sendAudio(pcmBuffer);
 stream.finalize();
 ```
 
-### Models
-
-| Model | Description |
-|---|---|
-| `default` | Whisper-based streaming transcription |
-| `turbo` | Faster Whisper variant |
-| `deepgram-nova-3` | Deepgram Nova 3 (supports interim results + VAD) |
-| `gpt-realtime-mini` | OpenAI Realtime |
-
 ### Observing a session
 
-Watch a live transcription session from the dashboard or another client:
+Watch a live streaming session from another client. Only active WebSocket sessions can be observed — pre-recorded transcriptions submitted via file upload cannot.
 
 ```typescript
 const observer = client.streaming.observe("txn_abc123");
 
 observer.on("transcript", (msg) => {
-  console.log(msg.transcript.text);
+  if (msg.is_final) {
+    console.log(msg.transcript.text);
+  }
 });
 ```
 
@@ -175,8 +168,8 @@ observer.on("transcript", (msg) => {
 
 ```typescript
 const usage = await client.usage.get({
-  startDate: "2025-01-01",
-  endDate: "2025-01-31",
+  startDate: "2026-01-01",
+  endDate: "2026-01-31",
   granularity: "day", // "hour" | "day"
 });
 
package/dist/index.cjs CHANGED
@@ -348,6 +348,10 @@ var TranscriptionStream = class extends TypedEmitter {
     }
     this.#ws.send(data);
   }
+  /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
+  keepAlive() {
+    this.#send({ type: "keepalive" });
+  }
   finalize() {
     this.#send({ type: "finalize" });
   }
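The new `keepAlive()` method (mirrored in `index.js` and in the type declarations below) sends a `{"type": "keepalive"}` frame so a streaming session is not closed by the 10-second inactivity timeout while no audio is being sent. A minimal sketch of how a caller might use it during a pause; the 5-second interval and the mute/resume handlers are illustrative choices, not part of the SDK:

```typescript
// Sketch: keep a paused streaming session alive. `client` and `pcmBuffer` are
// assumed from the README examples; sendAudio(), keepAlive(), and finalize()
// are the methods added or already present in this release.
const stream = client.streaming.transcribe({
  model: "audicle-o1-realtime",
  sample_rate: 16000,
  encoding: "pcm_s16le",
});

let keepAliveTimer: ReturnType<typeof setInterval> | undefined;

function onMicMuted() {
  // Ping well inside the 10-second inactivity window (5 s here is arbitrary).
  keepAliveTimer = setInterval(() => stream.keepAlive(), 5_000);
}

function onMicResumed(pcmBuffer: ArrayBuffer) {
  clearInterval(keepAliveTimer);
  stream.sendAudio(pcmBuffer);
}
```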
package/dist/index.d.cts CHANGED
@@ -34,16 +34,12 @@ export interface paths {
     };
     /**
      * Stream audio for live transcription
-     * @description Initiates a WebSocket connection for live audio transcription.
-     *
-     * The `model` query parameter determines the transcription backend:
-     * - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
-     * - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
+     * @description Initiates a WebSocket connection for live audio transcription. Only streaming models (e.g. `audicle-o1-realtime`) are supported — prerecorded models must use `POST /v1/transcribe`.
      *
      * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
      * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
      * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
-     * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (realtime models only)
+     * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
      * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
      * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
      *
@@ -240,6 +236,7 @@ export interface components {
       end: number;
       text: string;
       confidence?: number;
+      speaker?: number;
       words?: components["schemas"]["TranscriptionWord"][];
     };
     TranscriptionWord: {
@@ -247,6 +244,7 @@ export interface components {
       start: number;
       end: number;
       confidence?: number;
+      speaker?: number;
     };
     /** @description Usage and cost info, populated when status is 'completed' */
     TranscriptionUsage: {
@@ -258,6 +256,13 @@ export interface components {
       total_requests: number;
       total_duration_seconds: number;
       total_cost_cents: number;
+      usage_by_model: {
+        [key: string]: {
+          requests: number;
+          duration_seconds: number;
+          cost_cents: number;
+        };
+      };
       breakdown: {
         period: string;
         requests: number;
@@ -313,14 +318,14 @@ export interface operations {
         /** @description API key for authentication */
         token: string;
         /** @description Transcription model */
-        model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
+        model?: "audicle-o1" | "audicle-o1-realtime";
         /** @description ISO 639-1 language code */
         language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
         /** @description Audio sample rate in Hz (8000-48000) */
         sample_rate?: number;
         /** @description Audio encoding */
         encoding?: "pcm_s16le";
-        /** @description Include interim (partial) transcripts (realtime models only) */
+        /** @description Include interim (partial) transcripts (streaming models only) */
         interim_results?: boolean | null;
       };
       header?: never;
@@ -387,10 +392,10 @@ export interface operations {
          */
         audio_url?: string;
         /**
-         * @default default
+         * @default audicle-o1
          * @enum {string}
         */
-        model?: "default" | "turbo";
+        model?: "audicle-o1";
         language?: string;
         /**
         * @description Enable speaker diarization
@@ -741,7 +746,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
 export type HealthResponse = components["schemas"]["HealthResponse"];
 export type ErrorResponse = components["schemas"]["ErrorResponse"];
 export type TranscriptionStatus = Transcription["status"];
-export type TranscriptionModel = "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
+export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
 export interface ListTranscriptsParams {
     cursor?: string;
     limit?: number;
@@ -783,7 +788,7 @@ export interface TranscribeFileInput {
     audioUrl?: string;
 }
 export interface TranscribeOptions {
-    model?: "default" | "turbo";
+    model?: "audicle-o1";
    language?: string;
     speakerLabels?: boolean;
     wordTimestamps?: boolean;
@@ -809,7 +814,7 @@ export interface StreamWord {
     start: number;
     end: number;
     confidence?: number;
-    speaker?: string | null;
+    speaker?: number | null;
     punctuated_word?: string;
 }
 export interface SessionBeginMessage {
@@ -859,7 +864,7 @@ export type StreamEvents = {
     };
 };
 export interface TranscribeStreamOptions {
-    model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
+    model?: "audicle-o1-realtime";
     language?: string;
     sample_rate?: number;
     encoding?: "pcm_s16le";
@@ -869,6 +874,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
     #private;
     constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
     sendAudio(data: ArrayBuffer | ArrayBufferView): void;
+    /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
+    keepAlive(): void;
     finalize(): void;
     stop(): void;
     configure(config: Record<string, unknown>): void;
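The `speaker?: number` fields added to `TranscriptionSegment` and `TranscriptionWord` above pair with the `speakerLabels: true` option shown in the README. A minimal sketch of reading them; it assumes the completed transcription exposes the segment array as `result.result.segments`, a field name that is not shown in this diff:

```typescript
// Sketch only: `result.result.segments` is an assumed field name; the segment
// shape (start, end, text, speaker?) comes from the TranscriptionSegment schema above.
const result = await client.transcribe(
  { file: audioBlob },
  { model: "audicle-o1", language: "en", speakerLabels: true, wordTimestamps: true }
);

for (const segment of result.result.segments ?? []) {
  const label = segment.speaker !== undefined ? `Speaker ${segment.speaker}` : "Speaker ?";
  console.log(`[${segment.start.toFixed(1)}s-${segment.end.toFixed(1)}s] ${label}: ${segment.text}`);
}
```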
package/dist/index.d.ts CHANGED
@@ -34,16 +34,12 @@ export interface paths {
     };
     /**
      * Stream audio for live transcription
-     * @description Initiates a WebSocket connection for live audio transcription.
-     *
-     * The `model` query parameter determines the transcription backend:
-     * - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
-     * - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
+     * @description Initiates a WebSocket connection for live audio transcription. Only streaming models (e.g. `audicle-o1-realtime`) are supported — prerecorded models must use `POST /v1/transcribe`.
      *
      * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
      * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
      * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
-     * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (realtime models only)
+     * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
      * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
      * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
      *
@@ -240,6 +236,7 @@ export interface components {
       end: number;
       text: string;
       confidence?: number;
+      speaker?: number;
       words?: components["schemas"]["TranscriptionWord"][];
     };
     TranscriptionWord: {
@@ -247,6 +244,7 @@ export interface components {
       start: number;
       end: number;
       confidence?: number;
+      speaker?: number;
     };
     /** @description Usage and cost info, populated when status is 'completed' */
     TranscriptionUsage: {
@@ -258,6 +256,13 @@ export interface components {
       total_requests: number;
       total_duration_seconds: number;
       total_cost_cents: number;
+      usage_by_model: {
+        [key: string]: {
+          requests: number;
+          duration_seconds: number;
+          cost_cents: number;
+        };
+      };
       breakdown: {
         period: string;
         requests: number;
@@ -313,14 +318,14 @@ export interface operations {
         /** @description API key for authentication */
         token: string;
         /** @description Transcription model */
-        model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
+        model?: "audicle-o1" | "audicle-o1-realtime";
         /** @description ISO 639-1 language code */
         language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
         /** @description Audio sample rate in Hz (8000-48000) */
         sample_rate?: number;
         /** @description Audio encoding */
         encoding?: "pcm_s16le";
-        /** @description Include interim (partial) transcripts (realtime models only) */
+        /** @description Include interim (partial) transcripts (streaming models only) */
         interim_results?: boolean | null;
       };
       header?: never;
@@ -387,10 +392,10 @@ export interface operations {
          */
         audio_url?: string;
         /**
-         * @default default
+         * @default audicle-o1
         * @enum {string}
         */
-        model?: "default" | "turbo";
+        model?: "audicle-o1";
         language?: string;
         /**
         * @description Enable speaker diarization
@@ -741,7 +746,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
 export type HealthResponse = components["schemas"]["HealthResponse"];
 export type ErrorResponse = components["schemas"]["ErrorResponse"];
 export type TranscriptionStatus = Transcription["status"];
-export type TranscriptionModel = "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
+export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
 export interface ListTranscriptsParams {
     cursor?: string;
     limit?: number;
@@ -783,7 +788,7 @@ export interface TranscribeFileInput {
     audioUrl?: string;
 }
 export interface TranscribeOptions {
-    model?: "default" | "turbo";
+    model?: "audicle-o1";
    language?: string;
     speakerLabels?: boolean;
     wordTimestamps?: boolean;
@@ -809,7 +814,7 @@ export interface StreamWord {
     start: number;
     end: number;
     confidence?: number;
-    speaker?: string | null;
+    speaker?: number | null;
     punctuated_word?: string;
 }
 export interface SessionBeginMessage {
@@ -859,7 +864,7 @@ export type StreamEvents = {
     };
 };
 export interface TranscribeStreamOptions {
-    model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
+    model?: "audicle-o1-realtime";
     language?: string;
     sample_rate?: number;
     encoding?: "pcm_s16le";
@@ -869,6 +874,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
     #private;
     constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
     sendAudio(data: ArrayBuffer | ArrayBufferView): void;
+    /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
+    keepAlive(): void;
     finalize(): void;
     stop(): void;
     configure(config: Record<string, unknown>): void;
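The WebSocket protocol documented above distinguishes interim and final transcripts via `is_final`, and also emits `vad`, `error`, and `session.end` messages. A sketch of listening for them on a `TranscriptionStream`; only the `"transcript"` event name is confirmed by the README in this diff, so the other event names are assumptions that mirror the protocol message types:

```typescript
const stream = client.streaming.transcribe({
  model: "audicle-o1-realtime",
  sample_rate: 16000,
  encoding: "pcm_s16le",
});

stream.on("transcript", (msg) => {
  // Interim results arrive with is_final: false; print only finalized text.
  if (msg.is_final) {
    console.log(`${msg.transcript.start}s-${msg.transcript.end}s: ${msg.transcript.text}`);
  }
});

// Assumed event names, mirroring the documented JSON message types.
stream.on("vad", (msg) => console.log(`VAD ${msg.event} at ${msg.timestamp_ms} ms`));
stream.on("error", (msg) => console.error(`stream error ${msg.code}: ${msg.message}`));
stream.on("session.end", (msg) => console.log(`session ended: ${msg.reason}`));
```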
package/dist/index.js CHANGED
@@ -303,6 +303,10 @@ var TranscriptionStream = class extends TypedEmitter {
     }
     this.#ws.send(data);
   }
+  /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
+  keepAlive() {
+    this.#send({ type: "keepalive" });
+  }
   finalize() {
     this.#send({ type: "finalize" });
   }
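The `usage_by_model` map added to `UsageResponse` in the type declarations above breaks the existing totals down per model. A sketch of reading it with the `client.usage.get()` call from the README; it assumes that call resolves directly to a `UsageResponse`:

```typescript
const usage = await client.usage.get({
  startDate: "2026-01-01",
  endDate: "2026-01-31",
  granularity: "day",
});

// Per-model totals added in this diff, alongside the existing aggregate fields.
for (const [model, stats] of Object.entries(usage.usage_by_model)) {
  const dollars = (stats.cost_cents / 100).toFixed(2);
  console.log(`${model}: ${stats.requests} requests, ${stats.duration_seconds}s of audio, $${dollars}`);
}
```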
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@audicle/sdk",
-  "version": "0.1.0",
+  "version": "0.1.2",
   "type": "module",
   "description": "Official Node.js/TypeScript SDK for the Audicle transcription API",
   "license": "MIT",