@audicle/sdk 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,7 +49,7 @@ const result = await client.transcribe({
49
49
  const result = await client.transcribe(
50
50
  { file: audioBlob },
51
51
  {
52
- model: "default",
52
+ model: "audicle-o1",
53
53
  language: "en",
54
54
  speakerLabels: true,
55
55
  wordTimestamps: true,
@@ -118,13 +118,13 @@ const result = await client.transcripts.wait("txn_abc123", {
118
118
  });
119
119
  ```
120
120
 
121
- ## Real-Time Streaming
121
+ ## Streaming
122
122
 
123
123
  Stream audio over WebSocket for live transcription:
124
124
 
125
125
  ```typescript
126
126
  const stream = client.streaming.transcribe({
127
- model: "default",
127
+ model: "audicle-o1-realtime",
128
128
  sample_rate: 16000,
129
129
  encoding: "pcm_s16le",
130
130
  });
@@ -152,22 +152,26 @@ stream.finalize();
152
152
 
153
153
  ### Models
154
154
 
155
+ Both models can be used for streaming via WebSocket. `audicle-o1-realtime` additionally supports interim (non-final) results and voice activity detection (VAD) events.
156
+
155
157
  | Model | Description |
156
158
  |---|---|
157
- | `default` | Whisper-based streaming transcription |
158
- | `turbo` | Faster Whisper variant |
159
- | `deepgram-nova-3` | Deepgram Nova 3 (supports interim results + VAD) |
160
- | `gpt-realtime-mini` | OpenAI Realtime |
159
+ | `audicle-o1` | Whisper-based final results only |
160
+ | `audicle-o1-realtime` | Low-latency interim results + VAD |
161
161
 
162
162
  ### Observing a session
163
163
 
164
- Watch a live transcription session from the dashboard or another client:
164
+ Watch a live streaming session from another client. Only active WebSocket sessions can be observed — batch transcriptions submitted via file upload cannot.
165
+
166
+ The `audicle-o1-realtime` model delivers interim results to observers in real time. The `audicle-o1` model only sends final results.
165
167
 
166
168
  ```typescript
167
169
  const observer = client.streaming.observe("txn_abc123");
168
170
 
169
171
  observer.on("transcript", (msg) => {
170
- console.log(msg.transcript.text);
172
+ if (msg.is_final) {
173
+ console.log(msg.transcript.text);
174
+ }
171
175
  });
172
176
  ```
173
177
 
@@ -175,8 +179,8 @@ observer.on("transcript", (msg) => {
175
179
 
176
180
  ```typescript
177
181
  const usage = await client.usage.get({
178
- startDate: "2025-01-01",
179
- endDate: "2025-01-31",
182
+ startDate: "2026-01-01",
183
+ endDate: "2026-01-31",
180
184
  granularity: "day", // "hour" | "day"
181
185
  });
182
186
 
package/dist/index.cjs CHANGED
@@ -348,6 +348,10 @@ var TranscriptionStream = class extends TypedEmitter {
348
348
  }
349
349
  this.#ws.send(data);
350
350
  }
351
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
352
+ keepAlive() {
353
+ this.#send({ type: "keepalive" });
354
+ }
351
355
  finalize() {
352
356
  this.#send({ type: "finalize" });
353
357
  }
package/dist/index.d.cts CHANGED
@@ -37,13 +37,13 @@ export interface paths {
37
37
  * @description Initiates a WebSocket connection for live audio transcription.
38
38
  *
39
39
  * The `model` query parameter determines the transcription backend:
40
- * - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
41
- * - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
40
+ * - `audicle-o1` — Whisper-based streaming (TranscriptionSession DO)
41
+ * - `audicle-o1-realtime` — streaming provider relay (StreamingSession DO)
42
42
  *
43
43
  * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
44
44
  * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
45
45
  * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
46
- * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (realtime models only)
46
+ * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (streaming models only)
47
47
  * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
48
48
  * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
49
49
  *
@@ -258,6 +258,13 @@ export interface components {
258
258
  total_requests: number;
259
259
  total_duration_seconds: number;
260
260
  total_cost_cents: number;
261
+ usage_by_model: {
262
+ [key: string]: {
263
+ requests: number;
264
+ duration_seconds: number;
265
+ cost_cents: number;
266
+ };
267
+ };
261
268
  breakdown: {
262
269
  period: string;
263
270
  requests: number;
@@ -313,14 +320,14 @@ export interface operations {
313
320
  /** @description API key for authentication */
314
321
  token: string;
315
322
  /** @description Transcription model */
316
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
323
+ model?: "audicle-o1" | "audicle-o1-realtime";
317
324
  /** @description ISO 639-1 language code */
318
325
  language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
319
326
  /** @description Audio sample rate in Hz (8000-48000) */
320
327
  sample_rate?: number;
321
328
  /** @description Audio encoding */
322
329
  encoding?: "pcm_s16le";
323
- /** @description Include interim (partial) transcripts (realtime models only) */
330
+ /** @description Include interim (partial) transcripts (streaming models only) */
324
331
  interim_results?: boolean | null;
325
332
  };
326
333
  header?: never;
@@ -387,10 +394,10 @@ export interface operations {
387
394
  */
388
395
  audio_url?: string;
389
396
  /**
390
- * @default default
397
+ * @default audicle-o1
391
398
  * @enum {string}
392
399
  */
393
- model?: "default" | "turbo";
400
+ model?: "audicle-o1";
394
401
  language?: string;
395
402
  /**
396
403
  * @description Enable speaker diarization
@@ -741,7 +748,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
741
748
  export type HealthResponse = components["schemas"]["HealthResponse"];
742
749
  export type ErrorResponse = components["schemas"]["ErrorResponse"];
743
750
  export type TranscriptionStatus = Transcription["status"];
744
- export type TranscriptionModel = "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
751
+ export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
745
752
  export interface ListTranscriptsParams {
746
753
  cursor?: string;
747
754
  limit?: number;
@@ -783,7 +790,7 @@ export interface TranscribeFileInput {
783
790
  audioUrl?: string;
784
791
  }
785
792
  export interface TranscribeOptions {
786
- model?: "default" | "turbo";
793
+ model?: "audicle-o1";
787
794
  language?: string;
788
795
  speakerLabels?: boolean;
789
796
  wordTimestamps?: boolean;
@@ -859,7 +866,7 @@ export type StreamEvents = {
859
866
  };
860
867
  };
861
868
  export interface TranscribeStreamOptions {
862
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
869
+ model?: "audicle-o1" | "audicle-o1-realtime";
863
870
  language?: string;
864
871
  sample_rate?: number;
865
872
  encoding?: "pcm_s16le";
@@ -869,6 +876,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
869
876
  #private;
870
877
  constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
871
878
  sendAudio(data: ArrayBuffer | ArrayBufferView): void;
879
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
880
+ keepAlive(): void;
872
881
  finalize(): void;
873
882
  stop(): void;
874
883
  configure(config: Record<string, unknown>): void;
package/dist/index.d.ts CHANGED
@@ -37,13 +37,13 @@ export interface paths {
37
37
  * @description Initiates a WebSocket connection for live audio transcription.
38
38
  *
39
39
  * The `model` query parameter determines the transcription backend:
40
- * - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
41
- * - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
40
+ * - `audicle-o1` — Whisper-based streaming (TranscriptionSession DO)
41
+ * - `audicle-o1-realtime` — streaming provider relay (StreamingSession DO)
42
42
  *
43
43
  * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
44
44
  * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
45
45
  * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
46
- * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (realtime models only)
46
+ * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (streaming models only)
47
47
  * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
48
48
  * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
49
49
  *
@@ -258,6 +258,13 @@ export interface components {
258
258
  total_requests: number;
259
259
  total_duration_seconds: number;
260
260
  total_cost_cents: number;
261
+ usage_by_model: {
262
+ [key: string]: {
263
+ requests: number;
264
+ duration_seconds: number;
265
+ cost_cents: number;
266
+ };
267
+ };
261
268
  breakdown: {
262
269
  period: string;
263
270
  requests: number;
@@ -313,14 +320,14 @@ export interface operations {
313
320
  /** @description API key for authentication */
314
321
  token: string;
315
322
  /** @description Transcription model */
316
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
323
+ model?: "audicle-o1" | "audicle-o1-realtime";
317
324
  /** @description ISO 639-1 language code */
318
325
  language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
319
326
  /** @description Audio sample rate in Hz (8000-48000) */
320
327
  sample_rate?: number;
321
328
  /** @description Audio encoding */
322
329
  encoding?: "pcm_s16le";
323
- /** @description Include interim (partial) transcripts (realtime models only) */
330
+ /** @description Include interim (partial) transcripts (streaming models only) */
324
331
  interim_results?: boolean | null;
325
332
  };
326
333
  header?: never;
@@ -387,10 +394,10 @@ export interface operations {
387
394
  */
388
395
  audio_url?: string;
389
396
  /**
390
- * @default default
397
+ * @default audicle-o1
391
398
  * @enum {string}
392
399
  */
393
- model?: "default" | "turbo";
400
+ model?: "audicle-o1";
394
401
  language?: string;
395
402
  /**
396
403
  * @description Enable speaker diarization
@@ -741,7 +748,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
741
748
  export type HealthResponse = components["schemas"]["HealthResponse"];
742
749
  export type ErrorResponse = components["schemas"]["ErrorResponse"];
743
750
  export type TranscriptionStatus = Transcription["status"];
744
- export type TranscriptionModel = "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
751
+ export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
745
752
  export interface ListTranscriptsParams {
746
753
  cursor?: string;
747
754
  limit?: number;
@@ -783,7 +790,7 @@ export interface TranscribeFileInput {
783
790
  audioUrl?: string;
784
791
  }
785
792
  export interface TranscribeOptions {
786
- model?: "default" | "turbo";
793
+ model?: "audicle-o1";
787
794
  language?: string;
788
795
  speakerLabels?: boolean;
789
796
  wordTimestamps?: boolean;
@@ -859,7 +866,7 @@ export type StreamEvents = {
859
866
  };
860
867
  };
861
868
  export interface TranscribeStreamOptions {
862
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
869
+ model?: "audicle-o1" | "audicle-o1-realtime";
863
870
  language?: string;
864
871
  sample_rate?: number;
865
872
  encoding?: "pcm_s16le";
@@ -869,6 +876,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
869
876
  #private;
870
877
  constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
871
878
  sendAudio(data: ArrayBuffer | ArrayBufferView): void;
879
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
880
+ keepAlive(): void;
872
881
  finalize(): void;
873
882
  stop(): void;
874
883
  configure(config: Record<string, unknown>): void;
package/dist/index.js CHANGED
@@ -303,6 +303,10 @@ var TranscriptionStream = class extends TypedEmitter {
303
303
  }
304
304
  this.#ws.send(data);
305
305
  }
306
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
307
+ keepAlive() {
308
+ this.#send({ type: "keepalive" });
309
+ }
306
310
  finalize() {
307
311
  this.#send({ type: "finalize" });
308
312
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@audicle/sdk",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "type": "module",
5
5
  "description": "Official Node.js/TypeScript SDK for the Audicle transcription API",
6
6
  "license": "MIT",