@audicle/sdk 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -49,7 +49,7 @@ const result = await client.transcribe({
49
49
  const result = await client.transcribe(
50
50
  { file: audioBlob },
51
51
  {
52
- model: "default",
52
+ model: "audicle-o1",
53
53
  language: "en",
54
54
  speakerLabels: true,
55
55
  wordTimestamps: true,
@@ -118,13 +118,13 @@ const result = await client.transcripts.wait("txn_abc123", {
118
118
  });
119
119
  ```
120
120
 
121
- ## Real-Time Streaming
121
+ ## Streaming
122
122
 
123
123
  Stream audio over WebSocket for live transcription:
124
124
 
125
125
  ```typescript
126
126
  const stream = client.streaming.transcribe({
127
- model: "default",
127
+ model: "audicle-o1-realtime",
128
128
  sample_rate: 16000,
129
129
  encoding: "pcm_s16le",
130
130
  });
@@ -152,22 +152,26 @@ stream.finalize();
152
152
 
153
153
  ### Models
154
154
 
155
+ Both models can be used for streaming via WebSocket. `audicle-o1-realtime` additionally supports interim (non-final) results and voice activity detection (VAD) events.
156
+
155
157
  | Model | Description |
156
158
  |---|---|
157
- | `default` | Whisper-based streaming transcription |
158
- | `turbo` | Faster Whisper variant |
159
- | `deepgram-nova-3` | Deepgram Nova 3 (supports interim results + VAD) |
160
- | `gpt-realtime-mini` | OpenAI Realtime |
159
+ | `audicle-o1` | Whisper-based final results only |
160
+ | `audicle-o1-realtime` | Low-latency interim results + VAD |
161
161
 
162
162
  ### Observing a session
163
163
 
164
- Watch a live transcription session from the dashboard or another client:
164
+ Watch a live streaming session from another client. Only active WebSocket sessions can be observed — batch transcriptions submitted via file upload cannot.
165
+
166
+ The `audicle-o1-realtime` model delivers interim results to observers in real time. The `audicle-o1` model only sends final results.
165
167
 
166
168
  ```typescript
167
169
  const observer = client.streaming.observe("txn_abc123");
168
170
 
169
171
  observer.on("transcript", (msg) => {
170
- console.log(msg.transcript.text);
172
+ if (msg.is_final) {
173
+ console.log(msg.transcript.text);
174
+ }
171
175
  });
172
176
  ```
173
177
 
@@ -175,8 +179,8 @@ observer.on("transcript", (msg) => {
175
179
 
176
180
  ```typescript
177
181
  const usage = await client.usage.get({
178
- startDate: "2025-01-01",
179
- endDate: "2025-01-31",
182
+ startDate: "2026-01-01",
183
+ endDate: "2026-01-31",
180
184
  granularity: "day", // "hour" | "day"
181
185
  });
182
186
 
package/dist/index.cjs CHANGED
@@ -348,6 +348,10 @@ var TranscriptionStream = class extends TypedEmitter {
348
348
  }
349
349
  this.#ws.send(data);
350
350
  }
351
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
352
+ keepAlive() {
353
+ this.#send({ type: "keepalive" });
354
+ }
351
355
  finalize() {
352
356
  this.#send({ type: "finalize" });
353
357
  }
package/dist/index.d.cts CHANGED
@@ -37,13 +37,13 @@ export interface paths {
37
37
  * @description Initiates a WebSocket connection for live audio transcription.
38
38
  *
39
39
  * The `model` query parameter determines the transcription backend:
40
- * - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
41
- * - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
40
+ * - `audicle-o1` — Whisper-based streaming (TranscriptionSession DO)
41
+ * - `audicle-o1-realtime` — streaming provider relay (StreamingSession DO)
42
42
  *
43
43
  * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
44
44
  * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
45
45
  * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
46
- * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (realtime models only)
46
+ * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (streaming models only)
47
47
  * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
48
48
  * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
49
49
  *
@@ -258,6 +258,13 @@ export interface components {
258
258
  total_requests: number;
259
259
  total_duration_seconds: number;
260
260
  total_cost_cents: number;
261
+ usage_by_model: {
262
+ [key: string]: {
263
+ requests: number;
264
+ duration_seconds: number;
265
+ cost_cents: number;
266
+ };
267
+ };
261
268
  breakdown: {
262
269
  period: string;
263
270
  requests: number;
@@ -313,14 +320,14 @@ export interface operations {
313
320
  /** @description API key for authentication */
314
321
  token: string;
315
322
  /** @description Transcription model */
316
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
323
+ model?: "audicle-o1" | "audicle-o1-realtime";
317
324
  /** @description ISO 639-1 language code */
318
325
  language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
319
326
  /** @description Audio sample rate in Hz (8000-48000) */
320
327
  sample_rate?: number;
321
328
  /** @description Audio encoding */
322
329
  encoding?: "pcm_s16le";
323
- /** @description Include interim (partial) transcripts (realtime models only) */
330
+ /** @description Include interim (partial) transcripts (streaming models only) */
324
331
  interim_results?: boolean | null;
325
332
  };
326
333
  header?: never;
@@ -387,10 +394,10 @@ export interface operations {
387
394
  */
388
395
  audio_url?: string;
389
396
  /**
390
- * @default default
397
+ * @default audicle-o1
391
398
  * @enum {string}
392
399
  */
393
- model?: "default" | "turbo";
400
+ model?: "audicle-o1";
394
401
  language?: string;
395
402
  /**
396
403
  * @description Enable speaker diarization
@@ -741,7 +748,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
741
748
  export type HealthResponse = components["schemas"]["HealthResponse"];
742
749
  export type ErrorResponse = components["schemas"]["ErrorResponse"];
743
750
  export type TranscriptionStatus = Transcription["status"];
744
- export type TranscriptionModel = "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
751
+ export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
745
752
  export interface ListTranscriptsParams {
746
753
  cursor?: string;
747
754
  limit?: number;
@@ -783,7 +790,7 @@ export interface TranscribeFileInput {
783
790
  audioUrl?: string;
784
791
  }
785
792
  export interface TranscribeOptions {
786
- model?: "default" | "turbo";
793
+ model?: "audicle-o1";
787
794
  language?: string;
788
795
  speakerLabels?: boolean;
789
796
  wordTimestamps?: boolean;
@@ -859,7 +866,7 @@ export type StreamEvents = {
859
866
  };
860
867
  };
861
868
  export interface TranscribeStreamOptions {
862
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
869
+ model?: "audicle-o1" | "audicle-o1-realtime";
863
870
  language?: string;
864
871
  sample_rate?: number;
865
872
  encoding?: "pcm_s16le";
@@ -869,6 +876,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
869
876
  #private;
870
877
  constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
871
878
  sendAudio(data: ArrayBuffer | ArrayBufferView): void;
879
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
880
+ keepAlive(): void;
872
881
  finalize(): void;
873
882
  stop(): void;
874
883
  configure(config: Record<string, unknown>): void;
package/dist/index.d.ts CHANGED
@@ -37,13 +37,13 @@ export interface paths {
37
37
  * @description Initiates a WebSocket connection for live audio transcription.
38
38
  *
39
39
  * The `model` query parameter determines the transcription backend:
40
- * - `default`, `turbo` — Whisper-based streaming (TranscriptionSession DO)
41
- * - `gpt-realtime-mini`, `deepgram-nova-3` — LLM provider relay (RealtimeSession DO)
40
+ * - `audicle-o1` — Whisper-based streaming (TranscriptionSession DO)
41
+ * - `audicle-o1-realtime` — streaming provider relay (StreamingSession DO)
42
42
  *
43
43
  * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
44
44
  * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
45
45
  * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
46
- * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (realtime models only)
46
+ * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection (streaming models only)
47
47
  * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
48
48
  * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
49
49
  *
@@ -258,6 +258,13 @@ export interface components {
258
258
  total_requests: number;
259
259
  total_duration_seconds: number;
260
260
  total_cost_cents: number;
261
+ usage_by_model: {
262
+ [key: string]: {
263
+ requests: number;
264
+ duration_seconds: number;
265
+ cost_cents: number;
266
+ };
267
+ };
261
268
  breakdown: {
262
269
  period: string;
263
270
  requests: number;
@@ -313,14 +320,14 @@ export interface operations {
313
320
  /** @description API key for authentication */
314
321
  token: string;
315
322
  /** @description Transcription model */
316
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
323
+ model?: "audicle-o1" | "audicle-o1-realtime";
317
324
  /** @description ISO 639-1 language code */
318
325
  language?: "af" | "am" | "ar" | "as" | "az" | "ba" | "be" | "bg" | "bn" | "bo" | "br" | "bs" | "ca" | "cs" | "cy" | "da" | "de" | "el" | "en" | "es" | "et" | "eu" | "fa" | "fi" | "fo" | "fr" | "gl" | "gu" | "ha" | "haw" | "he" | "hi" | "hr" | "ht" | "hu" | "hy" | "id" | "is" | "it" | "ja" | "jw" | "ka" | "kk" | "km" | "kn" | "ko" | "la" | "lb" | "ln" | "lo" | "lt" | "lv" | "mg" | "mi" | "mk" | "ml" | "mn" | "mr" | "ms" | "mt" | "my" | "ne" | "nl" | "nn" | "no" | "oc" | "pa" | "pl" | "ps" | "pt" | "ro" | "ru" | "sa" | "sd" | "si" | "sk" | "sl" | "sn" | "so" | "sq" | "sr" | "su" | "sv" | "sw" | "ta" | "te" | "tg" | "th" | "tk" | "tl" | "tr" | "tt" | "uk" | "ur" | "uz" | "vi" | "yi" | "yo" | "zh" | "yue";
319
326
  /** @description Audio sample rate in Hz (8000-48000) */
320
327
  sample_rate?: number;
321
328
  /** @description Audio encoding */
322
329
  encoding?: "pcm_s16le";
323
- /** @description Include interim (partial) transcripts (realtime models only) */
330
+ /** @description Include interim (partial) transcripts (streaming models only) */
324
331
  interim_results?: boolean | null;
325
332
  };
326
333
  header?: never;
@@ -387,10 +394,10 @@ export interface operations {
387
394
  */
388
395
  audio_url?: string;
389
396
  /**
390
- * @default default
397
+ * @default audicle-o1
391
398
  * @enum {string}
392
399
  */
393
- model?: "default" | "turbo";
400
+ model?: "audicle-o1";
394
401
  language?: string;
395
402
  /**
396
403
  * @description Enable speaker diarization
@@ -741,7 +748,7 @@ export type UsageResponse = components["schemas"]["UsageResponse"];
741
748
  export type HealthResponse = components["schemas"]["HealthResponse"];
742
749
  export type ErrorResponse = components["schemas"]["ErrorResponse"];
743
750
  export type TranscriptionStatus = Transcription["status"];
744
- export type TranscriptionModel = "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
751
+ export type TranscriptionModel = "audicle-o1" | "audicle-o1-realtime";
745
752
  export interface ListTranscriptsParams {
746
753
  cursor?: string;
747
754
  limit?: number;
@@ -783,7 +790,7 @@ export interface TranscribeFileInput {
783
790
  audioUrl?: string;
784
791
  }
785
792
  export interface TranscribeOptions {
786
- model?: "default" | "turbo";
793
+ model?: "audicle-o1";
787
794
  language?: string;
788
795
  speakerLabels?: boolean;
789
796
  wordTimestamps?: boolean;
@@ -859,7 +866,7 @@ export type StreamEvents = {
859
866
  };
860
867
  };
861
868
  export interface TranscribeStreamOptions {
862
- model?: "default" | "turbo" | "gpt-realtime-mini" | "deepgram-nova-3";
869
+ model?: "audicle-o1" | "audicle-o1-realtime";
863
870
  language?: string;
864
871
  sample_rate?: number;
865
872
  encoding?: "pcm_s16le";
@@ -869,6 +876,8 @@ export declare class TranscriptionStream extends TypedEmitter<StreamEvents> {
869
876
  #private;
870
877
  constructor(baseUrl: string, apiKey: string, options?: TranscribeStreamOptions);
871
878
  sendAudio(data: ArrayBuffer | ArrayBufferView): void;
879
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
880
+ keepAlive(): void;
872
881
  finalize(): void;
873
882
  stop(): void;
874
883
  configure(config: Record<string, unknown>): void;
package/dist/index.js CHANGED
@@ -303,6 +303,10 @@ var TranscriptionStream = class extends TypedEmitter {
303
303
  }
304
304
  this.#ws.send(data);
305
305
  }
306
+ /** Send a keepalive to prevent the 10-second inactivity timeout during pauses in audio. */
307
+ keepAlive() {
308
+ this.#send({ type: "keepalive" });
309
+ }
306
310
  finalize() {
307
311
  this.#send({ type: "finalize" });
308
312
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@audicle/sdk",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "type": "module",
5
5
  "description": "Official Node.js/TypeScript SDK for the Audicle transcription API",
6
6
  "license": "MIT",