@audicle/sdk 0.1.1 → 0.1.2
- package/README.md +3 -14
- package/dist/index.d.cts +6 -8
- package/dist/index.d.ts +6 -8
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,6 +1,6 @@
 # Audicle Node.js SDK
 
-The official Node.js/TypeScript client for the [Audicle](https://audicle.ai) transcription API. Supports
+The official Node.js/TypeScript client for the [Audicle](https://audicle.ai) transcription API. Supports pre-recorded transcription, real-time streaming over WebSocket, and full transcript management.
 
 ## Installation
 
@@ -22,7 +22,7 @@ const result = await client.transcribe({
 console.log(result.result.text);
 ```
 
-##
+## Pre-Recorded Transcription
 
 ### From a file
 
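For context, the call that the renamed "Pre-Recorded Transcription" heading documents looks roughly like this. A minimal sketch: `client.transcribe({ ... })` and `result.result.text` appear verbatim in the hunk above, while the `AudicleClient` export name, the `apiKey` option, and the `file` parameter are assumptions.

```typescript
import { AudicleClient } from "@audicle/sdk"; // assumed export name

const client = new AudicleClient({ apiKey: process.env.AUDICLE_API_KEY! }); // assumed option

const result = await client.transcribe({
  model: "audicle-o1",   // prerecorded model, per the README's (now removed) model table
  file: "./meeting.wav", // assumed parameter name
});
console.log(result.result.text); // shown verbatim in the hunk above
```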
@@ -150,20 +150,9 @@ stream.sendAudio(pcmBuffer);
 stream.finalize();
 ```
 
-### Models
-
-Both models can be used for streaming via WebSocket. `audicle-o1-realtime` additionally supports interim (non-final) results and voice activity detection (VAD) events.
-
-| Model | Description |
-|---|---|
-| `audicle-o1` | Whisper-based — final results only |
-| `audicle-o1-realtime` | Low-latency — interim results + VAD |
-
 ### Observing a session
 
-Watch a live streaming session from another client. Only active WebSocket sessions can be observed —
-
-The `audicle-o1-realtime` model delivers interim results to observers in real time. The `audicle-o1` model only sends final results.
+Watch a live streaming session from another client. Only active WebSocket sessions can be observed — pre-recorded transcriptions submitted via file upload cannot.
 
 ```typescript
 const observer = client.streaming.observe("txn_abc123");
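The hunk cuts off inside the observer example. A sketch of how a consumer might continue it, assuming an event-emitter surface on the returned observer; only `client.streaming.observe("txn_abc123")` is confirmed by the diff, and the event names and `close()` method below are illustrative.

```typescript
const observer = client.streaming.observe("txn_abc123"); // from the diff

// Assumed event-emitter surface; event names are illustrative.
observer.on("transcript", (msg: { is_final: boolean; transcript: { text: string } }) => {
  if (msg.is_final) console.log(msg.transcript.text);
});
observer.on("session.end", () => observer.close()); // assumed method
```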
package/dist/index.d.cts
CHANGED
@@ -34,16 +34,12 @@ export interface paths {
         };
         /**
          * Stream audio for live transcription
-         * @description Initiates a WebSocket connection for live audio transcription.
-         *
-         * The `model` query parameter determines the transcription backend:
-         * - `audicle-o1` — Whisper-based streaming (TranscriptionSession DO)
-         * - `audicle-o1-realtime` — streaming provider relay (StreamingSession DO)
+         * @description Initiates a WebSocket connection for live audio transcription. Only streaming models (e.g. `audicle-o1-realtime`) are supported — prerecorded models must use `POST /v1/transcribe`.
          *
          * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
          * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
          * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
-         * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
+         * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
          * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
          * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
          *
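The rewritten doc comment describes a raw WebSocket protocol: binary PCM s16le frames in, JSON messages out. A minimal client sketch against those documented message types; the endpoint URL is an assumption, and everything in the `switch` comes from the comment above.

```typescript
import WebSocket from "ws";

// Endpoint URL is assumed; the message types handled below are the
// five listed in the @description comment.
const ws = new WebSocket("wss://api.audicle.ai/v1/stream?model=audicle-o1-realtime");

// 100 ms of 16 kHz mono s16le silence, standing in for real captured audio.
const pcmChunk = Buffer.alloc(3200);

ws.on("open", () => {
  ws.send(pcmChunk); // binary frames carry raw PCM s16le audio
});

ws.on("message", (data, isBinary) => {
  if (isBinary) return; // server messages are JSON text frames
  const msg = JSON.parse(data.toString());
  switch (msg.type) {
    case "session.begin":
      console.log("session", msg.id, "model", msg.model);
      break;
    case "transcript":
      console.log(msg.is_final ? "final:" : "interim:", msg.transcript.text);
      break;
    case "vad":
      console.log("vad", msg.event, "at", msg.timestamp_ms, "ms");
      break;
    case "error":
      console.error(msg.code, msg.message);
      break;
    case "session.end":
      console.log("ended:", msg.reason, msg.usage);
      break;
  }
});
```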
@@ -240,6 +236,7 @@ export interface components {
             end: number;
             text: string;
             confidence?: number;
+            speaker?: number;
             words?: components["schemas"]["TranscriptionWord"][];
         };
         TranscriptionWord: {
@@ -247,6 +244,7 @@ export interface components {
             start: number;
             end: number;
             confidence?: number;
+            speaker?: number;
         };
         /** @description Usage and cost info, populated when status is 'completed' */
         TranscriptionUsage: {
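Both the segment schema and `TranscriptionWord` gain an optional `speaker` field, which reads like a diarization label. A self-contained sketch of grouping words by speaker; the `Word` shape below is hypothetical, mirroring only the fields visible in the hunks, and the diff does not say what the integer means (a zero-based speaker index is assumed).

```typescript
// Hypothetical shape mirroring the fields visible in the hunks above.
interface Word {
  start: number;
  end: number;
  confidence?: number;
  speaker?: number; // assumed: zero-based diarization label
}

// Bucket words by speaker label; unlabeled words go under -1.
function bySpeaker(words: Word[]): Map<number, Word[]> {
  const groups = new Map<number, Word[]>();
  for (const w of words) {
    const key = w.speaker ?? -1;
    const bucket = groups.get(key) ?? [];
    bucket.push(w);
    groups.set(key, bucket);
  }
  return groups;
}
```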
@@ -816,7 +814,7 @@ export interface StreamWord {
    start: number;
    end: number;
    confidence?: number;
-    speaker?:
+    speaker?: number | null;
    punctuated_word?: string;
 }
 export interface SessionBeginMessage {
@@ -866,7 +864,7 @@ export type StreamEvents = {
     };
 };
 export interface TranscribeStreamOptions {
-    model?: "audicle-o1
+    model?: "audicle-o1-realtime";
     language?: string;
     sample_rate?: number;
     encoding?: "pcm_s16le";
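Narrowing `model` to the single literal `"audicle-o1-realtime"` makes the type system reject the prerecorded model for streaming. A sketch, assuming `TranscribeStreamOptions` is exported from the package root as the declaration file suggests:

```typescript
import type { TranscribeStreamOptions } from "@audicle/sdk";

const opts: TranscribeStreamOptions = {
  model: "audicle-o1-realtime", // the only model the narrowed type accepts
  language: "en",
  sample_rate: 16000,
  encoding: "pcm_s16le",
};

// With 0.1.2, this no longer compiles:
// const bad: TranscribeStreamOptions = { model: "audicle-o1" };
// Type '"audicle-o1"' is not assignable to type '"audicle-o1-realtime"'.
```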
package/dist/index.d.ts
CHANGED
@@ -34,16 +34,12 @@ export interface paths {
         };
         /**
          * Stream audio for live transcription
-         * @description Initiates a WebSocket connection for live audio transcription.
-         *
-         * The `model` query parameter determines the transcription backend:
-         * - `audicle-o1` — Whisper-based streaming (TranscriptionSession DO)
-         * - `audicle-o1-realtime` — streaming provider relay (StreamingSession DO)
+         * @description Initiates a WebSocket connection for live audio transcription. Only streaming models (e.g. `audicle-o1-realtime`) are supported — prerecorded models must use `POST /v1/transcribe`.
          *
          * **Protocol:** Send binary audio frames (PCM s16le) over the WebSocket. The server responds with JSON messages:
          * - `{"type": "session.begin", "id": "...", "model": "..."}` — connection established
          * - `{"type": "transcript", "is_final": true/false, "transcript": {"text": "...", "start": 0.0, "end": 1.5}}` — transcription results
-         * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
+         * - `{"type": "vad", "event": "speech_start|speech_end", "timestamp_ms": N}` — voice activity detection
          * - `{"type": "error", "code": "...", "message": "..."}` — error occurred
          * - `{"type": "session.end", "reason": "...", "usage": {...}}` — session ended
          *
@@ -240,6 +236,7 @@ export interface components {
             end: number;
             text: string;
             confidence?: number;
+            speaker?: number;
             words?: components["schemas"]["TranscriptionWord"][];
         };
         TranscriptionWord: {
@@ -247,6 +244,7 @@ export interface components {
             start: number;
             end: number;
             confidence?: number;
+            speaker?: number;
         };
         /** @description Usage and cost info, populated when status is 'completed' */
         TranscriptionUsage: {
@@ -816,7 +814,7 @@ export interface StreamWord {
    start: number;
    end: number;
    confidence?: number;
-    speaker?:
+    speaker?: number | null;
    punctuated_word?: string;
 }
 export interface SessionBeginMessage {
@@ -866,7 +864,7 @@ export type StreamEvents = {
     };
 };
 export interface TranscribeStreamOptions {
-    model?: "audicle-o1
+    model?: "audicle-o1-realtime";
     language?: string;
     sample_rate?: number;
     encoding?: "pcm_s16le";