assemblyai 4.34.0 → 4.34.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/dist/assemblyai.streaming.umd.js +20 -5
- package/dist/assemblyai.streaming.umd.min.js +1 -1
- package/dist/assemblyai.umd.js +20 -5
- package/dist/assemblyai.umd.min.js +1 -1
- package/dist/browser.mjs +17 -2
- package/dist/bun.mjs +17 -2
- package/dist/deno.mjs +17 -2
- package/dist/index.cjs +20 -5
- package/dist/index.mjs +20 -5
- package/dist/node.cjs +17 -2
- package/dist/node.mjs +17 -2
- package/dist/services/streaming/service.d.ts +2 -1
- package/dist/streaming.browser.mjs +17 -2
- package/dist/streaming.cjs +19 -4
- package/dist/streaming.mjs +19 -4
- package/dist/types/streaming/index.d.ts +32 -4
- package/dist/workerd.mjs +17 -2
- package/package.json +1 -1
- package/src/services/streaming/service.ts +24 -1
- package/src/types/streaming/index.ts +35 -1
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
StreamingEventMessage,
|
|
16
16
|
TurnEvent,
|
|
17
17
|
LLMGatewayResponseEvent,
|
|
18
|
+
SpeakerRevisionEvent,
|
|
18
19
|
StreamingUpdateConfiguration,
|
|
19
20
|
StreamingForceEndpoint,
|
|
20
21
|
WarningEvent,
|
|
@@ -262,6 +263,10 @@ export class StreamingTranscriber {
|
|
|
262
263
|
searchParams.set("prompt", this.params.prompt);
|
|
263
264
|
}
|
|
264
265
|
|
|
266
|
+
if (this.params.agentContext) {
|
|
267
|
+
searchParams.set("agent_context", this.params.agentContext);
|
|
268
|
+
}
|
|
269
|
+
|
|
265
270
|
if (this.params.filterProfanity) {
|
|
266
271
|
searchParams.set(
|
|
267
272
|
"filter_profanity",
|
|
@@ -274,7 +279,13 @@ export class StreamingTranscriber {
|
|
|
274
279
|
"[Deprecation Warning] The speech model `u3-pro` is deprecated and will be removed in a future release. Please use `u3-rt-pro` instead.",
|
|
275
280
|
);
|
|
276
281
|
}
|
|
277
|
-
|
|
282
|
+
if (this.params.speechModel !== undefined) {
|
|
283
|
+
searchParams.set("speech_model", this.params.speechModel.toString());
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
if (this.params.languageCode !== undefined) {
|
|
287
|
+
searchParams.set("language_code", this.params.languageCode);
|
|
288
|
+
}
|
|
278
289
|
|
|
279
290
|
if (this.params.languageDetection !== undefined) {
|
|
280
291
|
searchParams.set(
|
|
@@ -389,6 +400,10 @@ export class StreamingTranscriber {
|
|
|
389
400
|
searchParams.set("redact_pii_sub", this.params.redactPiiSub);
|
|
390
401
|
}
|
|
391
402
|
|
|
403
|
+
if (this.params.mode !== undefined) {
|
|
404
|
+
searchParams.set("mode", this.params.mode);
|
|
405
|
+
}
|
|
406
|
+
|
|
392
407
|
if (this.params.llmGateway !== undefined) {
|
|
393
408
|
searchParams.set("llm_gateway", JSON.stringify(this.params.llmGateway));
|
|
394
409
|
}
|
|
@@ -404,6 +419,10 @@ export class StreamingTranscriber {
|
|
|
404
419
|
event: "llmGatewayResponse",
|
|
405
420
|
listener: (event: LLMGatewayResponseEvent) => void,
|
|
406
421
|
): void;
|
|
422
|
+
on(
|
|
423
|
+
event: "speakerRevision",
|
|
424
|
+
listener: (event: SpeakerRevisionEvent) => void,
|
|
425
|
+
): void;
|
|
407
426
|
on(event: "warning", listener: (event: WarningEvent) => void): void;
|
|
408
427
|
on(event: "vad", listener: (event: VadFrame) => void): void;
|
|
409
428
|
on(event: "error", listener: (error: Error) => void): void;
|
|
@@ -508,6 +527,10 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
508
527
|
this.listeners.llmGatewayResponse?.(message);
|
|
509
528
|
break;
|
|
510
529
|
}
|
|
530
|
+
case "SpeakerRevision": {
|
|
531
|
+
this.listeners.speakerRevision?.(message);
|
|
532
|
+
break;
|
|
533
|
+
}
|
|
511
534
|
case "Warning": {
|
|
512
535
|
const warning = message as WarningEvent;
|
|
513
536
|
console.warn(
|
|
@@ -91,7 +91,9 @@ export type StreamingTranscriberParams = {
|
|
|
91
91
|
keyterms?: string[];
|
|
92
92
|
keytermsPrompt?: string[];
|
|
93
93
|
prompt?: string;
|
|
94
|
-
|
|
94
|
+
agentContext?: string;
|
|
95
|
+
speechModel?: StreamingSpeechModel;
|
|
96
|
+
languageCode?: string;
|
|
95
97
|
languageDetection?: boolean;
|
|
96
98
|
domain?: StreamingDomain;
|
|
97
99
|
inactivityTimeout?: number;
|
|
@@ -107,6 +109,7 @@ export type StreamingTranscriberParams = {
|
|
|
107
109
|
redactPii?: boolean;
|
|
108
110
|
redactPiiPolicies?: StreamingPiiPolicy[];
|
|
109
111
|
redactPiiSub?: StreamingPiiSubstitution;
|
|
112
|
+
mode?: StreamingMode;
|
|
110
113
|
llmGateway?: LLMGatewayConfig;
|
|
111
114
|
webhookUrl?: string;
|
|
112
115
|
webhookAuthHeaderName?: string;
|
|
@@ -146,6 +149,7 @@ export type StreamingEvents =
|
|
|
146
149
|
| "turn"
|
|
147
150
|
| "speechStarted"
|
|
148
151
|
| "llmGatewayResponse"
|
|
152
|
+
| "speakerRevision"
|
|
149
153
|
| "warning"
|
|
150
154
|
| "vad"
|
|
151
155
|
| "error";
|
|
@@ -156,6 +160,7 @@ export type StreamingListeners = {
|
|
|
156
160
|
turn?: (event: TurnEvent) => void;
|
|
157
161
|
speechStarted?: (event: SpeechStartedEvent) => void;
|
|
158
162
|
llmGatewayResponse?: (event: LLMGatewayResponseEvent) => void;
|
|
163
|
+
speakerRevision?: (event: SpeakerRevisionEvent) => void;
|
|
159
164
|
warning?: (event: WarningEvent) => void;
|
|
160
165
|
vad?: (event: VadFrame) => void;
|
|
161
166
|
error?: (error: Error) => void;
|
|
@@ -165,11 +170,14 @@ export type StreamingSpeechModel =
|
|
|
165
170
|
| "universal-streaming-english"
|
|
166
171
|
| "universal-streaming-multilingual"
|
|
167
172
|
| "u3-rt-pro"
|
|
173
|
+
| "u3-rt-pro-beta-1"
|
|
168
174
|
| "whisper-rt"
|
|
169
175
|
| "u3-pro";
|
|
170
176
|
|
|
171
177
|
export type StreamingDomain = "medical-v1";
|
|
172
178
|
|
|
179
|
+
export type StreamingMode = "max_accuracy" | "min_latency" | "balanced";
|
|
180
|
+
|
|
173
181
|
export type VoiceFocusModel = "near-field" | "far-field";
|
|
174
182
|
|
|
175
183
|
export type StreamingPiiSubstitution = "hash" | "entity_name";
|
|
@@ -330,6 +338,7 @@ export type StreamingUpdateConfiguration = {
|
|
|
330
338
|
format_turns?: boolean;
|
|
331
339
|
keyterms_prompt?: string[];
|
|
332
340
|
prompt?: string;
|
|
341
|
+
agent_context?: string;
|
|
333
342
|
filter_profanity?: boolean;
|
|
334
343
|
interruption_delay?: number;
|
|
335
344
|
turn_left_pad_ms?: number;
|
|
@@ -358,12 +367,37 @@ export type LLMGatewayResponseEvent = {
|
|
|
358
367
|
data: unknown;
|
|
359
368
|
};
|
|
360
369
|
|
|
370
|
+
/**
|
|
371
|
+
* A single earlier Turn whose speaker labels were revised by reclustering.
|
|
372
|
+
* Match by `turn_order` against the original Turn; replace its per-word
|
|
373
|
+
* `speaker` assignments (and the turn-level `speaker_label`) with these. Text
|
|
374
|
+
* and word timestamps are unchanged from the original Turn.
|
|
375
|
+
*/
|
|
376
|
+
export type SpeakerRevisionItem = {
|
|
377
|
+
turn_order: number;
|
|
378
|
+
speaker_label?: string;
|
|
379
|
+
words: StreamingWord[];
|
|
380
|
+
};
|
|
381
|
+
|
|
382
|
+
/**
|
|
383
|
+
* Server-side correction to previously-emitted Turns' speaker labels.
|
|
384
|
+
* Diarization-only (emitted only when `speakerLabels` is enabled). Sent once
|
|
385
|
+
* per offline-recluster resolve; `revisions` carries one entry per earlier
|
|
386
|
+
* Turn whose label actually changed (unchanged turns are omitted). Apply each
|
|
387
|
+
* entry by matching its `turn_order`.
|
|
388
|
+
*/
|
|
389
|
+
export type SpeakerRevisionEvent = {
|
|
390
|
+
type: "SpeakerRevision";
|
|
391
|
+
revisions: SpeakerRevisionItem[];
|
|
392
|
+
};
|
|
393
|
+
|
|
361
394
|
export type StreamingEventMessage =
|
|
362
395
|
| BeginEvent
|
|
363
396
|
| TurnEvent
|
|
364
397
|
| SpeechStartedEvent
|
|
365
398
|
| TerminationEvent
|
|
366
399
|
| LLMGatewayResponseEvent
|
|
400
|
+
| SpeakerRevisionEvent
|
|
367
401
|
| ErrorEvent
|
|
368
402
|
| WarningEvent;
|
|
369
403
|
|