assemblyai 4.34.0 → 4.34.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/dist/assemblyai.streaming.umd.js +17 -5
- package/dist/assemblyai.streaming.umd.min.js +1 -1
- package/dist/assemblyai.umd.js +17 -5
- package/dist/assemblyai.umd.min.js +1 -1
- package/dist/browser.mjs +14 -2
- package/dist/bun.mjs +14 -2
- package/dist/deno.mjs +14 -2
- package/dist/index.cjs +17 -5
- package/dist/index.mjs +17 -5
- package/dist/node.cjs +14 -2
- package/dist/node.mjs +14 -2
- package/dist/services/streaming/service.d.ts +2 -1
- package/dist/streaming.browser.mjs +14 -2
- package/dist/streaming.cjs +16 -4
- package/dist/streaming.mjs +16 -4
- package/dist/types/streaming/index.d.ts +31 -4
- package/dist/workerd.mjs +14 -2
- package/package.json +1 -1
- package/src/services/streaming/service.ts +20 -1
- package/src/types/streaming/index.ts +34 -1
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
StreamingEventMessage,
|
|
16
16
|
TurnEvent,
|
|
17
17
|
LLMGatewayResponseEvent,
|
|
18
|
+
SpeakerRevisionEvent,
|
|
18
19
|
StreamingUpdateConfiguration,
|
|
19
20
|
StreamingForceEndpoint,
|
|
20
21
|
WarningEvent,
|
|
@@ -262,6 +263,10 @@ export class StreamingTranscriber {
|
|
|
262
263
|
searchParams.set("prompt", this.params.prompt);
|
|
263
264
|
}
|
|
264
265
|
|
|
266
|
+
if (this.params.agentContext) {
|
|
267
|
+
searchParams.set("agent_context", this.params.agentContext);
|
|
268
|
+
}
|
|
269
|
+
|
|
265
270
|
if (this.params.filterProfanity) {
|
|
266
271
|
searchParams.set(
|
|
267
272
|
"filter_profanity",
|
|
@@ -274,7 +279,9 @@ export class StreamingTranscriber {
|
|
|
274
279
|
"[Deprecation Warning] The speech model `u3-pro` is deprecated and will be removed in a future release. Please use `u3-rt-pro` instead.",
|
|
275
280
|
);
|
|
276
281
|
}
|
|
277
|
-
|
|
282
|
+
if (this.params.speechModel !== undefined) {
|
|
283
|
+
searchParams.set("speech_model", this.params.speechModel.toString());
|
|
284
|
+
}
|
|
278
285
|
|
|
279
286
|
if (this.params.languageDetection !== undefined) {
|
|
280
287
|
searchParams.set(
|
|
@@ -389,6 +396,10 @@ export class StreamingTranscriber {
|
|
|
389
396
|
searchParams.set("redact_pii_sub", this.params.redactPiiSub);
|
|
390
397
|
}
|
|
391
398
|
|
|
399
|
+
if (this.params.mode !== undefined) {
|
|
400
|
+
searchParams.set("mode", this.params.mode);
|
|
401
|
+
}
|
|
402
|
+
|
|
392
403
|
if (this.params.llmGateway !== undefined) {
|
|
393
404
|
searchParams.set("llm_gateway", JSON.stringify(this.params.llmGateway));
|
|
394
405
|
}
|
|
@@ -404,6 +415,10 @@ export class StreamingTranscriber {
|
|
|
404
415
|
event: "llmGatewayResponse",
|
|
405
416
|
listener: (event: LLMGatewayResponseEvent) => void,
|
|
406
417
|
): void;
|
|
418
|
+
on(
|
|
419
|
+
event: "speakerRevision",
|
|
420
|
+
listener: (event: SpeakerRevisionEvent) => void,
|
|
421
|
+
): void;
|
|
407
422
|
on(event: "warning", listener: (event: WarningEvent) => void): void;
|
|
408
423
|
on(event: "vad", listener: (event: VadFrame) => void): void;
|
|
409
424
|
on(event: "error", listener: (error: Error) => void): void;
|
|
@@ -508,6 +523,10 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
508
523
|
this.listeners.llmGatewayResponse?.(message);
|
|
509
524
|
break;
|
|
510
525
|
}
|
|
526
|
+
case "SpeakerRevision": {
|
|
527
|
+
this.listeners.speakerRevision?.(message);
|
|
528
|
+
break;
|
|
529
|
+
}
|
|
511
530
|
case "Warning": {
|
|
512
531
|
const warning = message as WarningEvent;
|
|
513
532
|
console.warn(
|
|
@@ -91,7 +91,8 @@ export type StreamingTranscriberParams = {
|
|
|
91
91
|
keyterms?: string[];
|
|
92
92
|
keytermsPrompt?: string[];
|
|
93
93
|
prompt?: string;
|
|
94
|
-
|
|
94
|
+
agentContext?: string;
|
|
95
|
+
speechModel?: StreamingSpeechModel;
|
|
95
96
|
languageDetection?: boolean;
|
|
96
97
|
domain?: StreamingDomain;
|
|
97
98
|
inactivityTimeout?: number;
|
|
@@ -107,6 +108,7 @@ export type StreamingTranscriberParams = {
|
|
|
107
108
|
redactPii?: boolean;
|
|
108
109
|
redactPiiPolicies?: StreamingPiiPolicy[];
|
|
109
110
|
redactPiiSub?: StreamingPiiSubstitution;
|
|
111
|
+
mode?: StreamingMode;
|
|
110
112
|
llmGateway?: LLMGatewayConfig;
|
|
111
113
|
webhookUrl?: string;
|
|
112
114
|
webhookAuthHeaderName?: string;
|
|
@@ -146,6 +148,7 @@ export type StreamingEvents =
|
|
|
146
148
|
| "turn"
|
|
147
149
|
| "speechStarted"
|
|
148
150
|
| "llmGatewayResponse"
|
|
151
|
+
| "speakerRevision"
|
|
149
152
|
| "warning"
|
|
150
153
|
| "vad"
|
|
151
154
|
| "error";
|
|
@@ -156,6 +159,7 @@ export type StreamingListeners = {
|
|
|
156
159
|
turn?: (event: TurnEvent) => void;
|
|
157
160
|
speechStarted?: (event: SpeechStartedEvent) => void;
|
|
158
161
|
llmGatewayResponse?: (event: LLMGatewayResponseEvent) => void;
|
|
162
|
+
speakerRevision?: (event: SpeakerRevisionEvent) => void;
|
|
159
163
|
warning?: (event: WarningEvent) => void;
|
|
160
164
|
vad?: (event: VadFrame) => void;
|
|
161
165
|
error?: (error: Error) => void;
|
|
@@ -165,11 +169,14 @@ export type StreamingSpeechModel =
|
|
|
165
169
|
| "universal-streaming-english"
|
|
166
170
|
| "universal-streaming-multilingual"
|
|
167
171
|
| "u3-rt-pro"
|
|
172
|
+
| "u3-rt-pro-beta-1"
|
|
168
173
|
| "whisper-rt"
|
|
169
174
|
| "u3-pro";
|
|
170
175
|
|
|
171
176
|
export type StreamingDomain = "medical-v1";
|
|
172
177
|
|
|
178
|
+
export type StreamingMode = "max_accuracy" | "min_latency" | "balanced";
|
|
179
|
+
|
|
173
180
|
export type VoiceFocusModel = "near-field" | "far-field";
|
|
174
181
|
|
|
175
182
|
export type StreamingPiiSubstitution = "hash" | "entity_name";
|
|
@@ -330,6 +337,7 @@ export type StreamingUpdateConfiguration = {
|
|
|
330
337
|
format_turns?: boolean;
|
|
331
338
|
keyterms_prompt?: string[];
|
|
332
339
|
prompt?: string;
|
|
340
|
+
agent_context?: string;
|
|
333
341
|
filter_profanity?: boolean;
|
|
334
342
|
interruption_delay?: number;
|
|
335
343
|
turn_left_pad_ms?: number;
|
|
@@ -358,12 +366,37 @@ export type LLMGatewayResponseEvent = {
|
|
|
358
366
|
data: unknown;
|
|
359
367
|
};
|
|
360
368
|
|
|
369
|
+
/**
|
|
370
|
+
* A single earlier Turn whose speaker labels were revised by reclustering.
|
|
371
|
+
* Match by `turn_order` against the original Turn; replace its per-word
|
|
372
|
+
* `speaker` assignments (and the turn-level `speaker_label`) with these. Text
|
|
373
|
+
* and word timestamps are unchanged from the original Turn.
|
|
374
|
+
*/
|
|
375
|
+
export type SpeakerRevisionItem = {
|
|
376
|
+
turn_order: number;
|
|
377
|
+
speaker_label?: string;
|
|
378
|
+
words: StreamingWord[];
|
|
379
|
+
};
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Server-side correction to previously-emitted Turns' speaker labels.
|
|
383
|
+
* Diarization-only (emitted only when `speakerLabels` is enabled). Sent once
|
|
384
|
+
* per offline-recluster resolve; `revisions` carries one entry per earlier
|
|
385
|
+
* Turn whose label actually changed (unchanged turns are omitted). Apply each
|
|
386
|
+
* entry by matching its `turn_order`.
|
|
387
|
+
*/
|
|
388
|
+
export type SpeakerRevisionEvent = {
|
|
389
|
+
type: "SpeakerRevision";
|
|
390
|
+
revisions: SpeakerRevisionItem[];
|
|
391
|
+
};
|
|
392
|
+
|
|
361
393
|
export type StreamingEventMessage =
|
|
362
394
|
| BeginEvent
|
|
363
395
|
| TurnEvent
|
|
364
396
|
| SpeechStartedEvent
|
|
365
397
|
| TerminationEvent
|
|
366
398
|
| LLMGatewayResponseEvent
|
|
399
|
+
| SpeakerRevisionEvent
|
|
367
400
|
| ErrorEvent
|
|
368
401
|
| WarningEvent;
|
|
369
402
|
|