assemblyai 4.34.0 → 4.34.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ import {
15
15
  StreamingEventMessage,
16
16
  TurnEvent,
17
17
  LLMGatewayResponseEvent,
18
+ SpeakerRevisionEvent,
18
19
  StreamingUpdateConfiguration,
19
20
  StreamingForceEndpoint,
20
21
  WarningEvent,
@@ -262,6 +263,10 @@ export class StreamingTranscriber {
262
263
  searchParams.set("prompt", this.params.prompt);
263
264
  }
264
265
 
266
+ if (this.params.agentContext) {
267
+ searchParams.set("agent_context", this.params.agentContext);
268
+ }
269
+
265
270
  if (this.params.filterProfanity) {
266
271
  searchParams.set(
267
272
  "filter_profanity",
@@ -274,7 +279,9 @@ export class StreamingTranscriber {
274
279
  "[Deprecation Warning] The speech model `u3-pro` is deprecated and will be removed in a future release. Please use `u3-rt-pro` instead.",
275
280
  );
276
281
  }
277
- searchParams.set("speech_model", this.params.speechModel.toString());
282
+ if (this.params.speechModel !== undefined) {
283
+ searchParams.set("speech_model", this.params.speechModel.toString());
284
+ }
278
285
 
279
286
  if (this.params.languageDetection !== undefined) {
280
287
  searchParams.set(
@@ -389,6 +396,10 @@ export class StreamingTranscriber {
389
396
  searchParams.set("redact_pii_sub", this.params.redactPiiSub);
390
397
  }
391
398
 
399
+ if (this.params.mode !== undefined) {
400
+ searchParams.set("mode", this.params.mode);
401
+ }
402
+
392
403
  if (this.params.llmGateway !== undefined) {
393
404
  searchParams.set("llm_gateway", JSON.stringify(this.params.llmGateway));
394
405
  }
@@ -404,6 +415,10 @@ export class StreamingTranscriber {
404
415
  event: "llmGatewayResponse",
405
416
  listener: (event: LLMGatewayResponseEvent) => void,
406
417
  ): void;
418
+ on(
419
+ event: "speakerRevision",
420
+ listener: (event: SpeakerRevisionEvent) => void,
421
+ ): void;
407
422
  on(event: "warning", listener: (event: WarningEvent) => void): void;
408
423
  on(event: "vad", listener: (event: VadFrame) => void): void;
409
424
  on(event: "error", listener: (error: Error) => void): void;
@@ -508,6 +523,10 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
508
523
  this.listeners.llmGatewayResponse?.(message);
509
524
  break;
510
525
  }
526
+ case "SpeakerRevision": {
527
+ this.listeners.speakerRevision?.(message);
528
+ break;
529
+ }
511
530
  case "Warning": {
512
531
  const warning = message as WarningEvent;
513
532
  console.warn(
@@ -91,7 +91,8 @@ export type StreamingTranscriberParams = {
91
91
  keyterms?: string[];
92
92
  keytermsPrompt?: string[];
93
93
  prompt?: string;
94
- speechModel: StreamingSpeechModel;
94
+ agentContext?: string;
95
+ speechModel?: StreamingSpeechModel;
95
96
  languageDetection?: boolean;
96
97
  domain?: StreamingDomain;
97
98
  inactivityTimeout?: number;
@@ -107,6 +108,7 @@ export type StreamingTranscriberParams = {
107
108
  redactPii?: boolean;
108
109
  redactPiiPolicies?: StreamingPiiPolicy[];
109
110
  redactPiiSub?: StreamingPiiSubstitution;
111
+ mode?: StreamingMode;
110
112
  llmGateway?: LLMGatewayConfig;
111
113
  webhookUrl?: string;
112
114
  webhookAuthHeaderName?: string;
@@ -146,6 +148,7 @@ export type StreamingEvents =
146
148
  | "turn"
147
149
  | "speechStarted"
148
150
  | "llmGatewayResponse"
151
+ | "speakerRevision"
149
152
  | "warning"
150
153
  | "vad"
151
154
  | "error";
@@ -156,6 +159,7 @@ export type StreamingListeners = {
156
159
  turn?: (event: TurnEvent) => void;
157
160
  speechStarted?: (event: SpeechStartedEvent) => void;
158
161
  llmGatewayResponse?: (event: LLMGatewayResponseEvent) => void;
162
+ speakerRevision?: (event: SpeakerRevisionEvent) => void;
159
163
  warning?: (event: WarningEvent) => void;
160
164
  vad?: (event: VadFrame) => void;
161
165
  error?: (error: Error) => void;
@@ -165,11 +169,14 @@ export type StreamingSpeechModel =
165
169
  | "universal-streaming-english"
166
170
  | "universal-streaming-multilingual"
167
171
  | "u3-rt-pro"
172
+ | "u3-rt-pro-beta-1"
168
173
  | "whisper-rt"
169
174
  | "u3-pro";
170
175
 
171
176
  export type StreamingDomain = "medical-v1";
172
177
 
178
+ export type StreamingMode = "max_accuracy" | "min_latency" | "balanced";
179
+
173
180
  export type VoiceFocusModel = "near-field" | "far-field";
174
181
 
175
182
  export type StreamingPiiSubstitution = "hash" | "entity_name";
@@ -330,6 +337,7 @@ export type StreamingUpdateConfiguration = {
330
337
  format_turns?: boolean;
331
338
  keyterms_prompt?: string[];
332
339
  prompt?: string;
340
+ agent_context?: string;
333
341
  filter_profanity?: boolean;
334
342
  interruption_delay?: number;
335
343
  turn_left_pad_ms?: number;
@@ -358,12 +366,37 @@ export type LLMGatewayResponseEvent = {
358
366
  data: unknown;
359
367
  };
360
368
 
369
+ /**
370
+ * A single earlier Turn whose speaker labels were revised by reclustering.
371
+ * Match by `turn_order` against the original Turn; replace its per-word
372
+ * `speaker` assignments (and the turn-level `speaker_label`) with these. Text
373
+ * and word timestamps are unchanged from the original Turn.
374
+ */
375
+ export type SpeakerRevisionItem = {
376
+ turn_order: number;
377
+ speaker_label?: string;
378
+ words: StreamingWord[];
379
+ };
380
+
381
+ /**
382
+ * Server-side correction to previously-emitted Turns' speaker labels.
383
+ * Diarization-only (emitted only when `speakerLabels` is enabled). Sent once
384
+ * per offline-recluster resolve; `revisions` carries one entry per earlier
385
+ * Turn whose label actually changed (unchanged turns are omitted). Apply each
386
+ * entry by matching its `turn_order`.
387
+ */
388
+ export type SpeakerRevisionEvent = {
389
+ type: "SpeakerRevision";
390
+ revisions: SpeakerRevisionItem[];
391
+ };
392
+
361
393
  export type StreamingEventMessage =
362
394
  | BeginEvent
363
395
  | TurnEvent
364
396
  | SpeechStartedEvent
365
397
  | TerminationEvent
366
398
  | LLMGatewayResponseEvent
399
+ | SpeakerRevisionEvent
367
400
  | ErrorEvent
368
401
  | WarningEvent;
369
402