voice-router-dev 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -4978,9 +4978,6 @@ declare const StreamingSupportedBitDepthEnum: {
4978
4978
  /**
4979
4979
  * ListenV1EncodingParameter type definition
4980
4980
  */
4981
- /**
4982
- * ListenV1EncodingParameter type definition
4983
- */
4984
4981
  type ListenV1EncodingParameter = typeof ListenV1EncodingParameter[keyof typeof ListenV1EncodingParameter];
4985
4982
  declare const ListenV1EncodingParameter: {
4986
4983
  readonly linear16: "linear16";
@@ -5412,9 +5409,6 @@ declare function createVoiceRouter(config: VoiceRouterConfig, adapters?: Transcr
5412
5409
  /**
5413
5410
  * SpeakV1EncodingParameter type definition
5414
5411
  */
5415
- /**
5416
- * SpeakV1EncodingParameter type definition
5417
- */
5418
5412
  type SpeakV1EncodingParameter = typeof SpeakV1EncodingParameter[keyof typeof SpeakV1EncodingParameter];
5419
5413
  declare const SpeakV1EncodingParameter: {
5420
5414
  readonly linear16: "linear16";
@@ -5452,9 +5446,6 @@ declare const SpeakV1EncodingParameter: {
5452
5446
  /**
5453
5447
  * SpeakV1ContainerParameter type definition
5454
5448
  */
5455
- /**
5456
- * SpeakV1ContainerParameter type definition
5457
- */
5458
5449
  type SpeakV1ContainerParameter = typeof SpeakV1ContainerParameter[keyof typeof SpeakV1ContainerParameter];
5459
5450
  declare const SpeakV1ContainerParameter: {
5460
5451
  readonly none: "none";
@@ -5491,9 +5482,6 @@ declare const SpeakV1ContainerParameter: {
5491
5482
  /**
5492
5483
  * SpeakV1SampleRateParameter type definition
5493
5484
  */
5494
- /**
5495
- * SpeakV1SampleRateParameter type definition
5496
- */
5497
5485
  type SpeakV1SampleRateParameter = typeof SpeakV1SampleRateParameter[keyof typeof SpeakV1SampleRateParameter];
5498
5486
  declare const SpeakV1SampleRateParameter: {
5499
5487
  readonly NUMBER_16000: 16000;
@@ -6740,8 +6728,7 @@ declare abstract class BaseWebhookHandler {
6740
6728
  * Gladia webhook handler
6741
6729
  *
6742
6730
  * Handles webhook callbacks from Gladia API:
6743
- * - transcription.created - Job created and queued
6744
- * - transcription.success - Job completed successfully
6731
+ * - transcription.success - Job completed successfully (includes full transcript)
6745
6732
  * - transcription.error - Job failed with error
6746
6733
  *
6747
6734
  * @example
@@ -6763,13 +6750,26 @@ declare abstract class BaseWebhookHandler {
6763
6750
  *
6764
6751
  * if (event.eventType === 'transcription.completed') {
6765
6752
  * console.log('Transcript:', event.data?.text);
6753
+ * console.log('Utterances:', event.data?.utterances);
6766
6754
  * }
6767
6755
  * ```
6768
6756
  */
6769
6757
  declare class GladiaWebhookHandler extends BaseWebhookHandler {
6770
6758
  readonly provider: TranscriptionProvider;
6759
+ /**
6760
+ * Convert Gladia WordDTO to unified Word type
6761
+ */
6762
+ private mapWord;
6763
+ /**
6764
+ * Convert Gladia UtteranceDTO to unified Utterance type
6765
+ */
6766
+ private mapUtterance;
6771
6767
  /**
6772
6768
  * Check if payload matches Gladia webhook format
6769
+ *
6770
+ * Gladia callbacks have the structure:
6771
+ * - { id, event: "transcription.success", payload: TranscriptionResultDTO, custom_metadata? }
6772
+ * - { id, event: "transcription.error", error: ErrorDTO, custom_metadata? }
6773
6773
  */
6774
6774
  matches(payload: unknown, _options?: {
6775
6775
  queryParams?: Record<string, string>;
package/dist/index.d.ts CHANGED
@@ -4978,9 +4978,6 @@ declare const StreamingSupportedBitDepthEnum: {
4978
4978
  /**
4979
4979
  * ListenV1EncodingParameter type definition
4980
4980
  */
4981
- /**
4982
- * ListenV1EncodingParameter type definition
4983
- */
4984
4981
  type ListenV1EncodingParameter = typeof ListenV1EncodingParameter[keyof typeof ListenV1EncodingParameter];
4985
4982
  declare const ListenV1EncodingParameter: {
4986
4983
  readonly linear16: "linear16";
@@ -5412,9 +5409,6 @@ declare function createVoiceRouter(config: VoiceRouterConfig, adapters?: Transcr
5412
5409
  /**
5413
5410
  * SpeakV1EncodingParameter type definition
5414
5411
  */
5415
- /**
5416
- * SpeakV1EncodingParameter type definition
5417
- */
5418
5412
  type SpeakV1EncodingParameter = typeof SpeakV1EncodingParameter[keyof typeof SpeakV1EncodingParameter];
5419
5413
  declare const SpeakV1EncodingParameter: {
5420
5414
  readonly linear16: "linear16";
@@ -5452,9 +5446,6 @@ declare const SpeakV1EncodingParameter: {
5452
5446
  /**
5453
5447
  * SpeakV1ContainerParameter type definition
5454
5448
  */
5455
- /**
5456
- * SpeakV1ContainerParameter type definition
5457
- */
5458
5449
  type SpeakV1ContainerParameter = typeof SpeakV1ContainerParameter[keyof typeof SpeakV1ContainerParameter];
5459
5450
  declare const SpeakV1ContainerParameter: {
5460
5451
  readonly none: "none";
@@ -5491,9 +5482,6 @@ declare const SpeakV1ContainerParameter: {
5491
5482
  /**
5492
5483
  * SpeakV1SampleRateParameter type definition
5493
5484
  */
5494
- /**
5495
- * SpeakV1SampleRateParameter type definition
5496
- */
5497
5485
  type SpeakV1SampleRateParameter = typeof SpeakV1SampleRateParameter[keyof typeof SpeakV1SampleRateParameter];
5498
5486
  declare const SpeakV1SampleRateParameter: {
5499
5487
  readonly NUMBER_16000: 16000;
@@ -6740,8 +6728,7 @@ declare abstract class BaseWebhookHandler {
6740
6728
  * Gladia webhook handler
6741
6729
  *
6742
6730
  * Handles webhook callbacks from Gladia API:
6743
- * - transcription.created - Job created and queued
6744
- * - transcription.success - Job completed successfully
6731
+ * - transcription.success - Job completed successfully (includes full transcript)
6745
6732
  * - transcription.error - Job failed with error
6746
6733
  *
6747
6734
  * @example
@@ -6763,13 +6750,26 @@ declare abstract class BaseWebhookHandler {
6763
6750
  *
6764
6751
  * if (event.eventType === 'transcription.completed') {
6765
6752
  * console.log('Transcript:', event.data?.text);
6753
+ * console.log('Utterances:', event.data?.utterances);
6766
6754
  * }
6767
6755
  * ```
6768
6756
  */
6769
6757
  declare class GladiaWebhookHandler extends BaseWebhookHandler {
6770
6758
  readonly provider: TranscriptionProvider;
6759
+ /**
6760
+ * Convert Gladia WordDTO to unified Word type
6761
+ */
6762
+ private mapWord;
6763
+ /**
6764
+ * Convert Gladia UtteranceDTO to unified Utterance type
6765
+ */
6766
+ private mapUtterance;
6771
6767
  /**
6772
6768
  * Check if payload matches Gladia webhook format
6769
+ *
6770
+ * Gladia callbacks have the structure:
6771
+ * - { id, event: "transcription.success", payload: TranscriptionResultDTO, custom_metadata? }
6772
+ * - { id, event: "transcription.error", error: ErrorDTO, custom_metadata? }
6773
6773
  */
6774
6774
  matches(payload: unknown, _options?: {
6775
6775
  queryParams?: Record<string, string>;
package/dist/index.js CHANGED
@@ -2896,6 +2896,15 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2896
2896
  });
2897
2897
  let sessionStatus = "connecting";
2898
2898
  const sessionId = `assemblyai-${Date.now()}-${Math.random().toString(36).substring(7)}`;
2899
+ let audioBuffer = Buffer.alloc(0);
2900
+ const MIN_CHUNK_SIZE = 1600;
2901
+ const MAX_CHUNK_SIZE = 32e3;
2902
+ const flushAudioBuffer = () => {
2903
+ if (audioBuffer.length > 0 && ws.readyState === import_ws2.default.OPEN) {
2904
+ ws.send(audioBuffer);
2905
+ audioBuffer = Buffer.alloc(0);
2906
+ }
2907
+ };
2899
2908
  ws.on("open", () => {
2900
2909
  sessionStatus = "open";
2901
2910
  callbacks?.onOpen?.();
@@ -2984,8 +2993,13 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2984
2993
  if (ws.readyState !== import_ws2.default.OPEN) {
2985
2994
  throw new Error("WebSocket is not open");
2986
2995
  }
2987
- ws.send(chunk.data);
2996
+ audioBuffer = Buffer.concat([audioBuffer, chunk.data]);
2997
+ if (audioBuffer.length >= MIN_CHUNK_SIZE || audioBuffer.length >= MAX_CHUNK_SIZE) {
2998
+ ws.send(audioBuffer);
2999
+ audioBuffer = Buffer.alloc(0);
3000
+ }
2988
3001
  if (chunk.isLast) {
3002
+ flushAudioBuffer();
2989
3003
  ws.send(
2990
3004
  JSON.stringify({
2991
3005
  terminate_session: true
@@ -2998,6 +3012,7 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2998
3012
  return;
2999
3013
  }
3000
3014
  sessionStatus = "closing";
3015
+ flushAudioBuffer();
3001
3016
  if (ws.readyState === import_ws2.default.OPEN) {
3002
3017
  ws.send(
3003
3018
  JSON.stringify({
@@ -4338,15 +4353,46 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
4338
4353
  super(...arguments);
4339
4354
  this.provider = "gladia";
4340
4355
  }
4356
+ /**
4357
+ * Convert Gladia WordDTO to unified Word type
4358
+ */
4359
+ mapWord(word) {
4360
+ return {
4361
+ text: word.word,
4362
+ start: word.start,
4363
+ end: word.end,
4364
+ confidence: word.confidence
4365
+ };
4366
+ }
4367
+ /**
4368
+ * Convert Gladia UtteranceDTO to unified Utterance type
4369
+ */
4370
+ mapUtterance(utterance) {
4371
+ return {
4372
+ text: utterance.text,
4373
+ start: utterance.start,
4374
+ end: utterance.end,
4375
+ confidence: utterance.confidence,
4376
+ speaker: utterance.speaker !== void 0 ? String(utterance.speaker) : void 0,
4377
+ words: utterance.words?.map((w) => this.mapWord(w))
4378
+ };
4379
+ }
4341
4380
  /**
4342
4381
  * Check if payload matches Gladia webhook format
4382
+ *
4383
+ * Gladia callbacks have the structure:
4384
+ * - { id, event: "transcription.success", payload: TranscriptionResultDTO, custom_metadata? }
4385
+ * - { id, event: "transcription.error", error: ErrorDTO, custom_metadata? }
4343
4386
  */
4344
4387
  matches(payload, _options) {
4345
4388
  if (!payload || typeof payload !== "object") {
4346
4389
  return false;
4347
4390
  }
4348
4391
  const obj = payload;
4349
- if (!("event" in obj) || !("payload" in obj)) {
4392
+ if (!("id" in obj) || !("event" in obj)) {
4393
+ return false;
4394
+ }
4395
+ if (typeof obj.id !== "string") {
4350
4396
  return false;
4351
4397
  }
4352
4398
  if (typeof obj.event !== "string") {
@@ -4355,11 +4401,13 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
4355
4401
  if (!obj.event.startsWith("transcription.")) {
4356
4402
  return false;
4357
4403
  }
4358
- if (!obj.payload || typeof obj.payload !== "object") {
4404
+ if (obj.event === "transcription.success" && !("payload" in obj)) {
4405
+ return false;
4406
+ }
4407
+ if (obj.event === "transcription.error" && !("error" in obj)) {
4359
4408
  return false;
4360
4409
  }
4361
- const payloadObj = obj.payload;
4362
- return typeof payloadObj.id === "string";
4410
+ return true;
4363
4411
  }
4364
4412
  /**
4365
4413
  * Parse Gladia webhook payload to unified format
@@ -4368,38 +4416,57 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
4368
4416
  if (!this.matches(payload)) {
4369
4417
  return this.createErrorEvent(payload, "Invalid Gladia webhook payload");
4370
4418
  }
4371
- const webhookPayload = payload;
4372
- const jobId = webhookPayload.payload.id;
4373
- const event = webhookPayload.event;
4374
- if (event === "transcription.created") {
4375
- return {
4376
- success: true,
4377
- provider: this.provider,
4378
- eventType: "transcription.created",
4379
- data: {
4380
- id: jobId,
4381
- status: "queued"
4382
- },
4383
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
4384
- raw: payload
4385
- };
4386
- }
4419
+ const obj = payload;
4420
+ const jobId = obj.id;
4421
+ const event = obj.event;
4387
4422
  if (event === "transcription.success") {
4423
+ const successPayload = payload;
4424
+ const result = successPayload.payload;
4425
+ const transcription = result.transcription;
4426
+ const metadata = result.metadata;
4427
+ const utterances = transcription?.utterances?.map(
4428
+ (u) => this.mapUtterance(u)
4429
+ );
4430
+ const words = transcription?.utterances?.flatMap(
4431
+ (u) => u.words?.map((w) => this.mapWord(w)) ?? []
4432
+ );
4433
+ const speakerIds = /* @__PURE__ */ new Set();
4434
+ transcription?.utterances?.forEach((u) => {
4435
+ if (u.speaker !== void 0) {
4436
+ speakerIds.add(u.speaker);
4437
+ }
4438
+ });
4439
+ const speakers = speakerIds.size > 0 ? Array.from(speakerIds).map((id) => ({ id: String(id) })) : void 0;
4440
+ const summary = result.summarization?.success && result.summarization.results ? result.summarization.results : void 0;
4388
4441
  return {
4389
4442
  success: true,
4390
4443
  provider: this.provider,
4391
4444
  eventType: "transcription.completed",
4392
4445
  data: {
4393
4446
  id: jobId,
4394
- status: "completed"
4395
- // Note: Full transcript data needs to be fetched via API
4396
- // using GladiaAdapter.getTranscript(jobId)
4447
+ status: "completed",
4448
+ text: transcription?.full_transcript,
4449
+ duration: metadata?.audio_duration,
4450
+ language: transcription?.languages?.[0],
4451
+ speakers,
4452
+ words,
4453
+ utterances,
4454
+ summary,
4455
+ metadata: {
4456
+ transcription_time: metadata?.transcription_time,
4457
+ billing_time: metadata?.billing_time,
4458
+ number_of_distinct_channels: metadata?.number_of_distinct_channels,
4459
+ custom_metadata: successPayload.custom_metadata
4460
+ },
4461
+ completedAt: (/* @__PURE__ */ new Date()).toISOString()
4397
4462
  },
4398
4463
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
4399
4464
  raw: payload
4400
4465
  };
4401
4466
  }
4402
4467
  if (event === "transcription.error") {
4468
+ const errorPayload = payload;
4469
+ const error = errorPayload.error;
4403
4470
  return {
4404
4471
  success: false,
4405
4472
  provider: this.provider,
@@ -4407,7 +4474,11 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
4407
4474
  data: {
4408
4475
  id: jobId,
4409
4476
  status: "error",
4410
- error: "Transcription failed"
4477
+ error: error?.message || "Transcription failed",
4478
+ metadata: {
4479
+ error_code: error?.code,
4480
+ custom_metadata: errorPayload.custom_metadata
4481
+ }
4411
4482
  },
4412
4483
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
4413
4484
  raw: payload