voice-router-dev 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1935,12 +1935,27 @@ var GladiaAdapter = class extends BaseAdapter {
1935
1935
  utterances: this.extractUtterances(transcription),
1936
1936
  summary: result?.summarization?.results || void 0,
1937
1937
  metadata: {
1938
- requestParams: response.request_params,
1939
- customMetadata: response.custom_metadata
1938
+ requestParams: response.request_params
1940
1939
  },
1941
1940
  createdAt: response.created_at,
1942
1941
  completedAt: response.completed_at || void 0
1943
1942
  },
1943
+ // Extended data - fully typed from OpenAPI specs
1944
+ extended: {
1945
+ translation: result?.translation || void 0,
1946
+ moderation: result?.moderation || void 0,
1947
+ entities: result?.named_entity_recognition || void 0,
1948
+ sentiment: result?.sentiment_analysis || void 0,
1949
+ audioToLlm: result?.audio_to_llm || void 0,
1950
+ chapters: result?.chapterization || void 0,
1951
+ speakerReidentification: result?.speaker_reidentification || void 0,
1952
+ structuredData: result?.structured_data_extraction || void 0,
1953
+ customMetadata: response.custom_metadata || void 0
1954
+ },
1955
+ // Request tracking
1956
+ tracking: {
1957
+ requestId: response.request_id
1958
+ },
1944
1959
  raw: response
1945
1960
  };
1946
1961
  }
@@ -1968,7 +1983,7 @@ var GladiaAdapter = class extends BaseAdapter {
1968
1983
  }))
1969
1984
  );
1970
1985
  return extractWords(allWords, (item) => ({
1971
- text: item.word.word,
1986
+ word: item.word.word,
1972
1987
  start: item.word.start,
1973
1988
  end: item.word.end,
1974
1989
  confidence: item.word.confidence,
@@ -1988,11 +2003,11 @@ var GladiaAdapter = class extends BaseAdapter {
1988
2003
  end: utterance.end,
1989
2004
  speaker: utterance.speaker?.toString(),
1990
2005
  confidence: utterance.confidence,
1991
- words: utterance.words.map((word) => ({
1992
- text: word.word,
1993
- start: word.start,
1994
- end: word.end,
1995
- confidence: word.confidence
2006
+ words: utterance.words.map((w) => ({
2007
+ word: w.word,
2008
+ start: w.start,
2009
+ end: w.end,
2010
+ confidence: w.confidence
1996
2011
  }))
1997
2012
  }));
1998
2013
  }
@@ -2124,11 +2139,11 @@ var GladiaAdapter = class extends BaseAdapter {
2124
2139
  text: utterance.text,
2125
2140
  isFinal: messageData.is_final,
2126
2141
  confidence: utterance.confidence,
2127
- words: utterance.words.map((word) => ({
2128
- text: word.word,
2129
- start: word.start,
2130
- end: word.end,
2131
- confidence: word.confidence
2142
+ words: utterance.words.map((w) => ({
2143
+ word: w.word,
2144
+ start: w.start,
2145
+ end: w.end,
2146
+ confidence: w.confidence
2132
2147
  })),
2133
2148
  data: message
2134
2149
  });
@@ -2142,11 +2157,11 @@ var GladiaAdapter = class extends BaseAdapter {
2142
2157
  end: utterance.end,
2143
2158
  speaker: utterance.speaker?.toString(),
2144
2159
  confidence: utterance.confidence,
2145
- words: utterance.words.map((word) => ({
2146
- text: word.word,
2147
- start: word.start,
2148
- end: word.end,
2149
- confidence: word.confidence
2160
+ words: utterance.words.map((w) => ({
2161
+ word: w.word,
2162
+ start: w.start,
2163
+ end: w.end,
2164
+ confidence: w.confidence
2150
2165
  }))
2151
2166
  };
2152
2167
  callbacks?.onUtterance?.(utteranceData);
@@ -2796,18 +2811,29 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2796
2811
  status,
2797
2812
  language: response.language_code,
2798
2813
  duration: response.audio_duration ? response.audio_duration / 1e3 : void 0,
2799
- // Convert ms to seconds
2800
2814
  speakers: this.extractSpeakers(response),
2801
2815
  words: this.extractWords(response),
2802
2816
  utterances: this.extractUtterances(response),
2803
2817
  summary: response.summary || void 0,
2804
2818
  metadata: {
2805
- audioUrl: response.audio_url,
2806
- entities: response.entities,
2807
- sentimentAnalysis: response.sentiment_analysis_results,
2808
- contentModeration: response.content_safety_labels
2819
+ audioUrl: response.audio_url
2809
2820
  }
2810
2821
  },
2822
+ // Extended data - fully typed from OpenAPI specs
2823
+ extended: {
2824
+ chapters: response.chapters || void 0,
2825
+ entities: response.entities || void 0,
2826
+ sentimentResults: response.sentiment_analysis_results || void 0,
2827
+ highlights: response.auto_highlights_result || void 0,
2828
+ contentSafety: response.content_safety_labels || void 0,
2829
+ topics: response.iab_categories_result || void 0,
2830
+ languageConfidence: response.language_confidence ?? void 0,
2831
+ throttled: response.throttled ?? void 0
2832
+ },
2833
+ // Request tracking
2834
+ tracking: {
2835
+ requestId: response.id
2836
+ },
2811
2837
  raw: response
2812
2838
  };
2813
2839
  }
@@ -2840,14 +2866,14 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2840
2866
  if (!transcript.words || transcript.words.length === 0) {
2841
2867
  return void 0;
2842
2868
  }
2843
- return transcript.words.map((word) => ({
2844
- text: word.text,
2845
- start: word.start / 1e3,
2869
+ return transcript.words.map((w) => ({
2870
+ word: w.text,
2871
+ start: w.start / 1e3,
2846
2872
  // Convert ms to seconds
2847
- end: word.end / 1e3,
2873
+ end: w.end / 1e3,
2848
2874
  // Convert ms to seconds
2849
- confidence: word.confidence,
2850
- speaker: word.speaker || void 0
2875
+ confidence: w.confidence,
2876
+ speaker: w.speaker || void 0
2851
2877
  }));
2852
2878
  }
2853
2879
  /**
@@ -2865,11 +2891,11 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2865
2891
  // Convert ms to seconds
2866
2892
  speaker: utterance.speaker || void 0,
2867
2893
  confidence: utterance.confidence,
2868
- words: utterance.words.map((word) => ({
2869
- text: word.text,
2870
- start: word.start / 1e3,
2871
- end: word.end / 1e3,
2872
- confidence: word.confidence
2894
+ words: utterance.words.map((w) => ({
2895
+ word: w.text,
2896
+ start: w.start / 1e3,
2897
+ end: w.end / 1e3,
2898
+ confidence: w.confidence
2873
2899
  }))
2874
2900
  }));
2875
2901
  }
@@ -2962,12 +2988,12 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2962
2988
  text: turnMsg.transcript,
2963
2989
  isFinal: turnMsg.end_of_turn,
2964
2990
  confidence: turnMsg.end_of_turn_confidence,
2965
- words: turnMsg.words.map((word) => ({
2966
- text: word.text,
2967
- start: word.start / 1e3,
2991
+ words: turnMsg.words.map((w) => ({
2992
+ word: w.text,
2993
+ start: w.start / 1e3,
2968
2994
  // Convert ms to seconds
2969
- end: word.end / 1e3,
2970
- confidence: word.confidence
2995
+ end: w.end / 1e3,
2996
+ confidence: w.confidence
2971
2997
  })),
2972
2998
  data: turnMsg
2973
2999
  });
@@ -3278,14 +3304,20 @@ var DeepgramAdapter = class extends BaseAdapter {
3278
3304
  speakers: this.extractSpeakers(response),
3279
3305
  words: this.extractWords(alternative),
3280
3306
  utterances: this.extractUtterances(response),
3281
- summary: this.extractSummary(alternative),
3282
- metadata: {
3283
- modelInfo: response.metadata?.model_info,
3284
- channels: response.metadata?.channels,
3285
- sentiment: response.results.sentiments,
3286
- intents: response.results.intents,
3287
- topics: response.results.topics
3288
- }
3307
+ summary: this.extractSummary(alternative)
3308
+ },
3309
+ // Extended data - fully typed from OpenAPI specs
3310
+ extended: {
3311
+ metadata: response.metadata,
3312
+ requestId: response.metadata?.request_id,
3313
+ sha256: response.metadata?.sha256,
3314
+ modelInfo: response.metadata?.model_info,
3315
+ tags: response.metadata?.tags
3316
+ },
3317
+ // Request tracking
3318
+ tracking: {
3319
+ requestId: response.metadata?.request_id,
3320
+ audioHash: response.metadata?.sha256
3289
3321
  },
3290
3322
  raw: response
3291
3323
  };
@@ -3320,11 +3352,11 @@ var DeepgramAdapter = class extends BaseAdapter {
3320
3352
  return void 0;
3321
3353
  }
3322
3354
  return alternative.words.map(
3323
- (word) => ({
3324
- text: word.word || "",
3325
- start: word.start || 0,
3326
- end: word.end || 0,
3327
- confidence: word.confidence,
3355
+ (w) => ({
3356
+ word: w.word || "",
3357
+ start: w.start || 0,
3358
+ end: w.end || 0,
3359
+ confidence: w.confidence,
3328
3360
  speaker: void 0
3329
3361
  // Speaker info is at utterance level, not word level
3330
3362
  })
@@ -3344,11 +3376,11 @@ var DeepgramAdapter = class extends BaseAdapter {
3344
3376
  end: utterance.end || 0,
3345
3377
  speaker: utterance.speaker?.toString(),
3346
3378
  confidence: utterance.confidence,
3347
- words: utterance.words?.map((word) => ({
3348
- text: word.word || "",
3349
- start: word.start || 0,
3350
- end: word.end || 0,
3351
- confidence: word.confidence
3379
+ words: utterance.words?.map((w) => ({
3380
+ word: w.word || "",
3381
+ start: w.start || 0,
3382
+ end: w.end || 0,
3383
+ confidence: w.confidence
3352
3384
  }))
3353
3385
  }));
3354
3386
  }
@@ -3439,11 +3471,11 @@ var DeepgramAdapter = class extends BaseAdapter {
3439
3471
  if (channel) {
3440
3472
  const transcript = channel.transcript;
3441
3473
  const isFinal = message.is_final;
3442
- const words = channel.words?.map((word) => ({
3443
- text: word.word,
3444
- start: word.start,
3445
- end: word.end,
3446
- confidence: word.confidence
3474
+ const words = channel.words?.map((w) => ({
3475
+ word: w.word,
3476
+ start: w.start,
3477
+ end: w.end,
3478
+ confidence: w.confidence
3447
3479
  }));
3448
3480
  callbacks?.onTranscript?.({
3449
3481
  type: "transcript",
@@ -3784,12 +3816,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
3784
3816
  const recognizedPhrases = transcriptionData.recognizedPhrases || [];
3785
3817
  const fullText = combinedPhrases.map((phrase) => phrase.display || phrase.lexical).join(" ") || "";
3786
3818
  const words = recognizedPhrases.flatMap(
3787
- (phrase) => (phrase.nBest?.[0]?.words || []).map((word) => ({
3788
- text: word.word,
3789
- start: word.offsetInTicks / 1e7,
3819
+ (phrase) => (phrase.nBest?.[0]?.words || []).map((w) => ({
3820
+ word: w.word,
3821
+ start: w.offsetInTicks / 1e7,
3790
3822
  // Convert ticks to seconds
3791
- end: (word.offsetInTicks + word.durationInTicks) / 1e7,
3792
- confidence: word.confidence,
3823
+ end: (w.offsetInTicks + w.durationInTicks) / 1e7,
3824
+ confidence: w.confidence,
3793
3825
  speaker: phrase.speaker !== void 0 ? phrase.speaker.toString() : void 0
3794
3826
  }))
3795
3827
  );
@@ -3801,11 +3833,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
3801
3833
  id: String(speakerId),
3802
3834
  label: `Speaker ${speakerId}`
3803
3835
  })) : void 0;
3836
+ const transcriptionId = transcription.self?.split("/").pop() || "";
3804
3837
  return {
3805
3838
  success: true,
3806
3839
  provider: this.name,
3807
3840
  data: {
3808
- id: transcription.self?.split("/").pop() || "",
3841
+ id: transcriptionId,
3809
3842
  text: fullText,
3810
3843
  confidence: recognizedPhrases[0]?.nBest?.[0]?.confidence,
3811
3844
  status: "completed",
@@ -3816,6 +3849,10 @@ var AzureSTTAdapter = class extends BaseAdapter {
3816
3849
  createdAt: transcription.createdDateTime,
3817
3850
  completedAt: transcription.lastActionDateTime
3818
3851
  },
3852
+ extended: {},
3853
+ tracking: {
3854
+ requestId: transcriptionId
3855
+ },
3819
3856
  raw: {
3820
3857
  transcription,
3821
3858
  transcriptionData
@@ -4010,16 +4047,21 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
4010
4047
  */
4011
4048
  normalizeResponse(response, model, isDiarization) {
4012
4049
  if ("text" in response && Object.keys(response).length === 1) {
4050
+ const requestId2 = `openai-${Date.now()}`;
4013
4051
  return {
4014
4052
  success: true,
4015
4053
  provider: this.name,
4016
4054
  data: {
4017
- id: `openai-${Date.now()}`,
4055
+ id: requestId2,
4018
4056
  text: response.text,
4019
4057
  status: "completed",
4020
4058
  language: void 0,
4021
4059
  confidence: void 0
4022
4060
  },
4061
+ extended: {},
4062
+ tracking: {
4063
+ requestId: requestId2
4064
+ },
4023
4065
  raw: response
4024
4066
  };
4025
4067
  }
@@ -4038,11 +4080,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
4038
4080
  end: segment.end,
4039
4081
  confidence: void 0
4040
4082
  }));
4083
+ const requestId2 = `openai-${Date.now()}`;
4041
4084
  return {
4042
4085
  success: true,
4043
4086
  provider: this.name,
4044
4087
  data: {
4045
- id: `openai-${Date.now()}`,
4088
+ id: requestId2,
4046
4089
  text: diarizedResponse.text,
4047
4090
  status: "completed",
4048
4091
  language: void 0,
@@ -4050,39 +4093,53 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
4050
4093
  speakers,
4051
4094
  utterances
4052
4095
  },
4096
+ extended: {},
4097
+ tracking: {
4098
+ requestId: requestId2
4099
+ },
4053
4100
  raw: response
4054
4101
  };
4055
4102
  }
4056
4103
  if ("duration" in response && "language" in response) {
4057
4104
  const verboseResponse = response;
4058
- const words = verboseResponse.words?.map((word) => ({
4059
- text: word.word,
4060
- start: word.start,
4061
- end: word.end,
4105
+ const words = verboseResponse.words?.map((w) => ({
4106
+ word: w.word,
4107
+ start: w.start,
4108
+ end: w.end,
4062
4109
  confidence: void 0
4063
4110
  }));
4111
+ const requestId2 = `openai-${Date.now()}`;
4064
4112
  return {
4065
4113
  success: true,
4066
4114
  provider: this.name,
4067
4115
  data: {
4068
- id: `openai-${Date.now()}`,
4116
+ id: requestId2,
4069
4117
  text: verboseResponse.text,
4070
4118
  status: "completed",
4071
4119
  language: verboseResponse.language,
4072
4120
  duration: verboseResponse.duration,
4073
4121
  words
4074
4122
  },
4123
+ extended: {},
4124
+ tracking: {
4125
+ requestId: requestId2
4126
+ },
4075
4127
  raw: response
4076
4128
  };
4077
4129
  }
4130
+ const requestId = `openai-${Date.now()}`;
4078
4131
  return {
4079
4132
  success: true,
4080
4133
  provider: this.name,
4081
4134
  data: {
4082
- id: `openai-${Date.now()}`,
4135
+ id: requestId,
4083
4136
  text: "text" in response ? response.text : "",
4084
4137
  status: "completed"
4085
4138
  },
4139
+ extended: {},
4140
+ tracking: {
4141
+ requestId
4142
+ },
4086
4143
  raw: response
4087
4144
  };
4088
4145
  }
@@ -4314,7 +4371,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
4314
4371
  normalizeResponse(response) {
4315
4372
  const text = response.results.filter((r) => r.type === "word" && r.alternatives).map((r) => r.alternatives[0]?.content || "").join(" ");
4316
4373
  const words = response.results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
4317
- text: result.alternatives?.[0]?.content || "",
4374
+ word: result.alternatives?.[0]?.content || "",
4318
4375
  start: result.start_time,
4319
4376
  end: result.end_time,
4320
4377
  confidence: result.alternatives?.[0]?.confidence,
@@ -4381,6 +4438,10 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
4381
4438
  summary: response.summary?.content,
4382
4439
  createdAt: response.job.created_at
4383
4440
  },
4441
+ extended: {},
4442
+ tracking: {
4443
+ requestId: response.job.id
4444
+ },
4384
4445
  raw: response
4385
4446
  };
4386
4447
  }
@@ -4461,12 +4522,12 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
4461
4522
  /**
4462
4523
  * Convert Gladia WordDTO to unified Word type
4463
4524
  */
4464
- mapWord(word) {
4525
+ mapWord(w) {
4465
4526
  return {
4466
- text: word.word,
4467
- start: word.start,
4468
- end: word.end,
4469
- confidence: word.confidence
4527
+ word: w.word,
4528
+ start: w.start,
4529
+ end: w.end,
4530
+ confidence: w.confidence
4470
4531
  };
4471
4532
  }
4472
4533
  /**
@@ -4804,11 +4865,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
4804
4865
  raw: payload
4805
4866
  };
4806
4867
  }
4807
- const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((word) => ({
4808
- text: word.word || "",
4809
- start: word.start || 0,
4810
- end: word.end || 0,
4811
- confidence: word.confidence
4868
+ const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((w) => ({
4869
+ word: w.word || "",
4870
+ start: w.start || 0,
4871
+ end: w.end || 0,
4872
+ confidence: w.confidence
4812
4873
  })) : void 0;
4813
4874
  const speakers = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
4814
4875
  id: utterance.speaker?.toString() || "unknown",
@@ -4822,11 +4883,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
4822
4883
  end: utterance.end || 0,
4823
4884
  speaker: utterance.speaker?.toString(),
4824
4885
  confidence: utterance.confidence,
4825
- words: utterance.words && utterance.words.length > 0 ? utterance.words.map((word) => ({
4826
- text: word.word || "",
4827
- start: word.start || 0,
4828
- end: word.end || 0,
4829
- confidence: word.confidence
4886
+ words: utterance.words && utterance.words.length > 0 ? utterance.words.map((w) => ({
4887
+ word: w.word || "",
4888
+ start: w.start || 0,
4889
+ end: w.end || 0,
4890
+ confidence: w.confidence
4830
4891
  })) : void 0
4831
4892
  })) : void 0;
4832
4893
  const summary = alternative.summaries?.[0]?.summary;