voice-router-dev 0.2.6 → 0.2.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2000,12 +2000,27 @@ var GladiaAdapter = class extends BaseAdapter {
2000
2000
  utterances: this.extractUtterances(transcription),
2001
2001
  summary: result?.summarization?.results || void 0,
2002
2002
  metadata: {
2003
- requestParams: response.request_params,
2004
- customMetadata: response.custom_metadata
2003
+ requestParams: response.request_params
2005
2004
  },
2006
2005
  createdAt: response.created_at,
2007
2006
  completedAt: response.completed_at || void 0
2008
2007
  },
2008
+ // Extended data - fully typed from OpenAPI specs
2009
+ extended: {
2010
+ translation: result?.translation || void 0,
2011
+ moderation: result?.moderation || void 0,
2012
+ entities: result?.named_entity_recognition || void 0,
2013
+ sentiment: result?.sentiment_analysis || void 0,
2014
+ audioToLlm: result?.audio_to_llm || void 0,
2015
+ chapters: result?.chapterization || void 0,
2016
+ speakerReidentification: result?.speaker_reidentification || void 0,
2017
+ structuredData: result?.structured_data_extraction || void 0,
2018
+ customMetadata: response.custom_metadata || void 0
2019
+ },
2020
+ // Request tracking
2021
+ tracking: {
2022
+ requestId: response.request_id
2023
+ },
2009
2024
  raw: response
2010
2025
  };
2011
2026
  }
@@ -2033,7 +2048,7 @@ var GladiaAdapter = class extends BaseAdapter {
2033
2048
  }))
2034
2049
  );
2035
2050
  return extractWords(allWords, (item) => ({
2036
- text: item.word.word,
2051
+ word: item.word.word,
2037
2052
  start: item.word.start,
2038
2053
  end: item.word.end,
2039
2054
  confidence: item.word.confidence,
@@ -2053,11 +2068,11 @@ var GladiaAdapter = class extends BaseAdapter {
2053
2068
  end: utterance.end,
2054
2069
  speaker: utterance.speaker?.toString(),
2055
2070
  confidence: utterance.confidence,
2056
- words: utterance.words.map((word) => ({
2057
- text: word.word,
2058
- start: word.start,
2059
- end: word.end,
2060
- confidence: word.confidence
2071
+ words: utterance.words.map((w) => ({
2072
+ word: w.word,
2073
+ start: w.start,
2074
+ end: w.end,
2075
+ confidence: w.confidence
2061
2076
  }))
2062
2077
  }));
2063
2078
  }
@@ -2189,11 +2204,11 @@ var GladiaAdapter = class extends BaseAdapter {
2189
2204
  text: utterance.text,
2190
2205
  isFinal: messageData.is_final,
2191
2206
  confidence: utterance.confidence,
2192
- words: utterance.words.map((word) => ({
2193
- text: word.word,
2194
- start: word.start,
2195
- end: word.end,
2196
- confidence: word.confidence
2207
+ words: utterance.words.map((w) => ({
2208
+ word: w.word,
2209
+ start: w.start,
2210
+ end: w.end,
2211
+ confidence: w.confidence
2197
2212
  })),
2198
2213
  data: message
2199
2214
  });
@@ -2207,11 +2222,11 @@ var GladiaAdapter = class extends BaseAdapter {
2207
2222
  end: utterance.end,
2208
2223
  speaker: utterance.speaker?.toString(),
2209
2224
  confidence: utterance.confidence,
2210
- words: utterance.words.map((word) => ({
2211
- text: word.word,
2212
- start: word.start,
2213
- end: word.end,
2214
- confidence: word.confidence
2225
+ words: utterance.words.map((w) => ({
2226
+ word: w.word,
2227
+ start: w.start,
2228
+ end: w.end,
2229
+ confidence: w.confidence
2215
2230
  }))
2216
2231
  };
2217
2232
  callbacks?.onUtterance?.(utteranceData);
@@ -2861,18 +2876,29 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2861
2876
  status,
2862
2877
  language: response.language_code,
2863
2878
  duration: response.audio_duration ? response.audio_duration / 1e3 : void 0,
2864
- // Convert ms to seconds
2865
2879
  speakers: this.extractSpeakers(response),
2866
2880
  words: this.extractWords(response),
2867
2881
  utterances: this.extractUtterances(response),
2868
2882
  summary: response.summary || void 0,
2869
2883
  metadata: {
2870
- audioUrl: response.audio_url,
2871
- entities: response.entities,
2872
- sentimentAnalysis: response.sentiment_analysis_results,
2873
- contentModeration: response.content_safety_labels
2884
+ audioUrl: response.audio_url
2874
2885
  }
2875
2886
  },
2887
+ // Extended data - fully typed from OpenAPI specs
2888
+ extended: {
2889
+ chapters: response.chapters || void 0,
2890
+ entities: response.entities || void 0,
2891
+ sentimentResults: response.sentiment_analysis_results || void 0,
2892
+ highlights: response.auto_highlights_result || void 0,
2893
+ contentSafety: response.content_safety_labels || void 0,
2894
+ topics: response.iab_categories_result || void 0,
2895
+ languageConfidence: response.language_confidence ?? void 0,
2896
+ throttled: response.throttled ?? void 0
2897
+ },
2898
+ // Request tracking
2899
+ tracking: {
2900
+ requestId: response.id
2901
+ },
2876
2902
  raw: response
2877
2903
  };
2878
2904
  }
@@ -2905,14 +2931,14 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2905
2931
  if (!transcript.words || transcript.words.length === 0) {
2906
2932
  return void 0;
2907
2933
  }
2908
- return transcript.words.map((word) => ({
2909
- text: word.text,
2910
- start: word.start / 1e3,
2934
+ return transcript.words.map((w) => ({
2935
+ word: w.text,
2936
+ start: w.start / 1e3,
2911
2937
  // Convert ms to seconds
2912
- end: word.end / 1e3,
2938
+ end: w.end / 1e3,
2913
2939
  // Convert ms to seconds
2914
- confidence: word.confidence,
2915
- speaker: word.speaker || void 0
2940
+ confidence: w.confidence,
2941
+ speaker: w.speaker || void 0
2916
2942
  }));
2917
2943
  }
2918
2944
  /**
@@ -2930,11 +2956,11 @@ var AssemblyAIAdapter = class extends BaseAdapter {
2930
2956
  // Convert ms to seconds
2931
2957
  speaker: utterance.speaker || void 0,
2932
2958
  confidence: utterance.confidence,
2933
- words: utterance.words.map((word) => ({
2934
- text: word.text,
2935
- start: word.start / 1e3,
2936
- end: word.end / 1e3,
2937
- confidence: word.confidence
2959
+ words: utterance.words.map((w) => ({
2960
+ word: w.text,
2961
+ start: w.start / 1e3,
2962
+ end: w.end / 1e3,
2963
+ confidence: w.confidence
2938
2964
  }))
2939
2965
  }));
2940
2966
  }
@@ -3027,12 +3053,12 @@ var AssemblyAIAdapter = class extends BaseAdapter {
3027
3053
  text: turnMsg.transcript,
3028
3054
  isFinal: turnMsg.end_of_turn,
3029
3055
  confidence: turnMsg.end_of_turn_confidence,
3030
- words: turnMsg.words.map((word) => ({
3031
- text: word.text,
3032
- start: word.start / 1e3,
3056
+ words: turnMsg.words.map((w) => ({
3057
+ word: w.text,
3058
+ start: w.start / 1e3,
3033
3059
  // Convert ms to seconds
3034
- end: word.end / 1e3,
3035
- confidence: word.confidence
3060
+ end: w.end / 1e3,
3061
+ confidence: w.confidence
3036
3062
  })),
3037
3063
  data: turnMsg
3038
3064
  });
@@ -3343,14 +3369,20 @@ var DeepgramAdapter = class extends BaseAdapter {
3343
3369
  speakers: this.extractSpeakers(response),
3344
3370
  words: this.extractWords(alternative),
3345
3371
  utterances: this.extractUtterances(response),
3346
- summary: this.extractSummary(alternative),
3347
- metadata: {
3348
- modelInfo: response.metadata?.model_info,
3349
- channels: response.metadata?.channels,
3350
- sentiment: response.results.sentiments,
3351
- intents: response.results.intents,
3352
- topics: response.results.topics
3353
- }
3372
+ summary: this.extractSummary(alternative)
3373
+ },
3374
+ // Extended data - fully typed from OpenAPI specs
3375
+ extended: {
3376
+ metadata: response.metadata,
3377
+ requestId: response.metadata?.request_id,
3378
+ sha256: response.metadata?.sha256,
3379
+ modelInfo: response.metadata?.model_info,
3380
+ tags: response.metadata?.tags
3381
+ },
3382
+ // Request tracking
3383
+ tracking: {
3384
+ requestId: response.metadata?.request_id,
3385
+ audioHash: response.metadata?.sha256
3354
3386
  },
3355
3387
  raw: response
3356
3388
  };
@@ -3385,11 +3417,11 @@ var DeepgramAdapter = class extends BaseAdapter {
3385
3417
  return void 0;
3386
3418
  }
3387
3419
  return alternative.words.map(
3388
- (word) => ({
3389
- text: word.word || "",
3390
- start: word.start || 0,
3391
- end: word.end || 0,
3392
- confidence: word.confidence,
3420
+ (w) => ({
3421
+ word: w.word || "",
3422
+ start: w.start || 0,
3423
+ end: w.end || 0,
3424
+ confidence: w.confidence,
3393
3425
  speaker: void 0
3394
3426
  // Speaker info is at utterance level, not word level
3395
3427
  })
@@ -3409,11 +3441,11 @@ var DeepgramAdapter = class extends BaseAdapter {
3409
3441
  end: utterance.end || 0,
3410
3442
  speaker: utterance.speaker?.toString(),
3411
3443
  confidence: utterance.confidence,
3412
- words: utterance.words?.map((word) => ({
3413
- text: word.word || "",
3414
- start: word.start || 0,
3415
- end: word.end || 0,
3416
- confidence: word.confidence
3444
+ words: utterance.words?.map((w) => ({
3445
+ word: w.word || "",
3446
+ start: w.start || 0,
3447
+ end: w.end || 0,
3448
+ confidence: w.confidence
3417
3449
  }))
3418
3450
  }));
3419
3451
  }
@@ -3504,11 +3536,11 @@ var DeepgramAdapter = class extends BaseAdapter {
3504
3536
  if (channel) {
3505
3537
  const transcript = channel.transcript;
3506
3538
  const isFinal = message.is_final;
3507
- const words = channel.words?.map((word) => ({
3508
- text: word.word,
3509
- start: word.start,
3510
- end: word.end,
3511
- confidence: word.confidence
3539
+ const words = channel.words?.map((w) => ({
3540
+ word: w.word,
3541
+ start: w.start,
3542
+ end: w.end,
3543
+ confidence: w.confidence
3512
3544
  }));
3513
3545
  callbacks?.onTranscript?.({
3514
3546
  type: "transcript",
@@ -3849,12 +3881,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
3849
3881
  const recognizedPhrases = transcriptionData.recognizedPhrases || [];
3850
3882
  const fullText = combinedPhrases.map((phrase) => phrase.display || phrase.lexical).join(" ") || "";
3851
3883
  const words = recognizedPhrases.flatMap(
3852
- (phrase) => (phrase.nBest?.[0]?.words || []).map((word) => ({
3853
- text: word.word,
3854
- start: word.offsetInTicks / 1e7,
3884
+ (phrase) => (phrase.nBest?.[0]?.words || []).map((w) => ({
3885
+ word: w.word,
3886
+ start: w.offsetInTicks / 1e7,
3855
3887
  // Convert ticks to seconds
3856
- end: (word.offsetInTicks + word.durationInTicks) / 1e7,
3857
- confidence: word.confidence,
3888
+ end: (w.offsetInTicks + w.durationInTicks) / 1e7,
3889
+ confidence: w.confidence,
3858
3890
  speaker: phrase.speaker !== void 0 ? phrase.speaker.toString() : void 0
3859
3891
  }))
3860
3892
  );
@@ -3866,11 +3898,12 @@ var AzureSTTAdapter = class extends BaseAdapter {
3866
3898
  id: String(speakerId),
3867
3899
  label: `Speaker ${speakerId}`
3868
3900
  })) : void 0;
3901
+ const transcriptionId = transcription.self?.split("/").pop() || "";
3869
3902
  return {
3870
3903
  success: true,
3871
3904
  provider: this.name,
3872
3905
  data: {
3873
- id: transcription.self?.split("/").pop() || "",
3906
+ id: transcriptionId,
3874
3907
  text: fullText,
3875
3908
  confidence: recognizedPhrases[0]?.nBest?.[0]?.confidence,
3876
3909
  status: "completed",
@@ -3881,6 +3914,10 @@ var AzureSTTAdapter = class extends BaseAdapter {
3881
3914
  createdAt: transcription.createdDateTime,
3882
3915
  completedAt: transcription.lastActionDateTime
3883
3916
  },
3917
+ extended: {},
3918
+ tracking: {
3919
+ requestId: transcriptionId
3920
+ },
3884
3921
  raw: {
3885
3922
  transcription,
3886
3923
  transcriptionData
@@ -4075,16 +4112,21 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
4075
4112
  */
4076
4113
  normalizeResponse(response, model, isDiarization) {
4077
4114
  if ("text" in response && Object.keys(response).length === 1) {
4115
+ const requestId2 = `openai-${Date.now()}`;
4078
4116
  return {
4079
4117
  success: true,
4080
4118
  provider: this.name,
4081
4119
  data: {
4082
- id: `openai-${Date.now()}`,
4120
+ id: requestId2,
4083
4121
  text: response.text,
4084
4122
  status: "completed",
4085
4123
  language: void 0,
4086
4124
  confidence: void 0
4087
4125
  },
4126
+ extended: {},
4127
+ tracking: {
4128
+ requestId: requestId2
4129
+ },
4088
4130
  raw: response
4089
4131
  };
4090
4132
  }
@@ -4103,11 +4145,12 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
4103
4145
  end: segment.end,
4104
4146
  confidence: void 0
4105
4147
  }));
4148
+ const requestId2 = `openai-${Date.now()}`;
4106
4149
  return {
4107
4150
  success: true,
4108
4151
  provider: this.name,
4109
4152
  data: {
4110
- id: `openai-${Date.now()}`,
4153
+ id: requestId2,
4111
4154
  text: diarizedResponse.text,
4112
4155
  status: "completed",
4113
4156
  language: void 0,
@@ -4115,39 +4158,53 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
4115
4158
  speakers,
4116
4159
  utterances
4117
4160
  },
4161
+ extended: {},
4162
+ tracking: {
4163
+ requestId: requestId2
4164
+ },
4118
4165
  raw: response
4119
4166
  };
4120
4167
  }
4121
4168
  if ("duration" in response && "language" in response) {
4122
4169
  const verboseResponse = response;
4123
- const words = verboseResponse.words?.map((word) => ({
4124
- text: word.word,
4125
- start: word.start,
4126
- end: word.end,
4170
+ const words = verboseResponse.words?.map((w) => ({
4171
+ word: w.word,
4172
+ start: w.start,
4173
+ end: w.end,
4127
4174
  confidence: void 0
4128
4175
  }));
4176
+ const requestId2 = `openai-${Date.now()}`;
4129
4177
  return {
4130
4178
  success: true,
4131
4179
  provider: this.name,
4132
4180
  data: {
4133
- id: `openai-${Date.now()}`,
4181
+ id: requestId2,
4134
4182
  text: verboseResponse.text,
4135
4183
  status: "completed",
4136
4184
  language: verboseResponse.language,
4137
4185
  duration: verboseResponse.duration,
4138
4186
  words
4139
4187
  },
4188
+ extended: {},
4189
+ tracking: {
4190
+ requestId: requestId2
4191
+ },
4140
4192
  raw: response
4141
4193
  };
4142
4194
  }
4195
+ const requestId = `openai-${Date.now()}`;
4143
4196
  return {
4144
4197
  success: true,
4145
4198
  provider: this.name,
4146
4199
  data: {
4147
- id: `openai-${Date.now()}`,
4200
+ id: requestId,
4148
4201
  text: "text" in response ? response.text : "",
4149
4202
  status: "completed"
4150
4203
  },
4204
+ extended: {},
4205
+ tracking: {
4206
+ requestId
4207
+ },
4151
4208
  raw: response
4152
4209
  };
4153
4210
  }
@@ -4379,7 +4436,7 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
4379
4436
  normalizeResponse(response) {
4380
4437
  const text = response.results.filter((r) => r.type === "word" && r.alternatives).map((r) => r.alternatives[0]?.content || "").join(" ");
4381
4438
  const words = response.results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
4382
- text: result.alternatives?.[0]?.content || "",
4439
+ word: result.alternatives?.[0]?.content || "",
4383
4440
  start: result.start_time,
4384
4441
  end: result.end_time,
4385
4442
  confidence: result.alternatives?.[0]?.confidence,
@@ -4446,6 +4503,10 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
4446
4503
  summary: response.summary?.content,
4447
4504
  createdAt: response.job.created_at
4448
4505
  },
4506
+ extended: {},
4507
+ tracking: {
4508
+ requestId: response.job.id
4509
+ },
4449
4510
  raw: response
4450
4511
  };
4451
4512
  }
@@ -4526,12 +4587,12 @@ var GladiaWebhookHandler = class extends BaseWebhookHandler {
4526
4587
  /**
4527
4588
  * Convert Gladia WordDTO to unified Word type
4528
4589
  */
4529
- mapWord(word) {
4590
+ mapWord(w) {
4530
4591
  return {
4531
- text: word.word,
4532
- start: word.start,
4533
- end: word.end,
4534
- confidence: word.confidence
4592
+ word: w.word,
4593
+ start: w.start,
4594
+ end: w.end,
4595
+ confidence: w.confidence
4535
4596
  };
4536
4597
  }
4537
4598
  /**
@@ -4869,11 +4930,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
4869
4930
  raw: payload
4870
4931
  };
4871
4932
  }
4872
- const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((word) => ({
4873
- text: word.word || "",
4874
- start: word.start || 0,
4875
- end: word.end || 0,
4876
- confidence: word.confidence
4933
+ const words = alternative.words && alternative.words.length > 0 ? alternative.words.map((w) => ({
4934
+ word: w.word || "",
4935
+ start: w.start || 0,
4936
+ end: w.end || 0,
4937
+ confidence: w.confidence
4877
4938
  })) : void 0;
4878
4939
  const speakers = response.results.utterances && response.results.utterances.length > 0 ? response.results.utterances.map((utterance) => ({
4879
4940
  id: utterance.speaker?.toString() || "unknown",
@@ -4887,11 +4948,11 @@ var DeepgramWebhookHandler = class extends BaseWebhookHandler {
4887
4948
  end: utterance.end || 0,
4888
4949
  speaker: utterance.speaker?.toString(),
4889
4950
  confidence: utterance.confidence,
4890
- words: utterance.words && utterance.words.length > 0 ? utterance.words.map((word) => ({
4891
- text: word.word || "",
4892
- start: word.start || 0,
4893
- end: word.end || 0,
4894
- confidence: word.confidence
4951
+ words: utterance.words && utterance.words.length > 0 ? utterance.words.map((w) => ({
4952
+ word: w.word || "",
4953
+ start: w.start || 0,
4954
+ end: w.end || 0,
4955
+ confidence: w.confidence
4895
4956
  })) : void 0
4896
4957
  })) : void 0;
4897
4958
  const summary = alternative.summaries?.[0]?.summary;