@absolutejs/voice-deepgram 0.0.19 → 0.0.20-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/index.js +106 -34
  2. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -88,7 +88,28 @@ var collectPhraseHintTerms = (options) => (options.phraseHints ?? []).flatMap((h
88
88
  hint.text,
89
89
  ...hint.aliases ?? []
90
90
  ]);
91
/**
 * Flattens the lexicon attached to the given options into one list of terms.
 *
 * For every lexicon entry the entry's `text` comes first, followed by each of
 * its `aliases` (when present), in entry order.
 *
 * @param {{ lexicon?: Array<{ text: unknown, aliases?: Iterable<unknown> }> }} options
 * @returns {unknown[]} Empty when `options.lexicon` is null or undefined.
 */
var collectLexiconTerms = (options) => {
  const entries = options.lexicon ?? [];
  return entries.reduce((terms, entry) => {
    const aliases = entry.aliases ?? [];
    terms.push(entry.text, ...aliases);
    return terms;
  }, []);
};
91
95
  var normalizeKeyterms = (value) => value === undefined ? [] : Array.isArray(value) ? value : [value];
96
/** Upper bound on the number of terms returned by `selectKeyterms`. */
var MAX_KEYTERM_COUNT = 16;

/** Terms whose trimmed `length` (UTF-16 code units) exceeds this are dropped. */
var MAX_KEYTERM_LENGTH = 48;

/**
 * Counts how many of the recognised scripts (Latin, Devanagari) occur in a string.
 *
 * @param {string} value
 * @returns {number} 0, 1 or 2.
 */
var countScripts = (value) => {
  const scripts = new Set();
  if (/\p{Script=Latin}/u.test(value)) {
    scripts.add("latin");
  }
  if (/\p{Script=Devanagari}/u.test(value)) {
    scripts.add("devanagari");
  }
  return scripts.size;
};

/**
 * Scores a keyterm candidate; higher scores are kept first by `selectKeyterms`.
 *
 * The score is the sum of: 40 when both recognised scripts are present, 20 when
 * the term contains a space, 10 when it contains any non-ASCII character, 5 when
 * it contains an apostrophe, plus the trimmed length capped at 20.
 *
 * @param {string} value
 * @returns {number}
 */
var scoreKeytermCandidate = (value) => {
  const normalized = value.trim();
  const mixedScriptBonus = countScripts(normalized) >= 2 ? 40 : 0;
  const multiWordBonus = normalized.includes(" ") ? 20 : 0;
  const nonAsciiBonus = /[^\x00-\x7F]/u.test(normalized) ? 10 : 0;
  const apostropheBonus = normalized.includes("'") ? 5 : 0;
  return mixedScriptBonus + multiWordBonus + nonAsciiBonus + apostropheBonus + Math.min(normalized.length, 20);
};

/**
 * Picks the keyterms to forward: trims each term, drops terms shorter than 2 or
 * longer than `MAX_KEYTERM_LENGTH`, removes duplicates (first occurrence wins),
 * orders by descending score and keeps at most `MAX_KEYTERM_COUNT`.
 *
 * Non-string entries are skipped instead of throwing on `.trim()`, so one
 * malformed lexicon entry or keyterm cannot abort building the connection URL.
 *
 * @param {unknown[]} terms Candidate terms in priority order.
 * @returns {string[]} Selected terms; ties keep their input order (stable sort).
 */
var selectKeyterms = (terms) => {
  const seen = new Set();
  const candidates = [];
  for (const raw of terms) {
    if (typeof raw !== "string") {
      continue;
    }
    const term = raw.trim();
    if (term.length < 2 || term.length > MAX_KEYTERM_LENGTH || seen.has(term)) {
      continue;
    }
    seen.add(term);
    // Score once per term rather than on every comparison during the sort.
    candidates.push({ score: scoreKeytermCandidate(term), term });
  }
  return candidates
    .sort((left, right) => right.score - left.score)
    .slice(0, MAX_KEYTERM_COUNT)
    .map((candidate) => candidate.term);
};
92
113
  var formatErrorMessage = (details) => {
93
114
  const parts = [
94
115
  details.code ? `code=${details.code}` : undefined,
@@ -123,8 +144,17 @@ var normalizeWords = (words) => {
123
144
  }
124
145
  const first = words[0];
125
146
  const last = words.at(-1);
147
+ const speakerCounts = new Map;
148
+ for (const word of words) {
149
+ if (typeof word.speaker !== "number") {
150
+ continue;
151
+ }
152
+ speakerCounts.set(word.speaker, (speakerCounts.get(word.speaker) ?? 0) + 1);
153
+ }
154
+ const speaker = [...speakerCounts.entries()].sort((left, right) => right[1] - left[1])[0]?.[0];
126
155
  return {
127
156
  endedAtMs: typeof last?.end === "number" ? Math.round(last.end * 1000) : undefined,
157
+ speaker,
128
158
  startedAtMs: typeof first?.start === "number" ? Math.round(first.start * 1000) : undefined
129
159
  };
130
160
  };
@@ -160,79 +190,113 @@ var buildFluxTranscript = (payload) => {
160
190
  };
161
191
  };
162
192
  var omitUndefined = (value) => Object.fromEntries(Object.entries(value).filter(([, entry]) => entry !== undefined));
163
- var buildLiveOptions = (config, format) => {
164
- const isFlux = String(config.model).startsWith("flux");
165
- const language = typeof config.language === "string" && config.language.trim().length > 0 ? config.language.trim() : undefined;
166
- const options = {
193
/**
 * Trims a language code and treats blank or non-string input as absent.
 *
 * The `typeof` guard keeps a non-string `language` from throwing on `.trim()`.
 *
 * @param {unknown} value
 * @returns {string | undefined} The trimmed code, or undefined when there is none.
 */
var normalizeLanguageCode = (value) => {
  if (typeof value !== "string") {
    return undefined;
  }
  const normalized = value.trim();
  return normalized.length > 0 ? normalized : undefined;
};

/**
 * Returns the primary language of a "fixed" language strategy.
 *
 * @param {{ languageStrategy?: { mode?: string, primaryLanguage?: unknown } } | undefined} options
 * @returns {string | undefined} Undefined for any other mode or when no strategy is set.
 */
var resolveStrategyLanguage = (options) => {
  const strategy = options?.languageStrategy;
  if (strategy?.mode !== "fixed") {
    return undefined;
  }
  return normalizeLanguageCode(strategy.primaryLanguage);
};

/**
 * Expands the "flux" model alias into a concrete model name; any other
 * `config.model` is returned unchanged.
 *
 * - "allow-switching" / "auto-detect" strategies select "flux-general-multi".
 * - Otherwise the language is the fixed strategy's primary language, or
 *   `config.language` when the strategy is not "fixed". A missing language or
 *   one starting with "en" (compared case-insensitively) selects
 *   "flux-general-en"; everything else selects "flux-general-multi".
 *
 * @param {{ model: unknown, language?: unknown }} config
 * @param {{ languageStrategy?: { mode?: string, primaryLanguage?: unknown } }} [options]
 * @returns {unknown} The resolved model identifier.
 */
var resolveDeepgramModel = (config, options) => {
  if (config.model !== "flux") {
    return config.model;
  }
  const strategy = options?.languageStrategy;
  if (strategy?.mode === "allow-switching" || strategy?.mode === "auto-detect") {
    return "flux-general-multi";
  }
  const language = normalizeLanguageCode(strategy?.mode === "fixed" ? strategy.primaryLanguage : config.language);
  // Language tags are case-insensitive, so "EN-US" must count as English too.
  if (!language || language.toLowerCase().startsWith("en")) {
    return "flux-general-en";
  }
  return "flux-general-multi";
};
216
/**
 * Builds the option bag for a live-transcription connection.
 *
 * The model comes from `resolveDeepgramModel`; whether it starts with "flux"
 * decides which settings are emitted:
 * - flux models get only the end-of-turn settings, defaulting to
 *   `eager_eot_threshold` 0.8, `eot_threshold` 0.82 and `eot_timeout_ms` 1200;
 * - other models get channels, formatting, endpointing, language, VAD,
 *   diarization, numerals, profanity filter and redaction settings.
 * `tag`, `extra` and `keyterm` apply to both. Entries left undefined are removed
 * by `omitUndefined`, and keys are assigned in a fixed order.
 *
 * @param {object} config Adapter configuration (model, language, feature flags, keyterms, ...).
 * @param {{ channels?: number, sampleRateHz?: number }} format Audio format of the stream.
 * @param {{ languageStrategy?: object }} [options] Runtime context; defaults to `{}`
 *   so two-argument calls keep working instead of throwing on `options.languageStrategy`.
 * @returns {Record<string, unknown>} Options with undefined entries removed.
 */
var buildLiveOptions = (config, format, options = {}) => {
  const model = resolveDeepgramModel(config, options);
  const isFlux = String(model).startsWith("flux");
  // An explicit config language wins; a "fixed" strategy language is the fallback.
  const language = normalizeLanguageCode(config.language) ?? resolveStrategyLanguage(options);
  const liveOptions = {
    encoding: "linear16",
    model,
    sample_rate: format.sampleRateHz
  };
  if (isFlux) {
    liveOptions.eager_eot_threshold = config.eagerEotThreshold ?? 0.8;
    liveOptions.eot_threshold = config.eotThreshold ?? 0.82;
    liveOptions.eot_timeout_ms = config.eotTimeoutMs ?? 1200;
  } else {
    // Unset values are assigned as undefined here and stripped by omitUndefined
    // below; the key order matches the order in which they were set before.
    Object.assign(liveOptions, {
      channels: format.channels,
      punctuate: config.punctuate,
      smart_format: config.smartFormat,
      interim_results: config.interimResults,
      endpointing: config.endpointing,
      language,
      utterance_end_ms: config.utteranceEndMs,
      vad_events: config.vadEvents,
      diarize: config.diarize,
      numerals: config.numerals,
      profanity_filter: config.profanityFilter,
      redact: config.redact
    });
  }
  liveOptions.tag = config.tag;
  // `extra` is only forwarded when truthy, so null/empty values are not sent.
  if (config.extra) {
    liveOptions.extra = config.extra;
  }
  liveOptions.keyterm = config.keyterms ?? config.keyterm;
  return omitUndefined(liveOptions);
};
223
277
  var buildUrl = (config, input) => {
224
- const url = new URL(String(config.model).startsWith("flux") ? LISTEN_V2_URL : LISTEN_V1_URL);
278
+ const context = {
279
+ format: {
280
+ channels: input.context.format.channels,
281
+ container: "raw",
282
+ encoding: "pcm_s16le",
283
+ sampleRateHz: input.context.format.sampleRateHz
284
+ },
285
+ languageStrategy: input.context.languageStrategy,
286
+ lexicon: input.context.lexicon,
287
+ sessionId: input.context.sessionId
288
+ };
289
+ const url = new URL(String(resolveDeepgramModel(config, context)).startsWith("flux") ? LISTEN_V2_URL : LISTEN_V1_URL);
225
290
  const keytermTerms = [
226
291
  ...normalizeKeyterms(config.keyterms ?? config.keyterm),
292
+ ...collectLexiconTerms(context),
227
293
  ...input.phraseHintTerms ?? []
228
- ].filter((value, index, list) => list.indexOf(value) === index);
294
+ ];
295
+ const selectedKeyterms = selectKeyterms(keytermTerms);
229
296
  const options = buildLiveOptions({
230
297
  ...config,
231
- keyterms: keytermTerms.length > 0 ? keytermTerms : config.keyterms
232
- }, {
233
- channels: input.channels,
234
- sampleRateHz: input.sampleRateHz
235
- });
298
+ keyterms: selectedKeyterms.length > 0 ? selectedKeyterms : config.keyterms
299
+ }, input.context.format, context);
236
300
  for (const [key, value] of Object.entries(options)) {
237
301
  if (isNil(value)) {
238
302
  continue;
@@ -275,12 +339,20 @@ var resolveOpenFailure = (error, url, timeoutMs) => {
275
339
  var deepgram = (config) => ({
276
340
  kind: "stt",
277
341
  open: async (options) => {
342
+ const runtimeOptions = options;
278
343
  const emitsNativeEndOfTurn = String(config.model).startsWith("flux");
279
344
  const listeners = createListenerMap();
280
345
  const url = buildUrl(config, {
281
- channels: options.format.channels,
282
- phraseHintTerms: collectPhraseHintTerms(options),
283
- sampleRateHz: options.format.sampleRateHz
346
+ context: {
347
+ format: {
348
+ channels: runtimeOptions.format.channels,
349
+ sampleRateHz: runtimeOptions.format.sampleRateHz
350
+ },
351
+ languageStrategy: runtimeOptions.languageStrategy,
352
+ lexicon: runtimeOptions.lexicon,
353
+ sessionId: runtimeOptions.sessionId
354
+ },
355
+ phraseHintTerms: collectPhraseHintTerms(runtimeOptions)
284
356
  });
285
357
  const connection = await createTransport(url, config.apiKey, config.authMode);
286
358
  const connectTimeoutMs = config.connectTimeoutMs ?? 8000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice-deepgram",
3
- "version": "0.0.19",
3
+ "version": "0.0.20-beta.1",
4
4
  "description": "Deepgram speech-to-text adapter for @absolutejs/voice",
5
5
  "repository": {
6
6
  "type": "git",
@@ -28,7 +28,7 @@
28
28
  "typecheck": "bun run tsc --noEmit"
29
29
  },
30
30
  "dependencies": {
31
- "@absolutejs/voice": "0.0.21"
31
+ "@absolutejs/voice": "0.0.22-beta.65"
32
32
  },
33
33
  "devDependencies": {
34
34
  "@absolutejs/absolute": "0.19.0-beta.648",