@fonoster/apiserver 0.16.9 → 0.16.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 Fonoster Inc
3
+ Copyright (c) 2026 Fonoster Inc
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -56,7 +56,7 @@ function createListApplications(prisma) {
56
56
  const items = result.map(applicationWithEncodedStruct_1.applicationWithEncodedStruct);
57
57
  callback(null, {
58
58
  items,
59
- nextPageToken: (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
59
+ nextPageToken: items.length < pageSize ? undefined : (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
60
60
  });
61
61
  });
62
62
  return (0, common_1.withErrorHandlingAndValidation)(listApplications, common_1.Validators.listRequestSchema);
@@ -49,7 +49,7 @@ function listSecrets(prisma) {
49
49
  })).map(common_1.datesMapper);
50
50
  callback(null, {
51
51
  items: result,
52
- nextPageToken: (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
52
+ nextPageToken: result.length < pageSize ? undefined : (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
53
53
  });
54
54
  });
55
55
  return (0, common_1.withErrorHandlingAndValidation)(fn, common_1.Validators.listRequestSchema);
@@ -83,6 +83,16 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
83
83
  streamTranscribe(stream) {
84
84
  const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
85
85
  const out = new stream_1.Stream();
86
+ // Track last transcript for UtteranceEnd fallback
87
+ // According to Deepgram docs: "If you receive an UtteranceEnd event without a
88
+ // preceding speech_final: true, it's advisable to process the last-received
89
+ // transcript as a complete utterance."
90
+ // UtteranceEnd fires after finalized words, so we store the last finalized transcript
91
+ // but also keep any transcript as a fallback
92
+ let lastFinalizedTranscript = null;
93
+ let lastFinalizedTranscriptTime = 0;
94
+ let lastAnyTranscript = null;
95
+ let lastAnyTranscriptTime = 0;
86
96
  // Add error handler immediately to catch any connection errors
87
97
  connection.on(LiveTranscriptionEvents.Error, (err) => {
88
98
  logger.error("error on Deepgram connection", { err });
@@ -106,8 +116,23 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
106
116
  });
107
117
  connection.on(LiveTranscriptionEvents.Transcript, (data) => {
108
118
  var _a, _b, _c;
109
- if (!((_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript) ||
110
- !data.speech_final) {
119
+ const transcript = (_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript;
120
+ const hasTranscript = !!transcript;
121
+ const isFinal = data.is_final === true;
122
+ const speechFinal = data.speech_final === true;
123
+ // Store any transcript for UtteranceEnd fallback
124
+ if (hasTranscript) {
125
+ lastAnyTranscript = transcript;
126
+ lastAnyTranscriptTime = Date.now();
127
+ // Store finalized transcripts separately (preferred for UtteranceEnd)
128
+ if (isFinal || speechFinal) {
129
+ lastFinalizedTranscript = transcript;
130
+ lastFinalizedTranscriptTime = Date.now();
131
+ }
132
+ }
133
+ // Process transcript if it has content and is final
134
+ // Check both speech_final (primary) and is_final (backup)
135
+ if (!hasTranscript || (!speechFinal && !isFinal)) {
111
136
  return;
112
137
  }
113
138
  const words = data.channel.alternatives[0].words || [];
@@ -117,13 +142,55 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
117
142
  words.length
118
143
  : 0;
119
144
  logger.verbose("transcribe result", {
120
- speech: data.channel.alternatives[0].transcript,
121
- responseTime
145
+ speech: transcript,
146
+ responseTime,
147
+ isFinal,
148
+ speechFinal
122
149
  });
123
150
  out.emit("data", {
124
- speech: data.channel.alternatives[0].transcript,
151
+ speech: transcript,
125
152
  responseTime
126
153
  });
154
+ // Clear transcripts after processing (they've been emitted)
155
+ lastFinalizedTranscript = null;
156
+ lastAnyTranscript = null;
157
+ });
158
+ // CRITICAL: Handle UtteranceEnd events (fallback when speech_final never becomes true)
159
+ // This is Deepgram's recommended fallback mechanism for noisy environments
160
+ // UtteranceEnd requires: interim_results=true and utterance_end_ms parameter
161
+ // UtteranceEnd fires after finalized words, so prefer lastFinalizedTranscript
162
+ connection.on(LiveTranscriptionEvents.UtteranceEnd, (data) => {
163
+ // Prefer finalized transcript, fall back to any transcript
164
+ const transcriptToUse = lastFinalizedTranscript || lastAnyTranscript;
165
+ const transcriptTime = lastFinalizedTranscript
166
+ ? lastFinalizedTranscriptTime
167
+ : lastAnyTranscriptTime;
168
+ if (transcriptToUse) {
169
+ // Use last_word_end from UtteranceEnd event if available for more accurate timing
170
+ // Otherwise fall back to time since last transcript
171
+ const lastWordEnd = data === null || data === void 0 ? void 0 : data.last_word_end;
172
+ const responseTime = lastWordEnd
173
+ ? lastWordEnd * 1000 // Convert seconds to milliseconds
174
+ : transcriptTime
175
+ ? Date.now() - transcriptTime
176
+ : 0;
177
+ logger.info("Deepgram UtteranceEnd - processing last transcript", {
178
+ speech: transcriptToUse,
179
+ responseTime,
180
+ lastWordEnd: lastWordEnd,
181
+ wasFinalized: !!lastFinalizedTranscript
182
+ });
183
+ out.emit("data", {
184
+ speech: transcriptToUse,
185
+ responseTime
186
+ });
187
+ // Clear transcripts after processing
188
+ lastFinalizedTranscript = null;
189
+ lastAnyTranscript = null;
190
+ }
191
+ else {
192
+ logger.warn("Deepgram UtteranceEnd received but no last transcript available");
193
+ }
127
194
  });
128
195
  });
129
196
  // Handle stream errors and cleanup
@@ -212,7 +279,9 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
212
279
  .optional(),
213
280
  model: z
214
281
  .nativeEnum(types_1.DeepgramModel, { message: "Invalid Deepgram model" })
215
- .optional()
282
+ .optional(),
283
+ interimResults: z.boolean().optional(),
284
+ utteranceEndMs: z.number().int().min(1000).max(5000).optional()
216
285
  });
217
286
  }
218
287
  static getCredentialsValidationSchema() {
@@ -223,7 +292,17 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
223
292
  }
224
293
  exports.Deepgram = Deepgram;
225
294
  function buildTranscribeConfig(config) {
226
- return Object.assign(Object.assign({}, config), { model: config.model || types_1.DeepgramModel.NOVA_2_PHONECALL, language: config.languageCode || common_1.VoiceLanguage.EN_US, encoding: "linear16", sample_rate: 16000, channels: 1, smart_format: config.smartFormat || true,
295
+ // UtteranceEnd requires interim_results to be enabled
296
+ // Default to true to enable UtteranceEnd fallback mechanism
297
+ const interimResults = config.interimResults !== false;
298
+ // Default utterance_end_ms to 1000ms (minimum required value)
299
+ // This enables UtteranceEnd events as a fallback when speech_final never becomes true
300
+ const utteranceEndMs = config.utteranceEndMs || 1000;
301
+ return Object.assign(Object.assign({}, config), { model: config.model || types_1.DeepgramModel.NOVA_2_PHONECALL, language: config.languageCode || common_1.VoiceLanguage.EN_US, encoding: "linear16", sample_rate: 16000, channels: 1, smart_format: config.smartFormat !== false,
227
302
  // This needs to be set to true to avoid delays while using smart_format
228
- no_delay: config.noDelay || true });
303
+ no_delay: config.noDelay !== false,
304
+ // REQUIRED for UtteranceEnd: interim_results must be true
305
+ interim_results: interimResults,
306
+ // REQUIRED for UtteranceEnd: utterance_end_ms parameter
307
+ utterance_end_ms: utteranceEndMs });
229
308
  }
@@ -51,6 +51,8 @@ type DeepgramSttConfig = {
51
51
  model: DeepgramModel;
52
52
  smartFormat: boolean;
53
53
  noDelay: boolean;
54
+ interimResults?: boolean;
55
+ utteranceEndMs?: number;
54
56
  };
55
57
  credentials: {
56
58
  apiKey: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/apiserver",
3
- "version": "0.16.9",
3
+ "version": "0.16.11",
4
4
  "description": "APIServer for Fonoster",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -22,12 +22,12 @@
22
22
  },
23
23
  "dependencies": {
24
24
  "@deepgram/sdk": "^3.5.1",
25
- "@fonoster/authz": "^0.16.8",
26
- "@fonoster/autopilot": "^0.16.9",
25
+ "@fonoster/authz": "^0.16.10",
26
+ "@fonoster/autopilot": "^0.16.11",
27
27
  "@fonoster/common": "^0.16.8",
28
- "@fonoster/identity": "^0.16.8",
28
+ "@fonoster/identity": "^0.16.10",
29
29
  "@fonoster/logger": "^0.16.7",
30
- "@fonoster/sipnet": "^0.16.8",
30
+ "@fonoster/sipnet": "^0.16.10",
31
31
  "@fonoster/streams": "^0.16.7",
32
32
  "@fonoster/types": "^0.16.7",
33
33
  "@google-cloud/speech": "^6.6.0",
@@ -76,5 +76,5 @@
76
76
  "@types/uuid": "^10.0.0",
77
77
  "@types/validator": "^13.15.10"
78
78
  },
79
- "gitHead": "115c6d190293c1a5f366cf1997be7e84433f9e05"
79
+ "gitHead": "680281a11296cb509f64823461a30de237960a05"
80
80
  }