@fonoster/apiserver 0.16.9 → 0.16.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c)
|
|
3
|
+
Copyright (c) 2026 Fonoster Inc
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
|
@@ -56,7 +56,7 @@ function createListApplications(prisma) {
|
|
|
56
56
|
const items = result.map(applicationWithEncodedStruct_1.applicationWithEncodedStruct);
|
|
57
57
|
callback(null, {
|
|
58
58
|
items,
|
|
59
|
-
nextPageToken: (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
|
|
59
|
+
nextPageToken: items.length < pageSize ? undefined : (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
|
|
60
60
|
});
|
|
61
61
|
});
|
|
62
62
|
return (0, common_1.withErrorHandlingAndValidation)(listApplications, common_1.Validators.listRequestSchema);
|
|
@@ -49,7 +49,7 @@ function listSecrets(prisma) {
|
|
|
49
49
|
})).map(common_1.datesMapper);
|
|
50
50
|
callback(null, {
|
|
51
51
|
items: result,
|
|
52
|
-
nextPageToken: (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
|
|
52
|
+
nextPageToken: result.length < pageSize ? undefined : (_a = result[result.length - 1]) === null || _a === void 0 ? void 0 : _a.ref
|
|
53
53
|
});
|
|
54
54
|
});
|
|
55
55
|
return (0, common_1.withErrorHandlingAndValidation)(fn, common_1.Validators.listRequestSchema);
|
|
@@ -83,6 +83,16 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
83
83
|
streamTranscribe(stream) {
|
|
84
84
|
const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
|
|
85
85
|
const out = new stream_1.Stream();
|
|
86
|
+
// Track last transcript for UtteranceEnd fallback
|
|
87
|
+
// According to Deepgram docs: "If you receive an UtteranceEnd event without a
|
|
88
|
+
// preceding speech_final: true, it's advisable to process the last-received
|
|
89
|
+
// transcript as a complete utterance."
|
|
90
|
+
// UtteranceEnd fires after finalized words, so we store the last finalized transcript
|
|
91
|
+
// but also keep any transcript as a fallback
|
|
92
|
+
let lastFinalizedTranscript = null;
|
|
93
|
+
let lastFinalizedTranscriptTime = 0;
|
|
94
|
+
let lastAnyTranscript = null;
|
|
95
|
+
let lastAnyTranscriptTime = 0;
|
|
86
96
|
// Add error handler immediately to catch any connection errors
|
|
87
97
|
connection.on(LiveTranscriptionEvents.Error, (err) => {
|
|
88
98
|
logger.error("error on Deepgram connection", { err });
|
|
@@ -106,8 +116,23 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
106
116
|
});
|
|
107
117
|
connection.on(LiveTranscriptionEvents.Transcript, (data) => {
|
|
108
118
|
var _a, _b, _c;
|
|
109
|
-
|
|
110
|
-
|
|
119
|
+
const transcript = (_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript;
|
|
120
|
+
const hasTranscript = !!transcript;
|
|
121
|
+
const isFinal = data.is_final === true;
|
|
122
|
+
const speechFinal = data.speech_final === true;
|
|
123
|
+
// Store any transcript for UtteranceEnd fallback
|
|
124
|
+
if (hasTranscript) {
|
|
125
|
+
lastAnyTranscript = transcript;
|
|
126
|
+
lastAnyTranscriptTime = Date.now();
|
|
127
|
+
// Store finalized transcripts separately (preferred for UtteranceEnd)
|
|
128
|
+
if (isFinal || speechFinal) {
|
|
129
|
+
lastFinalizedTranscript = transcript;
|
|
130
|
+
lastFinalizedTranscriptTime = Date.now();
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
// Process transcript if it has content and is final
|
|
134
|
+
// Check both speech_final (primary) and is_final (backup)
|
|
135
|
+
if (!hasTranscript || (!speechFinal && !isFinal)) {
|
|
111
136
|
return;
|
|
112
137
|
}
|
|
113
138
|
const words = data.channel.alternatives[0].words || [];
|
|
@@ -117,13 +142,55 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
117
142
|
words.length
|
|
118
143
|
: 0;
|
|
119
144
|
logger.verbose("transcribe result", {
|
|
120
|
-
speech:
|
|
121
|
-
responseTime
|
|
145
|
+
speech: transcript,
|
|
146
|
+
responseTime,
|
|
147
|
+
isFinal,
|
|
148
|
+
speechFinal
|
|
122
149
|
});
|
|
123
150
|
out.emit("data", {
|
|
124
|
-
speech:
|
|
151
|
+
speech: transcript,
|
|
125
152
|
responseTime
|
|
126
153
|
});
|
|
154
|
+
// Clear transcripts after processing (they've been emitted)
|
|
155
|
+
lastFinalizedTranscript = null;
|
|
156
|
+
lastAnyTranscript = null;
|
|
157
|
+
});
|
|
158
|
+
// CRITICAL: Handle UtteranceEnd events (fallback when speech_final never becomes true)
|
|
159
|
+
// This is Deepgram's recommended fallback mechanism for noisy environments
|
|
160
|
+
// UtteranceEnd requires: interim_results=true and utterance_end_ms parameter
|
|
161
|
+
// UtteranceEnd fires after finalized words, so prefer lastFinalizedTranscript
|
|
162
|
+
connection.on(LiveTranscriptionEvents.UtteranceEnd, (data) => {
|
|
163
|
+
// Prefer finalized transcript, fall back to any transcript
|
|
164
|
+
const transcriptToUse = lastFinalizedTranscript || lastAnyTranscript;
|
|
165
|
+
const transcriptTime = lastFinalizedTranscript
|
|
166
|
+
? lastFinalizedTranscriptTime
|
|
167
|
+
: lastAnyTranscriptTime;
|
|
168
|
+
if (transcriptToUse) {
|
|
169
|
+
// Use last_word_end from UtteranceEnd event if available for more accurate timing
|
|
170
|
+
// Otherwise fall back to time since last transcript
|
|
171
|
+
const lastWordEnd = data === null || data === void 0 ? void 0 : data.last_word_end;
|
|
172
|
+
const responseTime = lastWordEnd
|
|
173
|
+
? lastWordEnd * 1000 // Convert seconds to milliseconds
|
|
174
|
+
: transcriptTime
|
|
175
|
+
? Date.now() - transcriptTime
|
|
176
|
+
: 0;
|
|
177
|
+
logger.info("Deepgram UtteranceEnd - processing last transcript", {
|
|
178
|
+
speech: transcriptToUse,
|
|
179
|
+
responseTime,
|
|
180
|
+
lastWordEnd: lastWordEnd,
|
|
181
|
+
wasFinalized: !!lastFinalizedTranscript
|
|
182
|
+
});
|
|
183
|
+
out.emit("data", {
|
|
184
|
+
speech: transcriptToUse,
|
|
185
|
+
responseTime
|
|
186
|
+
});
|
|
187
|
+
// Clear transcripts after processing
|
|
188
|
+
lastFinalizedTranscript = null;
|
|
189
|
+
lastAnyTranscript = null;
|
|
190
|
+
}
|
|
191
|
+
else {
|
|
192
|
+
logger.warn("Deepgram UtteranceEnd received but no last transcript available");
|
|
193
|
+
}
|
|
127
194
|
});
|
|
128
195
|
});
|
|
129
196
|
// Handle stream errors and cleanup
|
|
@@ -212,7 +279,9 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
212
279
|
.optional(),
|
|
213
280
|
model: z
|
|
214
281
|
.nativeEnum(types_1.DeepgramModel, { message: "Invalid Deepgram model" })
|
|
215
|
-
.optional()
|
|
282
|
+
.optional(),
|
|
283
|
+
interimResults: z.boolean().optional(),
|
|
284
|
+
utteranceEndMs: z.number().int().min(1000).max(5000).optional()
|
|
216
285
|
});
|
|
217
286
|
}
|
|
218
287
|
static getCredentialsValidationSchema() {
|
|
@@ -223,7 +292,17 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
223
292
|
}
|
|
224
293
|
exports.Deepgram = Deepgram;
|
|
225
294
|
function buildTranscribeConfig(config) {
|
|
226
|
-
|
|
295
|
+
// UtteranceEnd requires interim_results to be enabled
|
|
296
|
+
// Default to true to enable UtteranceEnd fallback mechanism
|
|
297
|
+
const interimResults = config.interimResults !== false;
|
|
298
|
+
// Default utterance_end_ms to 1000ms (minimum required value)
|
|
299
|
+
// This enables UtteranceEnd events as a fallback when speech_final never becomes true
|
|
300
|
+
const utteranceEndMs = config.utteranceEndMs || 1000;
|
|
301
|
+
return Object.assign(Object.assign({}, config), { model: config.model || types_1.DeepgramModel.NOVA_2_PHONECALL, language: config.languageCode || common_1.VoiceLanguage.EN_US, encoding: "linear16", sample_rate: 16000, channels: 1, smart_format: config.smartFormat !== false,
|
|
227
302
|
// This needs to be set to true to avoid delays while using smart_format
|
|
228
|
-
no_delay: config.noDelay
|
|
303
|
+
no_delay: config.noDelay !== false,
|
|
304
|
+
// REQUIRED for UtteranceEnd: interim_results must be true
|
|
305
|
+
interim_results: interimResults,
|
|
306
|
+
// REQUIRED for UtteranceEnd: utterance_end_ms parameter
|
|
307
|
+
utterance_end_ms: utteranceEndMs });
|
|
229
308
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/apiserver",
|
|
3
|
-
"version": "0.16.9",
|
|
3
|
+
"version": "0.16.11",
|
|
4
4
|
"description": "APIServer for Fonoster",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -22,12 +22,12 @@
|
|
|
22
22
|
},
|
|
23
23
|
"dependencies": {
|
|
24
24
|
"@deepgram/sdk": "^3.5.1",
|
|
25
|
-
"@fonoster/authz": "^0.16.
|
|
26
|
-
"@fonoster/autopilot": "^0.16.
|
|
25
|
+
"@fonoster/authz": "^0.16.10",
|
|
26
|
+
"@fonoster/autopilot": "^0.16.11",
|
|
27
27
|
"@fonoster/common": "^0.16.8",
|
|
28
|
-
"@fonoster/identity": "^0.16.
|
|
28
|
+
"@fonoster/identity": "^0.16.10",
|
|
29
29
|
"@fonoster/logger": "^0.16.7",
|
|
30
|
-
"@fonoster/sipnet": "^0.16.
|
|
30
|
+
"@fonoster/sipnet": "^0.16.10",
|
|
31
31
|
"@fonoster/streams": "^0.16.7",
|
|
32
32
|
"@fonoster/types": "^0.16.7",
|
|
33
33
|
"@google-cloud/speech": "^6.6.0",
|
|
@@ -76,5 +76,5 @@
|
|
|
76
76
|
"@types/uuid": "^10.0.0",
|
|
77
77
|
"@types/validator": "^13.15.10"
|
|
78
78
|
},
|
|
79
|
-
"gitHead": "
|
|
79
|
+
"gitHead": "680281a11296cb509f64823461a30de237960a05"
|
|
80
80
|
}
|