@fonoster/apiserver 0.8.51 → 0.8.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/voice/VoiceClientImpl.d.ts +2 -2
- package/dist/voice/handlers/createStreamGatherHandler.js +5 -3
- package/dist/voice/handlers/gather/createGatherHandler.js +6 -5
- package/dist/voice/handlers/gather/getTimeoutPromise.d.ts +1 -1
- package/dist/voice/stt/Deepgram.js +11 -2
- package/dist/voice/stt/Google.js +6 -2
- package/dist/voice/stt/types.d.ts +1 -0
- package/dist/voice/tts/ElevenLabs.js +3 -1
- package/dist/voice/types/voice.d.ts +2 -1
- package/package.json +6 -6
|
@@ -32,8 +32,8 @@ declare class VoiceClientImpl implements VoiceClient {
|
|
|
32
32
|
synthesize(text: string, options: SayOptions): Promise<string>;
|
|
33
33
|
transcribe(): Promise<SpeechResult>;
|
|
34
34
|
startSpeechGather(callback: (stream: {
|
|
35
|
-
speech
|
|
36
|
-
|
|
35
|
+
speech: string;
|
|
36
|
+
responseTime: number;
|
|
37
37
|
}) => void): Promise<void>;
|
|
38
38
|
startDtmfGather(sessionRef: string, callback: (event: {
|
|
39
39
|
digit: string;
|
|
@@ -47,18 +47,20 @@ function createStreamGatherHandler(voiceClient) {
|
|
|
47
47
|
voiceClient.sendResponse({
|
|
48
48
|
streamGatherPayload: {
|
|
49
49
|
sessionRef,
|
|
50
|
-
digit
|
|
50
|
+
digit,
|
|
51
|
+
responseTime: 0
|
|
51
52
|
}
|
|
52
53
|
});
|
|
53
54
|
});
|
|
54
55
|
}
|
|
55
56
|
if (effectiveSource.includes(common_1.StreamGatherSource.SPEECH)) {
|
|
56
57
|
voiceClient.startSpeechGather((event) => {
|
|
57
|
-
const { speech } = event;
|
|
58
|
+
const { speech, responseTime } = event;
|
|
58
59
|
voiceClient.sendResponse({
|
|
59
60
|
streamGatherPayload: {
|
|
60
61
|
sessionRef,
|
|
61
|
-
speech
|
|
62
|
+
speech,
|
|
63
|
+
responseTime
|
|
62
64
|
}
|
|
63
65
|
});
|
|
64
66
|
});
|
|
@@ -60,7 +60,7 @@ function createGatherHandler(voiceClient) {
|
|
|
60
60
|
const effectiveSource = source || common_1.GatherSource.SPEECH_AND_DTMF;
|
|
61
61
|
const promises = [timeoutPromise];
|
|
62
62
|
if (effectiveSource.includes(common_1.GatherSource.SPEECH)) {
|
|
63
|
-
promises.push(voiceClient.transcribe().then((result) => result
|
|
63
|
+
promises.push(voiceClient.transcribe().then((result) => result));
|
|
64
64
|
}
|
|
65
65
|
if (effectiveSource.includes(common_1.GatherSource.DTMF)) {
|
|
66
66
|
promises.push(voiceClient
|
|
@@ -71,14 +71,15 @@ function createGatherHandler(voiceClient) {
|
|
|
71
71
|
timeout: effectiveTimeout,
|
|
72
72
|
onDigitReceived: timeoutPromise.cancelGlobalTimer
|
|
73
73
|
})
|
|
74
|
-
.then((
|
|
74
|
+
.then((result) => result));
|
|
75
75
|
}
|
|
76
|
-
const result = yield Promise.race(promises);
|
|
76
|
+
const result = (yield Promise.race(promises));
|
|
77
77
|
voiceClient.sendResponse({
|
|
78
78
|
gatherResponse: {
|
|
79
79
|
sessionRef,
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
responseTime: result.responseTime,
|
|
81
|
+
speech: (0, utils_1.isDtmf)(result.digits) ? undefined : result.speech,
|
|
82
|
+
digits: (0, utils_1.isDtmf)(result.digits) ? result.digits : undefined
|
|
82
83
|
}
|
|
83
84
|
});
|
|
84
85
|
}));
|
|
@@ -2,7 +2,7 @@ interface PromiseWithResetTimer<T> extends Promise<T> {
|
|
|
2
2
|
cancelGlobalTimer?: () => void;
|
|
3
3
|
}
|
|
4
4
|
declare function getTimeoutPromise(timeout: number): {
|
|
5
|
-
timeoutPromise: PromiseWithResetTimer<
|
|
5
|
+
timeoutPromise: PromiseWithResetTimer<unknown>;
|
|
6
6
|
effectiveTimeout: number;
|
|
7
7
|
};
|
|
8
8
|
export { getTimeoutPromise };
|
|
@@ -90,8 +90,15 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
90
90
|
if (!data.channel.alternatives[0].transcript || !data.speech_final) {
|
|
91
91
|
return;
|
|
92
92
|
}
|
|
93
|
+
const words = data.channel.alternatives[0].words;
|
|
94
|
+
const responseTime = words.length > 0
|
|
95
|
+
? (words.reduce((acc, word) => acc + (word.end - word.start), 0) *
|
|
96
|
+
1000) /
|
|
97
|
+
words.length
|
|
98
|
+
: 0;
|
|
93
99
|
out.emit("data", {
|
|
94
|
-
speech: data.channel.alternatives[0].transcript
|
|
100
|
+
speech: data.channel.alternatives[0].transcript,
|
|
101
|
+
responseTime
|
|
95
102
|
});
|
|
96
103
|
});
|
|
97
104
|
connection.on(LiveTranscriptionEvents.Error, (err) => {
|
|
@@ -104,6 +111,7 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
104
111
|
transcribe(stream) {
|
|
105
112
|
return __awaiter(this, void 0, void 0, function* () {
|
|
106
113
|
return new Promise((resolve, reject) => {
|
|
114
|
+
const startTime = performance.now();
|
|
107
115
|
const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
|
|
108
116
|
stream.on("data", (chunk) => {
|
|
109
117
|
connection.send(chunk);
|
|
@@ -113,7 +121,8 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
113
121
|
if (data.channel.alternatives[0].transcript && data.speech_final) {
|
|
114
122
|
const result = {
|
|
115
123
|
speech: data.channel.alternatives[0].transcript,
|
|
116
|
-
isFinal: true
|
|
124
|
+
isFinal: true,
|
|
125
|
+
responseTime: performance.now() - startTime
|
|
117
126
|
};
|
|
118
127
|
resolve(result);
|
|
119
128
|
connection.destroy();
|
package/dist/voice/stt/Google.js
CHANGED
|
@@ -47,6 +47,7 @@ const common_1 = require("@fonoster/common");
|
|
|
47
47
|
const speech_1 = require("@google-cloud/speech");
|
|
48
48
|
const z = __importStar(require("zod"));
|
|
49
49
|
const AbstractSpeechToText_1 = require("./AbstractSpeechToText");
|
|
50
|
+
const perf_hooks_1 = require("perf_hooks");
|
|
50
51
|
const ENGINE_NAME = "stt.google";
|
|
51
52
|
exports.ENGINE_NAME = ENGINE_NAME;
|
|
52
53
|
class Google extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
@@ -63,6 +64,7 @@ class Google extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
63
64
|
}
|
|
64
65
|
transcribe(stream) {
|
|
65
66
|
return __awaiter(this, void 0, void 0, function* () {
|
|
67
|
+
const startTime = perf_hooks_1.performance.now();
|
|
66
68
|
const languageCode = this.engineConfig.config.languageCode || common_1.VoiceLanguage.EN_US;
|
|
67
69
|
const audioConfig = {
|
|
68
70
|
interimResults: false,
|
|
@@ -78,15 +80,17 @@ class Google extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
78
80
|
.on("error", (e) => reject(e))
|
|
79
81
|
.on("data", (data) => {
|
|
80
82
|
var _a;
|
|
83
|
+
const responseTime = perf_hooks_1.performance.now() - startTime;
|
|
81
84
|
if ((_a = data.results[0]) === null || _a === void 0 ? void 0 : _a.alternatives[0]) {
|
|
82
85
|
const result = {
|
|
83
86
|
speech: data.results[0].alternatives[0].transcript,
|
|
84
|
-
isFinal: true
|
|
87
|
+
isFinal: true,
|
|
88
|
+
responseTime
|
|
85
89
|
};
|
|
86
90
|
resolve(result);
|
|
87
91
|
}
|
|
88
92
|
else {
|
|
89
|
-
resolve({ speech: "", isFinal: true });
|
|
93
|
+
resolve({ speech: "", isFinal: true, responseTime });
|
|
90
94
|
}
|
|
91
95
|
recognizeStream.destroy();
|
|
92
96
|
});
|
|
@@ -125,7 +125,9 @@ class ElevenLabs extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
|
125
125
|
voice,
|
|
126
126
|
text,
|
|
127
127
|
model_id: model !== null && model !== void 0 ? model : "eleven_flash_v2_5",
|
|
128
|
-
output_format: "pcm_16000"
|
|
128
|
+
output_format: "pcm_16000",
|
|
129
|
+
// TODO: Make this configurable
|
|
130
|
+
optimize_streaming_latency: 2
|
|
129
131
|
});
|
|
130
132
|
return (yield (0, streamToBuffer_1.streamToBuffer)(response));
|
|
131
133
|
});
|
|
@@ -11,7 +11,8 @@ type VoiceClient = {
|
|
|
11
11
|
synthesize: (text: string, options: SayOptions) => Promise<string>;
|
|
12
12
|
transcribe: () => Promise<SpeechResult>;
|
|
13
13
|
startSpeechGather: (callback: (stream: {
|
|
14
|
-
speech
|
|
14
|
+
speech: string;
|
|
15
|
+
responseTime: number;
|
|
15
16
|
}) => void) => void;
|
|
16
17
|
startDtmfGather: (sessionRef: string, callback: (event: {
|
|
17
18
|
digit: string;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/apiserver",
|
|
3
|
-
"version": "0.8.51",
|
|
3
|
+
"version": "0.8.52",
|
|
4
4
|
"description": "APIServer for Fonoster",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -21,11 +21,11 @@
|
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
23
|
"@deepgram/sdk": "^3.5.1",
|
|
24
|
-
"@fonoster/authz": "^0.8.51",
|
|
25
|
-
"@fonoster/common": "^0.8.51",
|
|
26
|
-
"@fonoster/identity": "^0.8.51",
|
|
24
|
+
"@fonoster/authz": "^0.8.52",
|
|
25
|
+
"@fonoster/common": "^0.8.52",
|
|
26
|
+
"@fonoster/identity": "^0.8.52",
|
|
27
27
|
"@fonoster/logger": "^0.8.51",
|
|
28
|
-
"@fonoster/sipnet": "^0.8.51",
|
|
28
|
+
"@fonoster/sipnet": "^0.8.52",
|
|
29
29
|
"@fonoster/streams": "^0.8.51",
|
|
30
30
|
"@fonoster/types": "^0.8.51",
|
|
31
31
|
"@google-cloud/speech": "^6.6.0",
|
|
@@ -73,5 +73,5 @@
|
|
|
73
73
|
"@types/uuid": "^10.0.0",
|
|
74
74
|
"@types/validator": "^13.12.0"
|
|
75
75
|
},
|
|
76
|
-
"gitHead": "
|
|
76
|
+
"gitHead": "772398ea115078012f8e270943ec36b61343ea36"
|
|
77
77
|
}
|