@fonoster/apiserver 0.8.51 → 0.8.52

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -32,8 +32,8 @@ declare class VoiceClientImpl implements VoiceClient {
32
32
  synthesize(text: string, options: SayOptions): Promise<string>;
33
33
  transcribe(): Promise<SpeechResult>;
34
34
  startSpeechGather(callback: (stream: {
35
- speech?: string;
36
- digit?: string;
35
+ speech: string;
36
+ responseTime: number;
37
37
  }) => void): Promise<void>;
38
38
  startDtmfGather(sessionRef: string, callback: (event: {
39
39
  digit: string;
@@ -47,18 +47,20 @@ function createStreamGatherHandler(voiceClient) {
47
47
  voiceClient.sendResponse({
48
48
  streamGatherPayload: {
49
49
  sessionRef,
50
- digit
50
+ digit,
51
+ responseTime: 0
51
52
  }
52
53
  });
53
54
  });
54
55
  }
55
56
  if (effectiveSource.includes(common_1.StreamGatherSource.SPEECH)) {
56
57
  voiceClient.startSpeechGather((event) => {
57
- const { speech } = event;
58
+ const { speech, responseTime } = event;
58
59
  voiceClient.sendResponse({
59
60
  streamGatherPayload: {
60
61
  sessionRef,
61
- speech
62
+ speech,
63
+ responseTime
62
64
  }
63
65
  });
64
66
  });
@@ -60,7 +60,7 @@ function createGatherHandler(voiceClient) {
60
60
  const effectiveSource = source || common_1.GatherSource.SPEECH_AND_DTMF;
61
61
  const promises = [timeoutPromise];
62
62
  if (effectiveSource.includes(common_1.GatherSource.SPEECH)) {
63
- promises.push(voiceClient.transcribe().then((result) => result.speech));
63
+ promises.push(voiceClient.transcribe().then((result) => result));
64
64
  }
65
65
  if (effectiveSource.includes(common_1.GatherSource.DTMF)) {
66
66
  promises.push(voiceClient
@@ -71,14 +71,15 @@ function createGatherHandler(voiceClient) {
71
71
  timeout: effectiveTimeout,
72
72
  onDigitReceived: timeoutPromise.cancelGlobalTimer
73
73
  })
74
- .then(({ digits }) => digits));
74
+ .then((result) => result));
75
75
  }
76
- const result = yield Promise.race(promises);
76
+ const result = (yield Promise.race(promises));
77
77
  voiceClient.sendResponse({
78
78
  gatherResponse: {
79
79
  sessionRef,
80
- speech: (0, utils_1.isDtmf)(result) ? null : result,
81
- digits: (0, utils_1.isDtmf)(result) ? result : null
80
+ responseTime: result.responseTime,
81
+ speech: (0, utils_1.isDtmf)(result.digits) ? undefined : result.speech,
82
+ digits: (0, utils_1.isDtmf)(result.digits) ? result.digits : undefined
82
83
  }
83
84
  });
84
85
  }));
@@ -2,7 +2,7 @@ interface PromiseWithResetTimer<T> extends Promise<T> {
2
2
  cancelGlobalTimer?: () => void;
3
3
  }
4
4
  declare function getTimeoutPromise(timeout: number): {
5
- timeoutPromise: PromiseWithResetTimer<string>;
5
+ timeoutPromise: PromiseWithResetTimer<unknown>;
6
6
  effectiveTimeout: number;
7
7
  };
8
8
  export { getTimeoutPromise };
@@ -90,8 +90,15 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
90
90
  if (!data.channel.alternatives[0].transcript || !data.speech_final) {
91
91
  return;
92
92
  }
93
+ const words = data.channel.alternatives[0].words;
94
+ const responseTime = words.length > 0
95
+ ? (words.reduce((acc, word) => acc + (word.end - word.start), 0) *
96
+ 1000) /
97
+ words.length
98
+ : 0;
93
99
  out.emit("data", {
94
- speech: data.channel.alternatives[0].transcript
100
+ speech: data.channel.alternatives[0].transcript,
101
+ responseTime
95
102
  });
96
103
  });
97
104
  connection.on(LiveTranscriptionEvents.Error, (err) => {
@@ -104,6 +111,7 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
104
111
  transcribe(stream) {
105
112
  return __awaiter(this, void 0, void 0, function* () {
106
113
  return new Promise((resolve, reject) => {
114
+ const startTime = performance.now();
107
115
  const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
108
116
  stream.on("data", (chunk) => {
109
117
  connection.send(chunk);
@@ -113,7 +121,8 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
113
121
  if (data.channel.alternatives[0].transcript && data.speech_final) {
114
122
  const result = {
115
123
  speech: data.channel.alternatives[0].transcript,
116
- isFinal: true
124
+ isFinal: true,
125
+ responseTime: performance.now() - startTime
117
126
  };
118
127
  resolve(result);
119
128
  connection.destroy();
@@ -47,6 +47,7 @@ const common_1 = require("@fonoster/common");
47
47
  const speech_1 = require("@google-cloud/speech");
48
48
  const z = __importStar(require("zod"));
49
49
  const AbstractSpeechToText_1 = require("./AbstractSpeechToText");
50
+ const perf_hooks_1 = require("perf_hooks");
50
51
  const ENGINE_NAME = "stt.google";
51
52
  exports.ENGINE_NAME = ENGINE_NAME;
52
53
  class Google extends AbstractSpeechToText_1.AbstractSpeechToText {
@@ -63,6 +64,7 @@ class Google extends AbstractSpeechToText_1.AbstractSpeechToText {
63
64
  }
64
65
  transcribe(stream) {
65
66
  return __awaiter(this, void 0, void 0, function* () {
67
+ const startTime = perf_hooks_1.performance.now();
66
68
  const languageCode = this.engineConfig.config.languageCode || common_1.VoiceLanguage.EN_US;
67
69
  const audioConfig = {
68
70
  interimResults: false,
@@ -78,15 +80,17 @@ class Google extends AbstractSpeechToText_1.AbstractSpeechToText {
78
80
  .on("error", (e) => reject(e))
79
81
  .on("data", (data) => {
80
82
  var _a;
83
+ const responseTime = perf_hooks_1.performance.now() - startTime;
81
84
  if ((_a = data.results[0]) === null || _a === void 0 ? void 0 : _a.alternatives[0]) {
82
85
  const result = {
83
86
  speech: data.results[0].alternatives[0].transcript,
84
- isFinal: true
87
+ isFinal: true,
88
+ responseTime
85
89
  };
86
90
  resolve(result);
87
91
  }
88
92
  else {
89
- resolve({ speech: "", isFinal: true });
93
+ resolve({ speech: "", isFinal: true, responseTime });
90
94
  }
91
95
  recognizeStream.destroy();
92
96
  });
@@ -2,6 +2,7 @@ import { VoiceLanguage } from "@fonoster/common";
2
2
  type SpeechResult = {
3
3
  speech: string;
4
4
  isFinal: boolean;
5
+ responseTime: number;
5
6
  };
6
7
  type StreamSpeech = {
7
8
  on(events: string, callback: (result: SpeechResult) => void): void;
@@ -125,7 +125,9 @@ class ElevenLabs extends AbstractTextToSpeech_1.AbstractTextToSpeech {
125
125
  voice,
126
126
  text,
127
127
  model_id: model !== null && model !== void 0 ? model : "eleven_flash_v2_5",
128
- output_format: "pcm_16000"
128
+ output_format: "pcm_16000",
129
+ // TODO: Make this configurable
130
+ optimize_streaming_latency: 2
129
131
  });
130
132
  return (yield (0, streamToBuffer_1.streamToBuffer)(response));
131
133
  });
@@ -11,7 +11,8 @@ type VoiceClient = {
11
11
  synthesize: (text: string, options: SayOptions) => Promise<string>;
12
12
  transcribe: () => Promise<SpeechResult>;
13
13
  startSpeechGather: (callback: (stream: {
14
- speech?: string;
14
+ speech: string;
15
+ responseTime: number;
15
16
  }) => void) => void;
16
17
  startDtmfGather: (sessionRef: string, callback: (event: {
17
18
  digit: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/apiserver",
3
- "version": "0.8.51",
3
+ "version": "0.8.52",
4
4
  "description": "APIServer for Fonoster",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -21,11 +21,11 @@
21
21
  },
22
22
  "dependencies": {
23
23
  "@deepgram/sdk": "^3.5.1",
24
- "@fonoster/authz": "^0.8.51",
25
- "@fonoster/common": "^0.8.51",
26
- "@fonoster/identity": "^0.8.51",
24
+ "@fonoster/authz": "^0.8.52",
25
+ "@fonoster/common": "^0.8.52",
26
+ "@fonoster/identity": "^0.8.52",
27
27
  "@fonoster/logger": "^0.8.51",
28
- "@fonoster/sipnet": "^0.8.51",
28
+ "@fonoster/sipnet": "^0.8.52",
29
29
  "@fonoster/streams": "^0.8.51",
30
30
  "@fonoster/types": "^0.8.51",
31
31
  "@google-cloud/speech": "^6.6.0",
@@ -73,5 +73,5 @@
73
73
  "@types/uuid": "^10.0.0",
74
74
  "@types/validator": "^13.12.0"
75
75
  },
76
- "gitHead": "67a0f500926304550c2698943ba6e7a7aa4b9f06"
76
+ "gitHead": "772398ea115078012f8e270943ec36b61343ea36"
77
77
  }