@fonoster/apiserver 0.16.10 → 0.17.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/LICENSE +2 -2
  2. package/dist/core/buildWelcomeDemoService.js +13 -13
  3. package/dist/voice/client/AudioSocketHandler.js +3 -3
  4. package/dist/voice/client/AuthorizationHandler.js +2 -2
  5. package/dist/voice/client/ExternalMediaHandler.js +4 -4
  6. package/dist/voice/client/SpeechHandler.d.ts +2 -2
  7. package/dist/voice/client/SpeechHandler.js +8 -4
  8. package/dist/voice/client/VoiceClientImpl.d.ts +2 -2
  9. package/dist/voice/client/VoiceClientImpl.js +2 -2
  10. package/dist/voice/createCreateVoiceClient.js +10 -4
  11. package/dist/voice/handlers/createAnswerHandler.js +3 -3
  12. package/dist/voice/handlers/createHangupHandler.js +3 -3
  13. package/dist/voice/handlers/createMuteHandler.js +3 -3
  14. package/dist/voice/handlers/createPlayDtmfHandler.js +3 -3
  15. package/dist/voice/handlers/createPlayHandler.js +3 -3
  16. package/dist/voice/handlers/createPlaybackControlHandler.js +3 -3
  17. package/dist/voice/handlers/createRecordHandler.js +3 -3
  18. package/dist/voice/handlers/createSayHandler.js +2 -2
  19. package/dist/voice/handlers/createStopSayHandler.js +3 -3
  20. package/dist/voice/handlers/createStreamGatherHandler.js +5 -5
  21. package/dist/voice/handlers/createStreamHandler.js +3 -3
  22. package/dist/voice/handlers/createUnmuteHandler.js +3 -3
  23. package/dist/voice/handlers/dial/createDialHandler.js +1 -1
  24. package/dist/voice/handlers/dial/recordChannel.d.ts +1 -1
  25. package/dist/voice/handlers/dial/recordChannel.js +3 -3
  26. package/dist/voice/handlers/gather/createGatherHandler.js +3 -3
  27. package/dist/voice/stt/Deepgram.js +87 -8
  28. package/dist/voice/stt/types.d.ts +2 -0
  29. package/dist/voice/types/ari.d.ts +2 -1
  30. package/dist/voice/types/ari.js +1 -0
  31. package/dist/voice/types/voice.d.ts +2 -2
  32. package/package.json +10 -10
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 Fonoster Inc
3
+ Copyright (c) 2026 Fonoster Inc
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -33,48 +33,48 @@ function buildWelcomeDemoService() {
33
33
  definition: SERVICE_DEFINITION,
34
34
  handlers: {
35
35
  createSession: (voice) => {
36
- let sessionRef;
36
+ let mediaSessionRef;
37
37
  voice.on(common_1.StreamEvent.DATA, (params) => {
38
38
  try {
39
39
  const { request, sayResponse } = params;
40
40
  if (request) {
41
41
  const { callerNumber } = request;
42
- sessionRef = request.sessionRef;
42
+ mediaSessionRef = request.mediaSessionRef;
43
43
  logger.verbose("welcome demo session started", {
44
- sessionRef,
44
+ mediaSessionRef,
45
45
  callerNumber
46
46
  });
47
47
  voice.write({
48
48
  answerRequest: {
49
- sessionRef: request.sessionRef
49
+ mediaSessionRef: request.mediaSessionRef
50
50
  }
51
51
  });
52
52
  voice.write({
53
53
  sayRequest: {
54
54
  text: "Welcome to Fonoster! Your system is configured correctly and ready for voice application development. Goodbye!",
55
- sessionRef: request.sessionRef
55
+ mediaSessionRef: request.mediaSessionRef
56
56
  }
57
57
  });
58
58
  }
59
- if (sayResponse && sessionRef) {
59
+ if (sayResponse && mediaSessionRef) {
60
60
  logger.verbose("hanging up welcome demo session", {
61
- sessionRef
61
+ mediaSessionRef
62
62
  });
63
63
  voice.write({
64
64
  hangupRequest: {
65
- sessionRef
65
+ mediaSessionRef
66
66
  }
67
67
  });
68
68
  }
69
69
  }
70
70
  catch (error) {
71
71
  logger.error("error in welcome demo session", {
72
- sessionRef,
72
+ mediaSessionRef,
73
73
  error
74
74
  });
75
- if (sessionRef) {
75
+ if (mediaSessionRef) {
76
76
  voice.write({
77
- hangupRequest: { sessionRef }
77
+ hangupRequest: { mediaSessionRef }
78
78
  });
79
79
  }
80
80
  voice.end();
@@ -82,11 +82,11 @@ function buildWelcomeDemoService() {
82
82
  });
83
83
  voice.once(common_1.StreamEvent.END, () => {
84
84
  voice.end();
85
- logger.verbose("welcome demo session ended", { sessionRef });
85
+ logger.verbose("welcome demo session ended", { mediaSessionRef });
86
86
  });
87
87
  voice.on(common_1.StreamEvent.ERROR, (error) => {
88
88
  logger.error("stream error in welcome demo session", {
89
- sessionRef,
89
+ mediaSessionRef,
90
90
  error
91
91
  });
92
92
  voice.end();
@@ -27,18 +27,18 @@ class AudioSocketHandler {
27
27
  this.audioSocket.onConnection((req, res) => __awaiter(this, void 0, void 0, function* () {
28
28
  logger.verbose("audio socket connection received", {
29
29
  ref: req.ref,
30
- sessionRef: this.config.sessionRef
30
+ mediaSessionRef: this.config.mediaSessionRef
31
31
  });
32
32
  (0, transcribeOnConnection_1.transcribeOnConnection)(this.transcriptionsStream)(req, res);
33
33
  res.onClose(() => {
34
34
  logger.verbose("session audio stream closed", {
35
- sessionRef: this.config.sessionRef
35
+ mediaSessionRef: this.config.mediaSessionRef
36
36
  });
37
37
  });
38
38
  res.onError((err) => {
39
39
  logger.error("session audio stream error", {
40
40
  error: err,
41
- sessionRef: this.config.sessionRef
41
+ mediaSessionRef: this.config.mediaSessionRef
42
42
  });
43
43
  });
44
44
  this.audioStream = res;
@@ -42,7 +42,7 @@ class AuthorizationHandler {
42
42
  if (!envs_1.AUTHZ_SERVICE_ENABLED) {
43
43
  return true;
44
44
  }
45
- const { sessionRef: channelId, accessKeyId } = this.config;
45
+ const { mediaSessionRef: channelId, accessKeyId } = this.config;
46
46
  try {
47
47
  const authz = new authz_1.AuthzClient(`${envs_1.AUTHZ_SERVICE_HOST}:${envs_1.AUTHZ_SERVICE_PORT}`);
48
48
  const authorized = yield authz.checkSessionAuthorized({ accessKeyId });
@@ -62,7 +62,7 @@ class AuthorizationHandler {
62
62
  }
63
63
  handleUnauthorizedSession() {
64
64
  return __awaiter(this, void 0, void 0, function* () {
65
- const { sessionRef: channelId } = this.config;
65
+ const { mediaSessionRef: channelId } = this.config;
66
66
  try {
67
67
  yield this.ari.channels.answer({ channelId });
68
68
  yield this.ari.channels.play({ channelId, media: "sound:unavailable" });
@@ -26,20 +26,20 @@ class ExternalMediaHandler {
26
26
  yield bridge.create({ type: "mixing" });
27
27
  logger.verbose("creating external media config", {
28
28
  port,
29
- sessionRef: this.config.sessionRef,
29
+ mediaSessionRef: this.config.mediaSessionRef,
30
30
  bridgeId: bridge.id
31
31
  });
32
32
  channel.externalMedia((0, createExternalMediaConfig_1.createExternalMediaConfig)(port));
33
33
  channel.once(types_1.AriEvent.STASIS_START, (_, channel) => __awaiter(this, void 0, void 0, function* () {
34
- bridge.addChannel({ channel: [this.config.sessionRef, channel.id] });
34
+ bridge.addChannel({ channel: [this.config.mediaSessionRef, channel.id] });
35
35
  logger.verbose("added channel to bridge", {
36
- sessionRef: this.config.sessionRef,
36
+ mediaSessionRef: this.config.mediaSessionRef,
37
37
  channelId: channel.id
38
38
  });
39
39
  }));
40
40
  channel.once("ChannelLeftBridge", () => __awaiter(this, void 0, void 0, function* () {
41
41
  logger.verbose("channel left bridge", {
42
- sessionRef: this.config.sessionRef,
42
+ mediaSessionRef: this.config.mediaSessionRef,
43
43
  bridgeId: bridge.id
44
44
  });
45
45
  try {
@@ -28,14 +28,14 @@ declare class SpeechHandler {
28
28
  private ari;
29
29
  private transcriptionsStream;
30
30
  private audioStream;
31
- private sessionRef;
31
+ private mediaSessionRef;
32
32
  constructor(params: {
33
33
  tts: TextToSpeech;
34
34
  stt: SpeechToText;
35
35
  ari: Client;
36
36
  transcriptionsStream: Stream;
37
37
  audioStream: AudioStream;
38
- sessionRef: string;
38
+ mediaSessionRef: string;
39
39
  });
40
40
  synthesize(text: string, options: SayOptions): Promise<string>;
41
41
  stopSynthesis(): Promise<void>;
@@ -20,7 +20,7 @@ class SpeechHandler {
20
20
  this.ari = params.ari;
21
21
  this.transcriptionsStream = params.transcriptionsStream;
22
22
  this.audioStream = params.audioStream;
23
- this.sessionRef = params.sessionRef;
23
+ this.mediaSessionRef = params.mediaSessionRef;
24
24
  }
25
25
  synthesize(text, options) {
26
26
  return __awaiter(this, void 0, void 0, function* () {
@@ -60,12 +60,14 @@ class SpeechHandler {
60
60
  out.on("data", callback);
61
61
  out.on("error", (error) => __awaiter(this, void 0, void 0, function* () {
62
62
  logger.error("speech recognition error", { error });
63
- yield this.ari.channels.hangup({ channelId: this.sessionRef });
63
+ yield this.ari.channels.hangup({ channelId: this.mediaSessionRef });
64
64
  }));
65
65
  }
66
66
  startDtmfGather(callback) {
67
67
  return __awaiter(this, void 0, void 0, function* () {
68
- const channel = yield this.ari.channels.get({ channelId: this.sessionRef });
68
+ const channel = yield this.ari.channels.get({
69
+ channelId: this.mediaSessionRef
70
+ });
69
71
  channel.on(types_1.AriEvent.CHANNEL_DTMF_RECEIVED, (event) => {
70
72
  const { digit } = event;
71
73
  callback({ digit });
@@ -78,7 +80,9 @@ class SpeechHandler {
78
80
  const { onDigitReceived, finishOnKey, maxDigits, timeout } = params;
79
81
  let result = "";
80
82
  let timeoutId = null;
81
- const channel = yield this.ari.channels.get({ channelId: this.sessionRef });
83
+ const channel = yield this.ari.channels.get({
84
+ channelId: this.mediaSessionRef
85
+ });
82
86
  return new Promise((resolve) => {
83
87
  const resetTimer = () => {
84
88
  if (timeoutId) {
@@ -50,11 +50,11 @@ declare class VoiceClientImpl implements VoiceClient {
50
50
  speech: string;
51
51
  responseTime: number;
52
52
  }) => void): void;
53
- startDtmfGather(sessionRef: string, callback: (event: {
53
+ startDtmfGather(mediaSessionRef: string, callback: (event: {
54
54
  digit: string;
55
55
  }) => void): Promise<void>;
56
56
  waitForDtmf(params: {
57
- sessionRef: string;
57
+ mediaSessionRef: string;
58
58
  finishOnKey: string;
59
59
  maxDigits: number;
60
60
  timeout: number;
@@ -90,7 +90,7 @@ class VoiceClientImpl {
90
90
  ari: this.ari,
91
91
  transcriptionsStream: this.transcriptionsStream,
92
92
  audioStream: this.audioSocketHandler.getAudioStream(),
93
- sessionRef: this.config.sessionRef
93
+ mediaSessionRef: this.config.mediaSessionRef
94
94
  });
95
95
  logger.verbose("voice client setup completed");
96
96
  });
@@ -122,7 +122,7 @@ class VoiceClientImpl {
122
122
  startSpeechGather(callback) {
123
123
  this.speechHandler.startSpeechGather(callback);
124
124
  }
125
- startDtmfGather(sessionRef, callback) {
125
+ startDtmfGather(mediaSessionRef, callback) {
126
126
  return __awaiter(this, void 0, void 0, function* () {
127
127
  return this.speechHandler.startDtmfGather(callback);
128
128
  });
@@ -30,6 +30,7 @@ exports.createCreateVoiceClient = createCreateVoiceClient;
30
30
  */
31
31
  const identity_1 = require("@fonoster/identity");
32
32
  const logger_1 = require("@fonoster/logger");
33
+ const uuid_1 = require("uuid");
33
34
  const identityConfig_1 = require("../core/identityConfig");
34
35
  const mapCallDirectionToEnum_1 = require("../events/mapCallDirectionToEnum");
35
36
  const client_1 = require("./client");
@@ -41,21 +42,26 @@ const generateCallAccessToken = (0, identity_1.createGenerateCallAccessToken)(id
41
42
  function createCreateVoiceClient(createContainer) {
42
43
  return function createVoiceClient(params) {
43
44
  return __awaiter(this, void 0, void 0, function* () {
44
- var _a, _b, _c, _d, _e;
45
+ var _a, _b, _c, _d, _e, _f;
45
46
  const { ari, event, channel } = params;
46
- const { id: sessionRef, caller } = event.channel;
47
+ const { id: mediaSessionRef, caller } = event.channel;
47
48
  const { name: callerName, number: callerNumber } = caller;
48
49
  const getChannelVar = (0, createGetChannelVarWithoutThrow_1.createGetChannelVarWithoutThrow)(channel);
49
50
  // Variables set by Asterisk's dialplan
50
51
  const callDirection = (_a = (yield getChannelVar(types_1.ChannelVar.CALL_DIRECTION))) === null || _a === void 0 ? void 0 : _a.value;
51
52
  const appRef = (_b = (yield getChannelVar(types_1.ChannelVar.APP_REF))) === null || _b === void 0 ? void 0 : _b.value;
52
53
  const ingressNumber = ((_c = (yield getChannelVar(types_1.ChannelVar.INGRESS_NUMBER))) === null || _c === void 0 ? void 0 : _c.value) || "";
54
+ // Try to get callRef from channel variable (set by dialplan from X-Call-Ref header for API-originated calls)
55
+ // If not found, generate a new UUID (for PSTN-terminated calls)
56
+ const callRefFromChannel = (_d = (yield getChannelVar(types_1.ChannelVar.CALL_REF))) === null || _d === void 0 ? void 0 : _d.value;
57
+ const callRef = callRefFromChannel || (0, uuid_1.v4)();
53
58
  const { accessKeyId, endpoint, tts, stt } = yield createContainer(appRef);
54
59
  const sessionToken = yield generateCallAccessToken({ accessKeyId, appRef });
55
- const metadataStr = (_e = (_d = (yield getChannelVar(types_1.ChannelVar.METADATA))) === null || _d === void 0 ? void 0 : _d.value) !== null && _e !== void 0 ? _e : "{}";
60
+ const metadataStr = (_f = (_e = (yield getChannelVar(types_1.ChannelVar.METADATA))) === null || _e === void 0 ? void 0 : _e.value) !== null && _f !== void 0 ? _f : "{}";
56
61
  const config = {
57
62
  appRef,
58
- sessionRef,
63
+ mediaSessionRef,
64
+ callRef,
59
65
  accessKeyId,
60
66
  endpoint,
61
67
  callerName,
@@ -13,11 +13,11 @@ exports.createAnswerHandler = createAnswerHandler;
13
13
  const withErrorHandling_1 = require("./utils/withErrorHandling");
14
14
  function createAnswerHandler(ari, voiceClient) {
15
15
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
16
- const { sessionRef } = request;
17
- yield ari.channels.answer({ channelId: sessionRef });
16
+ const { mediaSessionRef } = request;
17
+ yield ari.channels.answer({ channelId: mediaSessionRef });
18
18
  voiceClient.sendResponse({
19
19
  answerResponse: {
20
- sessionRef
20
+ mediaSessionRef
21
21
  }
22
22
  });
23
23
  }));
@@ -13,13 +13,13 @@ exports.createHangupHandler = createHangupHandler;
13
13
  const withErrorHandling_1 = require("./utils/withErrorHandling");
14
14
  function createHangupHandler(ari, voiceClient) {
15
15
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
16
- const { sessionRef } = request;
16
+ const { mediaSessionRef } = request;
17
17
  // Give some time for the last sound to play
18
18
  setTimeout(() => {
19
- ari.channels.hangup({ channelId: sessionRef });
19
+ ari.channels.hangup({ channelId: mediaSessionRef });
20
20
  voiceClient.sendResponse({
21
21
  hangupResponse: {
22
- sessionRef
22
+ mediaSessionRef
23
23
  }
24
24
  });
25
25
  voiceClient.close();
@@ -13,14 +13,14 @@ exports.createMuteHandler = createMuteHandler;
13
13
  const withErrorHandling_1 = require("./utils/withErrorHandling");
14
14
  function createMuteHandler(ari, voiceClient) {
15
15
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
16
- const { sessionRef, direction } = request;
16
+ const { mediaSessionRef, direction } = request;
17
17
  yield ari.channels.mute({
18
- channelId: sessionRef,
18
+ channelId: mediaSessionRef,
19
19
  direction
20
20
  });
21
21
  voiceClient.sendResponse({
22
22
  muteResponse: {
23
- sessionRef
23
+ mediaSessionRef
24
24
  }
25
25
  });
26
26
  }));
@@ -13,14 +13,14 @@ exports.createPlayDtmfHandler = createPlayDtmfHandler;
13
13
  const withErrorHandling_1 = require("./utils/withErrorHandling");
14
14
  function createPlayDtmfHandler(ari, voiceClient) {
15
15
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
16
- const { sessionRef, digits } = request;
16
+ const { mediaSessionRef, digits } = request;
17
17
  yield ari.channels.sendDTMF({
18
- channelId: sessionRef,
18
+ channelId: mediaSessionRef,
19
19
  dtmf: digits
20
20
  });
21
21
  voiceClient.sendResponse({
22
22
  playDtmfResponse: {
23
- sessionRef
23
+ mediaSessionRef
24
24
  }
25
25
  });
26
26
  }));
@@ -15,17 +15,17 @@ const awaitForPlaybackFinished_1 = require("./utils/awaitForPlaybackFinished");
15
15
  const withErrorHandling_1 = require("./utils/withErrorHandling");
16
16
  function createPlayHandler(ari, voiceClient) {
17
17
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
18
- const { sessionRef } = request;
18
+ const { mediaSessionRef } = request;
19
19
  const playbackRef = request.playbackRef || (0, nanoid_1.nanoid)(10);
20
20
  yield ari.channels.play({
21
- channelId: sessionRef,
21
+ channelId: mediaSessionRef,
22
22
  media: `sound:${request.url}`,
23
23
  playbackId: playbackRef
24
24
  });
25
25
  yield (0, awaitForPlaybackFinished_1.awaitForPlaybackFinished)(ari, playbackRef);
26
26
  voiceClient.sendResponse({
27
27
  playResponse: {
28
- sessionRef,
28
+ mediaSessionRef,
29
29
  playbackRef
30
30
  }
31
31
  });
@@ -32,7 +32,7 @@ const common_1 = require("@fonoster/common");
32
32
  const zod_1 = require("zod");
33
33
  const withErrorHandling_1 = require("./utils/withErrorHandling");
34
34
  const requestSchema = zod_1.z.object({
35
- sessionRef: zod_1.z.string(),
35
+ mediaSessionRef: zod_1.z.string(),
36
36
  playbackRef: zod_1.z.string().optional(),
37
37
  action: zod_1.z.nativeEnum(common_1.PlaybackControlAction, {
38
38
  message: "Invalid playback control action."
@@ -41,7 +41,7 @@ const requestSchema = zod_1.z.object({
41
41
  function createPlaybackControlHandler(ari, voiceClient) {
42
42
  return (0, withErrorHandling_1.withErrorHandling)((playbackControlReq) => __awaiter(this, void 0, void 0, function* () {
43
43
  requestSchema.parse(playbackControlReq);
44
- const { sessionRef, playbackRef: playbackId, action } = playbackControlReq;
44
+ const { mediaSessionRef, playbackRef: playbackId, action } = playbackControlReq;
45
45
  try {
46
46
  if (action === common_1.PlaybackControlAction.STOP) {
47
47
  yield ari.playbacks.stop({ playbackId });
@@ -55,7 +55,7 @@ function createPlaybackControlHandler(ari, voiceClient) {
55
55
  }
56
56
  voiceClient.sendResponse({
57
57
  playbackControlResponse: {
58
- sessionRef
58
+ mediaSessionRef
59
59
  }
60
60
  });
61
61
  }));
@@ -34,10 +34,10 @@ const awaitForRecordingFinished_1 = require("./utils/awaitForRecordingFinished")
34
34
  const withErrorHandling_1 = require("./utils/withErrorHandling");
35
35
  function createRecordHandler(ari, voiceClient) {
36
36
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
37
- const { sessionRef, maxDuration, maxSilence, beep, finishOnKey } = request;
37
+ const { mediaSessionRef, maxDuration, maxSilence, beep, finishOnKey } = request;
38
38
  const name = (0, nanoid_1.nanoid)(10);
39
39
  yield ari.channels.record({
40
- channelId: sessionRef,
40
+ channelId: mediaSessionRef,
41
41
  format: common_1.RecordFormat.WAV,
42
42
  name,
43
43
  beep,
@@ -48,7 +48,7 @@ function createRecordHandler(ari, voiceClient) {
48
48
  const { duration } = yield (0, awaitForRecordingFinished_1.awaitForRecordingFinished)(ari, name);
49
49
  voiceClient.sendResponse({
50
50
  recordResponse: {
51
- sessionRef,
51
+ mediaSessionRef,
52
52
  name,
53
53
  format: common_1.RecordFormat.WAV,
54
54
  duration
@@ -15,7 +15,7 @@ const zod_1 = require("zod");
15
15
  const withErrorHandling_1 = require("./utils/withErrorHandling");
16
16
  const sayRequestSchema = zod_1.z.object({
17
17
  text: zod_1.z.string(),
18
- sessionRef: zod_1.z.string(),
18
+ mediaSessionRef: zod_1.z.string(),
19
19
  options: zod_1.z.record(zod_1.z.unknown()).optional()
20
20
  });
21
21
  function createSayHandler(ari, voiceClient) {
@@ -24,7 +24,7 @@ function createSayHandler(ari, voiceClient) {
24
24
  yield voiceClient.synthesize(request.text, request.options ? pb_util_1.struct.decode(request.options) : {});
25
25
  voiceClient.sendResponse({
26
26
  sayResponse: {
27
- sessionRef: request.sessionRef
27
+ mediaSessionRef: request.mediaSessionRef
28
28
  }
29
29
  });
30
30
  }));
@@ -13,12 +13,12 @@ exports.createStopSayHandler = createStopSayHandler;
13
13
  const zod_1 = require("zod");
14
14
  const withErrorHandling_1 = require("./utils/withErrorHandling");
15
15
  const requestSchema = zod_1.z.object({
16
- sessionRef: zod_1.z.string()
16
+ mediaSessionRef: zod_1.z.string()
17
17
  });
18
18
  function createStopSayHandler(voiceClient) {
19
19
  return (0, withErrorHandling_1.withErrorHandling)((stopSayReq) => __awaiter(this, void 0, void 0, function* () {
20
20
  requestSchema.parse(stopSayReq);
21
- const { sessionRef } = stopSayReq;
21
+ const { mediaSessionRef } = stopSayReq;
22
22
  try {
23
23
  voiceClient.stopSynthesis();
24
24
  }
@@ -27,7 +27,7 @@ function createStopSayHandler(voiceClient) {
27
27
  }
28
28
  voiceClient.sendResponse({
29
29
  stopSayResponse: {
30
- sessionRef
30
+ mediaSessionRef
31
31
  }
32
32
  });
33
33
  }));
@@ -38,15 +38,15 @@ const gatherRequestSchema = zod_1.z.object({
38
38
  });
39
39
  function createStreamGatherHandler(voiceClient) {
40
40
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
41
- const { sessionRef, source } = request;
41
+ const { mediaSessionRef, source } = request;
42
42
  gatherRequestSchema.parse(request);
43
43
  const effectiveSource = source || common_1.StreamGatherSource.SPEECH_AND_DTMF;
44
44
  if (effectiveSource.includes(common_1.StreamGatherSource.DTMF)) {
45
- voiceClient.startDtmfGather(sessionRef, (event) => {
45
+ voiceClient.startDtmfGather(mediaSessionRef, (event) => {
46
46
  const { digit } = event;
47
47
  voiceClient.sendResponse({
48
48
  streamGatherPayload: {
49
- sessionRef,
49
+ mediaSessionRef,
50
50
  digit,
51
51
  responseTime: 0
52
52
  }
@@ -58,7 +58,7 @@ function createStreamGatherHandler(voiceClient) {
58
58
  const { speech, responseTime } = event;
59
59
  voiceClient.sendResponse({
60
60
  streamGatherPayload: {
61
- sessionRef,
61
+ mediaSessionRef,
62
62
  speech,
63
63
  responseTime
64
64
  }
@@ -67,7 +67,7 @@ function createStreamGatherHandler(voiceClient) {
67
67
  }
68
68
  voiceClient.sendResponse({
69
69
  startStreamGatherResponse: {
70
- sessionRef
70
+ mediaSessionRef
71
71
  }
72
72
  });
73
73
  }));
@@ -41,7 +41,7 @@ const streamRequestSchema = zod_1.z.object({
41
41
  });
42
42
  function createStreamHandler(voiceClient) {
43
43
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
44
- const { sessionRef, direction, format } = request;
44
+ const { mediaSessionRef, direction, format } = request;
45
45
  streamRequestSchema.parse(request);
46
46
  const effectiveDirection = direction || common_1.StreamDirection.BOTH;
47
47
  const effectiveFormat = format || common_1.StreamAudioFormat.WAV;
@@ -51,7 +51,7 @@ function createStreamHandler(voiceClient) {
51
51
  voiceClient.getTranscriptionsStream().on("data", (data) => {
52
52
  voiceClient.sendResponse({
53
53
  streamPayload: {
54
- sessionRef,
54
+ mediaSessionRef,
55
55
  type: common_1.StreamMessageType.AUDIO_OUT,
56
56
  data,
57
57
  streamRef: "fixme",
@@ -62,7 +62,7 @@ function createStreamHandler(voiceClient) {
62
62
  }
63
63
  voiceClient.sendResponse({
64
64
  startStreamResponse: {
65
- sessionRef,
65
+ mediaSessionRef,
66
66
  streamRef: "fixme"
67
67
  }
68
68
  });
@@ -13,14 +13,14 @@ exports.createUnmuteHandler = createUnmuteHandler;
13
13
  const withErrorHandling_1 = require("./utils/withErrorHandling");
14
14
  function createUnmuteHandler(ari, voiceClient) {
15
15
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
16
- const { sessionRef, direction } = request;
16
+ const { mediaSessionRef, direction } = request;
17
17
  yield ari.channels.unmute({
18
- channelId: sessionRef,
18
+ channelId: mediaSessionRef,
19
19
  direction
20
20
  });
21
21
  voiceClient.sendResponse({
22
22
  muteResponse: {
23
- sessionRef
23
+ mediaSessionRef
24
24
  }
25
25
  });
26
26
  }));
@@ -41,7 +41,7 @@ const handleStasisStart_1 = require("./handleStasisStart");
41
41
  function createDialHandler(ari, voiceClient) {
42
42
  return function dial(request) {
43
43
  return __awaiter(this, void 0, void 0, function* () {
44
- const { sessionRef: channelId, destination, timeout } = request;
44
+ const { mediaSessionRef: channelId, destination, timeout } = request;
45
45
  const bridge = yield ari.bridges.create({
46
46
  type: "mixing"
47
47
  });
@@ -18,5 +18,5 @@
18
18
  */
19
19
  import { DialRecordDirection } from "@fonoster/common";
20
20
  import { Client } from "ari-client";
21
- declare function recordChannel(ari: Client, direction: DialRecordDirection.IN | DialRecordDirection.OUT, sessionRef: string): Promise<import("ari-client").LiveRecording>;
21
+ declare function recordChannel(ari: Client, direction: DialRecordDirection.IN | DialRecordDirection.OUT, mediaSessionRef: string): Promise<import("ari-client").LiveRecording>;
22
22
  export { recordChannel };
@@ -29,18 +29,18 @@ exports.recordChannel = recordChannel;
29
29
  * limitations under the License.
30
30
  */
31
31
  const common_1 = require("@fonoster/common");
32
- function recordChannel(ari, direction, sessionRef) {
32
+ function recordChannel(ari, direction, mediaSessionRef) {
33
33
  return __awaiter(this, void 0, void 0, function* () {
34
34
  const spy = direction.toLowerCase();
35
35
  const channel = yield ari.channels.snoopChannel({
36
36
  app: common_1.STASIS_APP_NAME,
37
- channelId: sessionRef,
37
+ channelId: mediaSessionRef,
38
38
  spy
39
39
  });
40
40
  return ari.channels.record({
41
41
  channelId: channel.id,
42
42
  format: common_1.RecordFormat.WAV,
43
- name: `${sessionRef}_${spy}`
43
+ name: `${mediaSessionRef}_${spy}`
44
44
  });
45
45
  });
46
46
  }
@@ -54,7 +54,7 @@ const gatherRequestSchema = zod_1.z.object({
54
54
  });
55
55
  function createGatherHandler(voiceClient) {
56
56
  return (0, withErrorHandling_1.withErrorHandling)((request) => __awaiter(this, void 0, void 0, function* () {
57
- const { sessionRef, source, timeout, finishOnKey, maxDigits } = request;
57
+ const { mediaSessionRef, source, timeout, finishOnKey, maxDigits } = request;
58
58
  gatherRequestSchema.parse(request);
59
59
  const { timeoutPromise, effectiveTimeout } = (0, getTimeoutPromise_1.getTimeoutPromise)(timeout);
60
60
  const effectiveSource = source || common_1.GatherSource.SPEECH_AND_DTMF;
@@ -65,7 +65,7 @@ function createGatherHandler(voiceClient) {
65
65
  if (effectiveSource.includes(common_1.GatherSource.DTMF)) {
66
66
  promises.push(voiceClient
67
67
  .waitForDtmf({
68
- sessionRef,
68
+ mediaSessionRef,
69
69
  finishOnKey,
70
70
  maxDigits,
71
71
  timeout: effectiveTimeout,
@@ -76,7 +76,7 @@ function createGatherHandler(voiceClient) {
76
76
  const result = (yield Promise.race(promises));
77
77
  voiceClient.sendResponse({
78
78
  gatherResponse: {
79
- sessionRef,
79
+ mediaSessionRef,
80
80
  responseTime: result.responseTime,
81
81
  speech: (0, utils_1.isDtmf)(result.digits) ? undefined : result.speech,
82
82
  digits: (0, utils_1.isDtmf)(result.digits) ? result.digits : undefined
@@ -83,6 +83,16 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
83
83
  streamTranscribe(stream) {
84
84
  const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
85
85
  const out = new stream_1.Stream();
86
+ // Track last transcript for UtteranceEnd fallback
87
+ // According to Deepgram docs: "If you receive an UtteranceEnd event without a
88
+ // preceding speech_final: true, it's advisable to process the last-received
89
+ // transcript as a complete utterance."
90
+ // UtteranceEnd fires after finalized words, so we store the last finalized transcript
91
+ // but also keep any transcript as a fallback
92
+ let lastFinalizedTranscript = null;
93
+ let lastFinalizedTranscriptTime = 0;
94
+ let lastAnyTranscript = null;
95
+ let lastAnyTranscriptTime = 0;
86
96
  // Add error handler immediately to catch any connection errors
87
97
  connection.on(LiveTranscriptionEvents.Error, (err) => {
88
98
  logger.error("error on Deepgram connection", { err });
@@ -106,8 +116,23 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
106
116
  });
107
117
  connection.on(LiveTranscriptionEvents.Transcript, (data) => {
108
118
  var _a, _b, _c;
109
- if (!((_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript) ||
110
- !data.speech_final) {
119
+ const transcript = (_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript;
120
+ const hasTranscript = !!transcript;
121
+ const isFinal = data.is_final === true;
122
+ const speechFinal = data.speech_final === true;
123
+ // Store any transcript for UtteranceEnd fallback
124
+ if (hasTranscript) {
125
+ lastAnyTranscript = transcript;
126
+ lastAnyTranscriptTime = Date.now();
127
+ // Store finalized transcripts separately (preferred for UtteranceEnd)
128
+ if (isFinal || speechFinal) {
129
+ lastFinalizedTranscript = transcript;
130
+ lastFinalizedTranscriptTime = Date.now();
131
+ }
132
+ }
133
+ // Process transcript if it has content and is final
134
+ // Check both speech_final (primary) and is_final (backup)
135
+ if (!hasTranscript || (!speechFinal && !isFinal)) {
111
136
  return;
112
137
  }
113
138
  const words = data.channel.alternatives[0].words || [];
@@ -117,13 +142,55 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
117
142
  words.length
118
143
  : 0;
119
144
  logger.verbose("transcribe result", {
120
- speech: data.channel.alternatives[0].transcript,
121
- responseTime
145
+ speech: transcript,
146
+ responseTime,
147
+ isFinal,
148
+ speechFinal
122
149
  });
123
150
  out.emit("data", {
124
- speech: data.channel.alternatives[0].transcript,
151
+ speech: transcript,
125
152
  responseTime
126
153
  });
154
+ // Clear transcripts after processing (they've been emitted)
155
+ lastFinalizedTranscript = null;
156
+ lastAnyTranscript = null;
157
+ });
158
+ // CRITICAL: Handle UtteranceEnd events (fallback when speech_final never becomes true)
159
+ // This is Deepgram's recommended fallback mechanism for noisy environments
160
+ // UtteranceEnd requires: interim_results=true and utterance_end_ms parameter
161
+ // UtteranceEnd fires after finalized words, so prefer lastFinalizedTranscript
162
+ connection.on(LiveTranscriptionEvents.UtteranceEnd, (data) => {
163
+ // Prefer finalized transcript, fall back to any transcript
164
+ const transcriptToUse = lastFinalizedTranscript || lastAnyTranscript;
165
+ const transcriptTime = lastFinalizedTranscript
166
+ ? lastFinalizedTranscriptTime
167
+ : lastAnyTranscriptTime;
168
+ if (transcriptToUse) {
169
+ // Use last_word_end from UtteranceEnd event if available for more accurate timing
170
+ // Otherwise fall back to time since last transcript
171
+ const lastWordEnd = data === null || data === void 0 ? void 0 : data.last_word_end;
172
+ const responseTime = lastWordEnd
173
+ ? lastWordEnd * 1000 // Convert seconds to milliseconds
174
+ : transcriptTime
175
+ ? Date.now() - transcriptTime
176
+ : 0;
177
+ logger.info("Deepgram UtteranceEnd - processing last transcript", {
178
+ speech: transcriptToUse,
179
+ responseTime,
180
+ lastWordEnd: lastWordEnd,
181
+ wasFinalized: !!lastFinalizedTranscript
182
+ });
183
+ out.emit("data", {
184
+ speech: transcriptToUse,
185
+ responseTime
186
+ });
187
+ // Clear transcripts after processing
188
+ lastFinalizedTranscript = null;
189
+ lastAnyTranscript = null;
190
+ }
191
+ else {
192
+ logger.warn("Deepgram UtteranceEnd received but no last transcript available");
193
+ }
127
194
  });
128
195
  });
129
196
  // Handle stream errors and cleanup
@@ -212,7 +279,9 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
212
279
  .optional(),
213
280
  model: z
214
281
  .nativeEnum(types_1.DeepgramModel, { message: "Invalid Deepgram model" })
215
- .optional()
282
+ .optional(),
283
+ interimResults: z.boolean().optional(),
284
+ utteranceEndMs: z.number().int().min(1000).max(5000).optional()
216
285
  });
217
286
  }
218
287
  static getCredentialsValidationSchema() {
@@ -223,7 +292,17 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
223
292
  }
224
293
  exports.Deepgram = Deepgram;
225
294
  function buildTranscribeConfig(config) {
226
- return Object.assign(Object.assign({}, config), { model: config.model || types_1.DeepgramModel.NOVA_2_PHONECALL, language: config.languageCode || common_1.VoiceLanguage.EN_US, encoding: "linear16", sample_rate: 16000, channels: 1, smart_format: config.smartFormat || true,
295
+ // UtteranceEnd requires interim_results to be enabled
296
+ // Default to true to enable UtteranceEnd fallback mechanism
297
+ const interimResults = config.interimResults !== false;
298
+ // Default utterance_end_ms to 1000ms (minimum required value)
299
+ // This enables UtteranceEnd events as a fallback when speech_final never becomes true
300
+ const utteranceEndMs = config.utteranceEndMs || 1000;
301
+ return Object.assign(Object.assign({}, config), { model: config.model || types_1.DeepgramModel.NOVA_2_PHONECALL, language: config.languageCode || common_1.VoiceLanguage.EN_US, encoding: "linear16", sample_rate: 16000, channels: 1, smart_format: config.smartFormat !== false,
227
302
  // This needs to be set to true to avoid delays while using smart_format
228
- no_delay: config.noDelay || true });
303
+ no_delay: config.noDelay !== false,
304
+ // REQUIRED for UtteranceEnd: interim_results must be true
305
+ interim_results: interimResults,
306
+ // REQUIRED for UtteranceEnd: utterance_end_ms parameter
307
+ utterance_end_ms: utteranceEndMs });
229
308
  }
@@ -51,6 +51,8 @@ type DeepgramSttConfig = {
51
51
  model: DeepgramModel;
52
52
  smartFormat: boolean;
53
53
  noDelay: boolean;
54
+ interimResults?: boolean;
55
+ utteranceEndMs?: number;
54
56
  };
55
57
  credentials: {
56
58
  apiKey: string;
@@ -35,6 +35,7 @@ declare enum ChannelVar {
35
35
  APP_REF = "APP_REF",
36
36
  APP_ENDPOINT = "APP_ENDPOINT",
37
37
  METADATA = "METADATA",
38
- FROM_EXTERNAL_MEDIA = "FROM_EXTERNAL_MEDIA"
38
+ FROM_EXTERNAL_MEDIA = "FROM_EXTERNAL_MEDIA",
39
+ CALL_REF = "CALL_REF"
39
40
  }
40
41
  export { AriEvent, ChannelVar };
@@ -41,4 +41,5 @@ var ChannelVar;
41
41
  ChannelVar["APP_ENDPOINT"] = "APP_ENDPOINT";
42
42
  ChannelVar["METADATA"] = "METADATA";
43
43
  ChannelVar["FROM_EXTERNAL_MEDIA"] = "FROM_EXTERNAL_MEDIA";
44
+ ChannelVar["CALL_REF"] = "CALL_REF";
44
45
  })(ChannelVar || (exports.ChannelVar = ChannelVar = {}));
@@ -32,12 +32,12 @@ type VoiceClient = {
32
32
  speech: string;
33
33
  responseTime: number;
34
34
  }) => void) => void;
35
- startDtmfGather: (sessionRef: string, callback: (event: {
35
+ startDtmfGather: (mediaSessionRef: string, callback: (event: {
36
36
  digit: string;
37
37
  }) => void) => void;
38
38
  stopStreamGather: () => void;
39
39
  waitForDtmf: (params: {
40
- sessionRef: string;
40
+ mediaSessionRef: string;
41
41
  finishOnKey: string;
42
42
  maxDigits: number;
43
43
  timeout: number;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/apiserver",
3
- "version": "0.16.10",
3
+ "version": "0.17.0",
4
4
  "description": "APIServer for Fonoster",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -22,14 +22,14 @@
22
22
  },
23
23
  "dependencies": {
24
24
  "@deepgram/sdk": "^3.5.1",
25
- "@fonoster/authz": "^0.16.10",
26
- "@fonoster/autopilot": "^0.16.10",
27
- "@fonoster/common": "^0.16.8",
28
- "@fonoster/identity": "^0.16.10",
29
- "@fonoster/logger": "^0.16.7",
30
- "@fonoster/sipnet": "^0.16.10",
31
- "@fonoster/streams": "^0.16.7",
32
- "@fonoster/types": "^0.16.7",
25
+ "@fonoster/authz": "^0.17.0",
26
+ "@fonoster/autopilot": "^0.17.0",
27
+ "@fonoster/common": "^0.17.0",
28
+ "@fonoster/identity": "^0.17.0",
29
+ "@fonoster/logger": "^0.17.0",
30
+ "@fonoster/sipnet": "^0.17.0",
31
+ "@fonoster/streams": "^0.17.0",
32
+ "@fonoster/types": "^0.17.0",
33
33
  "@google-cloud/speech": "^6.6.0",
34
34
  "@google-cloud/text-to-speech": "^5.3.0",
35
35
  "@grpc/grpc-js": "~1.10.11",
@@ -76,5 +76,5 @@
76
76
  "@types/uuid": "^10.0.0",
77
77
  "@types/validator": "^13.15.10"
78
78
  },
79
- "gitHead": "01b11ee98556983fa4dae5aabc0daa66bcbe9e82"
79
+ "gitHead": "4d1a9afaec6f294184386e009d1a4e292fb3583b"
80
80
  }