npm - @fonoster/apiserver - Versions diffs - 0.9.35 → 0.9.39 - Mend

@fonoster/apiserver 0.9.35 → 0.9.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/dist/applications/createGetFnUtil.d.ts +3 -3
package/dist/core/httpBridge.js +12 -2
package/dist/voice/VoiceClientImpl.d.ts +1 -1
package/dist/voice/VoiceClientImpl.js +15 -11
package/dist/voice/stt/Deepgram.js +57 -8
package/dist/voice/tts/AbstractTextToSpeech.d.ts +7 -2
package/dist/voice/tts/AbstractTextToSpeech.js +30 -0
package/dist/voice/tts/Azure.d.ts +3 -10
package/dist/voice/tts/Azure.js +38 -49
package/dist/voice/tts/Deepgram.d.ts +3 -10
package/dist/voice/tts/Deepgram.js +11 -64
package/dist/voice/tts/ElevenLabs.d.ts +3 -10
package/dist/voice/tts/ElevenLabs.js +11 -64
package/dist/voice/tts/Google.d.ts +3 -10
package/dist/voice/tts/Google.js +13 -30
package/dist/voice/tts/types.d.ts +27 -1
package/dist/voice/tts/utils/createChunkedSynthesisStream.d.ts +28 -0
package/dist/voice/tts/utils/createChunkedSynthesisStream.js +78 -0
package/dist/voice/tts/utils/createErrorStream.d.ts +21 -0
package/dist/voice/tts/utils/createErrorStream.js +28 -0
package/dist/voice/types/voice.d.ts +2 -2
package/package.json +2 -2
/package/dist/voice/tts/{isSsml.d.ts → utils/isSsml.d.ts} +0 -0
/package/dist/voice/tts/{isSsml.js → utils/isSsml.js} +0 -0
/package/dist/voice/tts/{streamToBuffer.d.ts → utils/streamToBuffer.d.ts} +0 -0
/package/dist/voice/tts/{streamToBuffer.js → utils/streamToBuffer.js} +0 -0

package/dist/applications/createGetFnUtil.d.ts CHANGED Viewed

@@ -5,22 +5,22 @@ declare function createGetFnUtil(prisma: Prisma): (ref: string) => Promise<{
     };
     textToSpeech: {
         ref: string;
-        config: import("@prisma/client/runtime/library").JsonValue;
         credentials: string | null;
+        config: import("@prisma/client/runtime/library").JsonValue;
         productRef: string;
         applicationRef: string;
     };
     speechToText: {
         ref: string;
-        config: import("@prisma/client/runtime/library").JsonValue;
         credentials: string | null;
+        config: import("@prisma/client/runtime/library").JsonValue;
         productRef: string;
         applicationRef: string;
     };
     intelligence: {
         ref: string;
-        config: import("@prisma/client/runtime/library").JsonValue;
         credentials: string | null;
+        config: import("@prisma/client/runtime/library").JsonValue;
         productRef: string;
         applicationRef: string;
     };

package/dist/core/httpBridge.js CHANGED Viewed

@@ -32,8 +32,13 @@ function httpBridge(identityConfig, params) {
         }
         res.setHeader("content-type", CONTENT_TYPE);
         stream.on("error", (error) => {
-            logger.error(`Error reading file: ${error.message}`);
-            res.status(500).send("Error reading file!");
+            logger.error(`error reading file: ${error.message}`);
+            if (!res.headersSent) {
+                res.status(500).send("Error reading file!");
+            }
+            else {
+                res.end();
+            }
         });
         stream.on("end", () => {
             res.end();
@@ -61,6 +66,11 @@ function httpBridge(identityConfig, params) {
             streamMap.set(id, stream);
         },
         removeStream: (id) => {
+            logger.verbose(`removing stream with id: ${id}`);
+            const stream = streamMap.get(id);
+            if (stream) {
+                stream.destroy();
+            }
             streamMap.delete(id);
         },
         getStream: (id) => {

package/dist/voice/VoiceClientImpl.d.ts CHANGED Viewed

@@ -52,7 +52,7 @@ declare class VoiceClientImpl implements VoiceClient {
     startSpeechGather(callback: (stream: {
         speech: string;
         responseTime: number;
-    }) => void): Promise<void>;
+    }) => void): void;
     startDtmfGather(sessionRef: string, callback: (event: {
         digit: string;
     }) => void): Promise<void>;

package/dist/voice/VoiceClientImpl.js CHANGED Viewed

@@ -106,7 +106,6 @@ class VoiceClientImpl {
                 }
                 catch (e) {
                     logger.error("authz service error", e);
-                    // TODO: Play a different sound
                     yield ari.channels.answer({ channelId });
                     yield ari.channels.play({ channelId, media: "sound:unavailable" });
                     yield new Promise((resolve) => setTimeout(resolve, 2000));
@@ -186,7 +185,13 @@ class VoiceClientImpl {
     }
     synthesize(text, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            const { ref, stream } = yield this.tts.synthesize(text, options);
+            const { ref, stream } = this.tts.synthesize(text, options);
+            stream.on("error", (error) => __awaiter(this, void 0, void 0, function* () {
+                logger.error(`stream error for ref ${ref}: ${error.message}`, {
+                    errorDetails: error.stack || "No stack trace"
+                });
+                this.filesServer.removeStream(ref);
+            }));
             this.filesServer.addStream(ref, stream);
             return ref;
         });
@@ -203,15 +208,14 @@ class VoiceClientImpl {
         });
     }
     startSpeechGather(callback) {
-        return __awaiter(this, void 0, void 0, function* () {
-            try {
-                const out = this.stt.streamTranscribe(this.transcriptionsStream);
-                out.on("data", callback);
-            }
-            catch (e) {
-                logger.error(e);
-            }
-        });
+        const out = this.stt.streamTranscribe(this.transcriptionsStream);
+        out.on("data", callback);
+        out.on("error", (error) => __awaiter(this, void 0, void 0, function* () {
+            logger.error("speech recognition error", { error });
+            const { sessionRef: channelId } = this.config;
+            const { ari } = this;
+            ari.channels.hangup({ channelId });
+        }));
     }
     startDtmfGather(sessionRef, callback) {
         return __awaiter(this, void 0, void 0, function* () {

package/dist/voice/stt/Deepgram.js CHANGED Viewed

@@ -83,15 +83,34 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
     streamTranscribe(stream) {
         const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
         const out = new stream_1.Stream();
+        // Add error handler immediately to catch any connection errors
+        connection.on(LiveTranscriptionEvents.Error, (err) => {
+            logger.error("error on Deepgram connection", { err });
+            // Emit error properly for handling upstream
+            out.emit("error", new Error("Speech recognition service error"));
+            try {
+                connection.destroy();
+            }
+            catch (destroyErr) {
+                logger.error("error destroying connection", { destroyErr });
+            }
+        });
         connection.on(LiveTranscriptionEvents.Open, () => {
             stream.on("data", (chunk) => {
-                connection.send(chunk);
+                try {
+                    connection.send(chunk);
+                }
+                catch (err) {
+                    logger.error("error sending chunk to Deepgram", { err });
+                }
             });
             connection.on(LiveTranscriptionEvents.Transcript, (data) => {
-                if (!data.channel.alternatives[0].transcript || !data.speech_final) {
+                var _a, _b, _c;
+                if (!((_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript) ||
+                    !data.speech_final) {
                     return;
                 }
-                const words = data.channel.alternatives[0].words;
+                const words = data.channel.alternatives[0].words || [];
                 const responseTime = words.length > 0
                     ? (words.reduce((acc, word) => acc + (word.end - word.start), 0) *
                         1000) /
@@ -106,10 +125,30 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
                     responseTime
                 });
             });
-            connection.on(LiveTranscriptionEvents.Error, (err) => {
-                logger.warn("error on Deepgram connection", { err });
-                connection.destroy();
+        });
+        // Handle stream errors and cleanup
+        stream.on("error", (err) => {
+            logger.warn("error on input stream", { err });
+            // Instead of emitting an error, just end the stream with a message
+            out.emit("data", {
+                speech: "Error with audio input stream",
+                responseTime: 0
             });
+            out.emit("end");
+            try {
+                connection.destroy();
+            }
+            catch (destroyErr) {
+                logger.warn("error destroying connection", { destroyErr });
+            }
+        });
+        stream.on("end", () => {
+            try {
+                connection.destroy();
+            }
+            catch (err) {
+                logger.error("error destroying connection on stream end", { err });
+            }
         });
         return out;
     }
@@ -143,10 +182,20 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
                     });
                 });
                 stream.on("end", () => {
-                    connection.destroy();
+                    try {
+                        connection.destroy();
+                    }
+                    catch (destroyErr) {
+                        logger.error("error destroying connection", { destroyErr });
+                    }
                 });
                 stream.on("error", (err) => {
-                    connection.destroy();
+                    try {
+                        connection.destroy();
+                    }
+                    catch (destroyErr) {
+                        logger.error("error destroying connection", { destroyErr });
+                    }
                     reject(err);
                 });
             });

package/dist/voice/tts/AbstractTextToSpeech.d.ts CHANGED Viewed

@@ -23,13 +23,18 @@ declare abstract class AbstractTextToSpeech<E, S extends SynthOptions = SynthOpt
     abstract readonly engineName: E;
     protected abstract OUTPUT_FORMAT: "wav" | "sln16";
     protected abstract CACHING_FIELDS: string[];
-    abstract synthesize(text: string, options: S): Promise<{
+    abstract synthesize(text: string, options: S): {
         ref: string;
         stream: Readable;
-    }>;
+    };
     static getConfigValidationSchema(): z.Schema;
     static getCredentialsValidationSchema(): z.Schema;
     protected createMediaReference(): string;
     getName(): E;
+    protected logSynthesisRequest(text: string, options: S): void;
+    protected safeSynthesize(ref: string, synthesisFunction: () => Promise<Readable>): Promise<{
+        ref: string;
+        stream: Readable;
+    }>;
 }
 export { AbstractTextToSpeech };

package/dist/voice/tts/AbstractTextToSpeech.js CHANGED Viewed

@@ -1,8 +1,21 @@
 "use strict";
+var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
+    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
+    return new (P || (P = Promise))(function (resolve, reject) {
+        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
+        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
+        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
+        step((generator = generator.apply(thisArg, _arguments || [])).next());
+    });
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.AbstractTextToSpeech = void 0;
+const logger_1 = require("@fonoster/logger");
 const uuid_1 = require("uuid");
 const MethodNotImplementedError_1 = require("../errors/MethodNotImplementedError");
+const createErrorStream_1 = require("./utils/createErrorStream");
+const isSsml_1 = require("./utils/isSsml");
+const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
 class AbstractTextToSpeech {
     static getConfigValidationSchema() {
         throw new MethodNotImplementedError_1.MethodNotImplementedError();
@@ -16,5 +29,22 @@ class AbstractTextToSpeech {
     getName() {
         return this.engineName;
     }
+    logSynthesisRequest(text, options) {
+        logger.verbose(`synthesize [input: ${text}, isSsml=${(0, isSsml_1.isSsml)(text)} options: ${JSON.stringify(options)}]`);
+    }
+    safeSynthesize(ref, synthesisFunction) {
+        return __awaiter(this, void 0, void 0, function* () {
+            try {
+                const stream = yield synthesisFunction();
+                return { ref, stream };
+            }
+            catch (error) {
+                return {
+                    ref,
+                    stream: (0, createErrorStream_1.createErrorStream)(`${this.engineName} synthesis failed: ${error.message}`)
+                };
+            }
+        });
+    }
 }
 exports.AbstractTextToSpeech = AbstractTextToSpeech;

package/dist/voice/tts/Azure.d.ts CHANGED Viewed

@@ -19,25 +19,18 @@
 import { Readable } from "stream";
 import * as z from "zod";
 import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
-import { SynthOptions } from "./types";
+import { AzureTTSConfig, SynthOptions } from "./types";
 declare const ENGINE_NAME = "tts.azure";
-type AzureTTSConfig = {
-    [key: string]: Record<string, string>;
-    credentials: {
-        subscriptionKey: string;
-        serviceRegion: string;
-    };
-};
 declare class Azure extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     config: AzureTTSConfig;
     readonly engineName = "tts.azure";
     protected readonly OUTPUT_FORMAT = "sln16";
     protected readonly CACHING_FIELDS: string[];
     constructor(config: AzureTTSConfig);
-    synthesize(text: string, options: SynthOptions): Promise<{
+    synthesize(text: string, options: SynthOptions): {
         ref: string;
         stream: Readable;
-    }>;
+    };
     static getConfigValidationSchema(): z.Schema;
     static getCredentialsValidationSchema(): z.Schema;
 }

package/dist/voice/tts/Azure.js CHANGED Viewed

@@ -43,34 +43,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ENGINE_NAME = exports.Azure = void 0;
-/**
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-const stream_1 = require("stream");
 const common_1 = require("@fonoster/common");
-const logger_1 = require("@fonoster/logger");
 const sdk = __importStar(require("microsoft-cognitiveservices-speech-sdk"));
 const z = __importStar(require("zod"));
 const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
-const isSsml_1 = require("./isSsml");
+const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
+const isSsml_1 = require("./utils/isSsml");
 const ENGINE_NAME = "tts.azure";
 exports.ENGINE_NAME = ENGINE_NAME;
-const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
 class Azure extends AbstractTextToSpeech_1.AbstractTextToSpeech {
     constructor(config) {
         super();
@@ -80,36 +60,45 @@ class Azure extends AbstractTextToSpeech_1.AbstractTextToSpeech {
         this.config = config;
     }
     synthesize(text, options) {
-        return __awaiter(this, void 0, void 0, function* () {
-            logger.verbose(`synthesize [input: ${text}, isSsml=${(0, isSsml_1.isSsml)(text)} options: ${JSON.stringify(options)}]`);
-            const { subscriptionKey, serviceRegion } = this.config.credentials;
-            const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
-            speechConfig.speechSynthesisVoiceName = options.voice;
-            speechConfig.speechSynthesisOutputFormat =
-                sdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm;
+        this.logSynthesisRequest(text, options);
+        const ref = this.createMediaReference();
+        const { subscriptionKey, serviceRegion } = this.config.credentials;
+        const voice = options.voice || this.config.config.voice;
+        const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
+        speechConfig.speechSynthesisVoiceName = voice;
+        speechConfig.speechSynthesisOutputFormat =
+            sdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm;
+        const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
             const synthesizer = new sdk.SpeechSynthesizer(speechConfig);
-            const isSSML = (0, isSsml_1.isSsml)(text);
+            const isSSML = (0, isSsml_1.isSsml)(chunkText);
             const func = isSSML ? "speakSsmlAsync" : "speakTextAsync";
-            const audioData = yield new Promise((resolve, reject) => {
-                const audioChunks = [];
-                synthesizer[func](text, (result) => {
-                    if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
-                        audioChunks.push(Buffer.from(result.audioData));
-                        resolve(Buffer.concat(audioChunks));
-                    }
-                    else {
-                        reject(new Error("Speech synthesis canceled: " + result.errorDetails));
-                    }
-                    synthesizer.close();
-                }, (err) => {
-                    synthesizer.close();
-                    reject(new Error(err));
+            try {
+                const audioData = yield new Promise((resolve, reject) => {
+                    const audioChunks = [];
+                    synthesizer[func](chunkText, (result) => {
+                        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
+                            audioChunks.push(Buffer.from(result.audioData));
+                            resolve(Buffer.concat(audioChunks));
+                        }
+                        else {
+                            reject(new Error("Speech synthesis canceled: " + result.errorDetails));
+                        }
+                        synthesizer.close();
+                    }, (err) => {
+                        synthesizer.close();
+                        reject(new Error(err));
+                    });
                 });
-            });
-            const ref = this.createMediaReference();
-            const stream = stream_1.Readable.from(audioData);
-            return { ref, stream };
-        });
+                // Ignore the first 44 bytes of the response to avoid the WAV header
+                return audioData.subarray(44);
+            }
+            catch (error) {
+                // Make sure synthesizer is closed in case of error
+                synthesizer.close();
+                throw error;
+            }
+        }));
+        return { ref, stream };
     }
     static getConfigValidationSchema() {
         return z.object({

package/dist/voice/tts/Deepgram.d.ts CHANGED Viewed

@@ -20,14 +20,8 @@ import { Readable } from "stream";
 import { DeepgramClient } from "@deepgram/sdk";
 import * as z from "zod";
 import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
-import { SynthOptions } from "./types";
+import { DeepgramTtsConfig, SynthOptions } from "./types";
 declare const ENGINE_NAME = "tts.deepgram";
-type DeepgramTtsConfig = {
-    [key: string]: Record<string, string>;
-    credentials: {
-        apiKey: string;
-    };
-};
 declare class Deepgram extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     client: DeepgramClient;
     engineConfig: DeepgramTtsConfig;
@@ -37,11 +31,10 @@ declare class Deepgram extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     protected readonly AUDIO_ENCODING: "linear16";
     protected readonly SAMPLE_RATE_HERTZ = 16000;
     constructor(config: DeepgramTtsConfig);
-    synthesize(text: string, options: SynthOptions): Promise<{
+    synthesize(text: string, options: SynthOptions): {
         ref: string;
         stream: Readable;
-    }>;
-    private doSynthesize;
+    };
     static getConfigValidationSchema(): z.Schema;
     static getCredentialsValidationSchema(): z.Schema;
 }

package/dist/voice/tts/Deepgram.js CHANGED Viewed

@@ -43,36 +43,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ENGINE_NAME = exports.Deepgram = void 0;
-/**
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-const stream_1 = require("stream");
 const sdk_1 = require("@deepgram/sdk");
 const common_1 = require("@fonoster/common");
-const logger_1 = require("@fonoster/logger");
 const z = __importStar(require("zod"));
-const textChunksByFirstNaturalPause_1 = require("../handlers/utils/textChunksByFirstNaturalPause");
 const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
-const isSsml_1 = require("./isSsml");
-const streamToBuffer_1 = require("./streamToBuffer");
+const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
+const streamToBuffer_1 = require("./utils/streamToBuffer");
 const ENGINE_NAME = "tts.deepgram";
 exports.ENGINE_NAME = ENGINE_NAME;
-const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
 class Deepgram extends AbstractTextToSpeech_1.AbstractTextToSpeech {
     constructor(config) {
         super();
@@ -85,51 +63,20 @@ class Deepgram extends AbstractTextToSpeech_1.AbstractTextToSpeech {
         this.engineConfig = config;
     }
     synthesize(text, options) {
-        return __awaiter(this, void 0, void 0, function* () {
-            logger.verbose(`synthesize [input: ${text}, isSsml=${(0, isSsml_1.isSsml)(text)} options: ${JSON.stringify(options)}]`);
-            const { voice } = this.engineConfig.config;
-            const ref = this.createMediaReference();
-            const chunks = (0, textChunksByFirstNaturalPause_1.textChunksByFirstNaturalPause)(text);
-            const stream = new stream_1.Readable({ read() { } });
-            const results = new Array(chunks.length);
-            let nextIndexToPush = 0;
-            function observeQueue() {
-                if (nextIndexToPush < results.length &&
-                    results[nextIndexToPush] !== undefined) {
-                    stream.push(results[nextIndexToPush]);
-                    nextIndexToPush++;
-                    setImmediate(observeQueue);
-                }
-                else if (nextIndexToPush < results.length) {
-                    setTimeout(observeQueue, 10);
-                }
-                else {
-                    stream.push(null);
-                }
-            }
-            observeQueue();
-            chunks.forEach((text, index) => {
-                this.doSynthesize(text, voice)
-                    .then((synthesizedText) => {
-                    results[index] = synthesizedText;
-                })
-                    .catch((error) => {
-                    stream.emit("error", error);
-                });
-            });
-            return { ref, stream };
-        });
-    }
-    doSynthesize(text, voice) {
-        return __awaiter(this, void 0, void 0, function* () {
-            const response = yield this.client.speak.request({ text }, {
-                model: voice || common_1.DeepgramVoice.AURA_ASTERIA_EN,
+        this.logSynthesisRequest(text, options);
+        const { voice } = this.engineConfig.config;
+        const ref = this.createMediaReference();
+        const selectedVoice = voice || common_1.DeepgramVoice.AURA_ASTERIA_EN;
+        const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
+            const response = yield this.client.speak.request({ text: chunkText }, {
+                model: selectedVoice,
                 encoding: this.AUDIO_ENCODING,
                 sample_rate: this.SAMPLE_RATE_HERTZ,
                 container: "none"
             });
             return (yield (0, streamToBuffer_1.streamToBuffer)(yield response.getStream()));
-        });
+        }));
+        return { ref, stream };
     }
     static getConfigValidationSchema() {
         return z.object({

package/dist/voice/tts/ElevenLabs.d.ts CHANGED Viewed

@@ -20,14 +20,8 @@ import { Readable } from "stream";
 import { ElevenLabsClient } from "elevenlabs";
 import * as z from "zod";
 import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
-import { SynthOptions } from "./types";
+import { ElevenLabsTtsConfig, SynthOptions } from "./types";
 declare const ENGINE_NAME = "tts.elevenlabs";
-type ElevenLabsTtsConfig = {
-    [key: string]: Record<string, string>;
-    credentials: {
-        apiKey: string;
-    };
-};
 declare class ElevenLabs extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     client: ElevenLabsClient;
     engineConfig: ElevenLabsTtsConfig;
@@ -35,11 +29,10 @@ declare class ElevenLabs extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     protected readonly OUTPUT_FORMAT = "sln16";
     protected readonly CACHING_FIELDS: string[];
     constructor(config: ElevenLabsTtsConfig);
-    synthesize(text: string, options: SynthOptions): Promise<{
+    synthesize(text: string, options: SynthOptions): {
         ref: string;
         stream: Readable;
-    }>;
-    private doSynthesize;
+    };
     static getConfigValidationSchema(): z.Schema;
     static getCredentialsValidationSchema(): z.Schema;
 }

package/dist/voice/tts/ElevenLabs.js CHANGED Viewed

@@ -43,35 +43,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ElevenLabs = exports.ENGINE_NAME = void 0;
-/**
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-const stream_1 = require("stream");
-const logger_1 = require("@fonoster/logger");
 const elevenlabs_1 = require("elevenlabs");
 const z = __importStar(require("zod"));
-const textChunksByFirstNaturalPause_1 = require("../handlers/utils/textChunksByFirstNaturalPause"); // Assuming this is the chunking function
 const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
-const isSsml_1 = require("./isSsml");
-const streamToBuffer_1 = require("./streamToBuffer");
+const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
+const streamToBuffer_1 = require("./utils/streamToBuffer");
 const ENGINE_NAME = "tts.elevenlabs";
 exports.ENGINE_NAME = ENGINE_NAME;
-const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
 class ElevenLabs extends AbstractTextToSpeech_1.AbstractTextToSpeech {
     constructor(config) {
         super();
@@ -82,55 +60,24 @@ class ElevenLabs extends AbstractTextToSpeech_1.AbstractTextToSpeech {
         this.engineConfig = config;
     }
     synthesize(text, options) {
-        return __awaiter(this, void 0, void 0, function* () {
-            logger.verbose(`synthesize [input: ${text}, isSsml=${(0, isSsml_1.isSsml)(text)} options: ${JSON.stringify(options)}]`);
-            const { voice, model } = this.engineConfig.config;
-            const ref = this.createMediaReference();
-            const chunks = (0, textChunksByFirstNaturalPause_1.textChunksByFirstNaturalPause)(text);
-            const stream = new stream_1.Readable({ read() { } });
-            const results = new Array(chunks.length);
-            let nextIndexToPush = 0;
-            function observeQueue() {
-                if (nextIndexToPush < results.length &&
-                    results[nextIndexToPush] !== undefined) {
-                    stream.push(results[nextIndexToPush]);
-                    nextIndexToPush++;
-                    setImmediate(observeQueue);
-                }
-                else if (nextIndexToPush < results.length) {
-                    setTimeout(observeQueue, 10);
-                }
-                else {
-                    stream.push(null);
-                }
-            }
-            observeQueue();
-            chunks.forEach((text, index) => {
-                this.doSynthesize({ text, voice, model })
-                    .then((synthesizedText) => {
-                    results[index] = synthesizedText;
-                })
-                    .catch((error) => {
-                    stream.emit("error", error);
-                });
-            });
-            return { ref, stream };
-        });
-    }
-    doSynthesize(params) {
-        return __awaiter(this, void 0, void 0, function* () {
-            const { text, voice, model } = params;
+        this.logSynthesisRequest(text, options);
+        const { voice, model } = this.engineConfig.config;
+        const ref = this.createMediaReference();
+        const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
             const response = yield this.client.generate({
                 stream: true,
                 voice,
-                text,
+                text: chunkText,
                 model_id: model !== null && model !== void 0 ? model : "eleven_flash_v2_5",
                 output_format: "pcm_16000",
                 // TODO: Make this configurable
                 optimize_streaming_latency: 2
+            }, {
+                maxRetries: 3
             });
             return (yield (0, streamToBuffer_1.streamToBuffer)(response));
-        });
+        }));
+        return { ref, stream };
     }
     static getConfigValidationSchema() {
         return z.object({});

package/dist/voice/tts/Google.d.ts CHANGED Viewed

@@ -20,15 +20,8 @@ import { Readable } from "stream";
 import { TextToSpeechClient } from "@google-cloud/text-to-speech";
 import * as z from "zod";
 import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
-import { SynthOptions } from "./types";
+import { GoogleTtsConfig, SynthOptions } from "./types";
 declare const ENGINE_NAME = "tts.google";
-type GoogleTtsConfig = {
-    [key: string]: Record<string, string>;
-    credentials: {
-        client_email: string;
-        private_key: string;
-    };
-};
 declare class Google extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     client: TextToSpeechClient;
     engineConfig: GoogleTtsConfig;
@@ -38,10 +31,10 @@ declare class Google extends AbstractTextToSpeech<typeof ENGINE_NAME> {
     protected readonly AUDIO_ENCODING: "LINEAR16";
     protected readonly SAMPLE_RATE_HERTZ = 16000;
     constructor(config: GoogleTtsConfig);
-    synthesize(text: string, options: SynthOptions): Promise<{
+    synthesize(text: string, options: SynthOptions): {
         ref: string;
         stream: Readable;
-    }>;
+    };
     static getConfigValidationSchema(): z.Schema;
     static getCredentialsValidationSchema(): z.Schema;
 }

package/dist/voice/tts/Google.js CHANGED Viewed

@@ -43,34 +43,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 };
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.Google = exports.ENGINE_NAME = void 0;
-/**
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-const stream_1 = require("stream");
 const common_1 = require("@fonoster/common");
-const logger_1 = require("@fonoster/logger");
 const text_to_speech_1 = require("@google-cloud/text-to-speech");
 const z = __importStar(require("zod"));
 const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
-const isSsml_1 = require("./isSsml");
+const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
+const isSsml_1 = require("./utils/isSsml");
 const ENGINE_NAME = "tts.google";
 exports.ENGINE_NAME = ENGINE_NAME;
-const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
 class Google extends AbstractTextToSpeech_1.AbstractTextToSpeech {
     constructor(config) {
         super();
@@ -83,12 +63,13 @@ class Google extends AbstractTextToSpeech_1.AbstractTextToSpeech {
         this.engineConfig = config;
     }
     synthesize(text, options) {
-        return __awaiter(this, void 0, void 0, function* () {
-            logger.verbose(`synthesize [input: ${text}, isSsml=${(0, isSsml_1.isSsml)(text)} options: ${JSON.stringify(options)}]`);
-            const { voice } = this.engineConfig.config;
-            const lang = `${voice.split("-")[0]}-${voice.split("-")[1]}`;
+        this.logSynthesisRequest(text, options);
+        const ref = this.createMediaReference();
+        const { voice } = this.engineConfig.config;
+        const lang = `${voice.split("-")[0]}-${voice.split("-")[1]}`;
+        const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
             const request = {
-                input: (0, isSsml_1.isSsml)(text) ? { ssml: text } : { text },
+                input: (0, isSsml_1.isSsml)(chunkText) ? { ssml: chunkText } : { text: chunkText },
                 audioConfig: {
                     audioEncoding: this.AUDIO_ENCODING,
                     sampleRateHertz: this.SAMPLE_RATE_HERTZ
@@ -99,9 +80,11 @@ class Google extends AbstractTextToSpeech_1.AbstractTextToSpeech {
                 }
             };
             const [response] = yield this.client.synthesizeSpeech(request);
-            const ref = this.createMediaReference();
-            return { ref, stream: stream_1.Readable.from(response.audioContent) };
-        });
+            const audioContent = response.audioContent;
+            // Ignore the first 44 bytes of the response to avoid the WAV header
+            return audioContent.subarray(44);
+        }));
+        return { ref, stream };
     }
     static getConfigValidationSchema() {
         return z.object({

package/dist/voice/tts/types.d.ts CHANGED Viewed

@@ -19,4 +19,30 @@
 type SynthOptions = {
     voice: string;
 };
-export { SynthOptions };
+type DeepgramTtsConfig = {
+    [key: string]: Record<string, string>;
+    credentials: {
+        apiKey: string;
+    };
+};
+type ElevenLabsTtsConfig = {
+    [key: string]: Record<string, string>;
+    credentials: {
+        apiKey: string;
+    };
+};
+type GoogleTtsConfig = {
+    [key: string]: Record<string, string>;
+    credentials: {
+        client_email: string;
+        private_key: string;
+    };
+};
+type AzureTTSConfig = {
+    [key: string]: Record<string, string>;
+    credentials: {
+        subscriptionKey: string;
+        serviceRegion: string;
+    };
+};
+export { SynthOptions, AzureTTSConfig, DeepgramTtsConfig, ElevenLabsTtsConfig, GoogleTtsConfig };

package/dist/voice/tts/utils/createChunkedSynthesisStream.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { Readable } from "stream";
+/**
+ * Creates a readable stream that processes text in chunks for better streaming performance.
+ * This utility ensures that chunks are processed in parallel but streamed in the correct order.
+ *
+ * @param text - The text to be synthesized
+ * @param synthesizeChunk - Function that processes each chunk and returns a Buffer or Readable
+ * @returns A readable stream containing the synthesized audio
+ */
+export declare function createChunkedSynthesisStream(text: string, synthesizeChunk: (text: string, index: number) => Promise<Buffer | Readable>): Readable;

package/dist/voice/tts/utils/createChunkedSynthesisStream.js ADDED Viewed

@@ -0,0 +1,78 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createChunkedSynthesisStream = createChunkedSynthesisStream;
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const stream_1 = require("stream");
+const logger_1 = require("@fonoster/logger");
+const textChunksByFirstNaturalPause_1 = require("../../handlers/utils/textChunksByFirstNaturalPause");
+const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
+/**
+ * Creates a readable stream that processes text in chunks for better streaming performance.
+ * This utility ensures that chunks are processed in parallel but streamed in the correct order.
+ *
+ * @param text - The text to be synthesized
+ * @param synthesizeChunk - Function that processes each chunk and returns a Buffer or Readable
+ * @returns A readable stream containing the synthesized audio
+ */
+function createChunkedSynthesisStream(text, synthesizeChunk) {
+    const chunks = (0, textChunksByFirstNaturalPause_1.textChunksByFirstNaturalPause)(text);
+    const stream = new stream_1.Readable({ read() { } });
+    if (chunks.length === 0) {
+        logger.verbose("no text chunks to synthesize, returning empty stream");
+        stream.push(null);
+        return stream;
+    }
+    logger.verbose(`processing ${chunks.length} text chunks for synthesis`);
+    const results = new Array(chunks.length);
+    let nextIndexToPush = 0;
+    let hasError = false;
+    function observeQueue() {
+        if (nextIndexToPush < results.length &&
+            results[nextIndexToPush] !== undefined) {
+            stream.push(results[nextIndexToPush]);
+            nextIndexToPush++;
+            setImmediate(observeQueue);
+        }
+        else if (nextIndexToPush < results.length) {
+            setTimeout(observeQueue, 10);
+        }
+        else {
+            stream.push(null);
+        }
+    }
+    observeQueue();
+    chunks.forEach((chunkText, index) => {
+        synthesizeChunk(chunkText, index)
+            .then((synthesizedText) => {
+            if (!hasError) {
+                results[index] = synthesizedText;
+            }
+        })
+            .catch((error) => {
+            if (!hasError) {
+                hasError = true;
+                logger.error(`chunk synthesis failed: ${error.message}`);
+                stream.emit("error", new Error(`Synthesis failed: ${error.message}`));
+                stream.push(null);
+            }
+        });
+    });
+    return stream;
+}

package/dist/voice/tts/utils/createErrorStream.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { Readable } from "stream";
+declare function createErrorStream(errorMessage: string): Readable;
+export { createErrorStream };

package/dist/voice/tts/utils/createErrorStream.js ADDED Viewed

@@ -0,0 +1,28 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createErrorStream = createErrorStream;
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const stream_1 = require("stream");
+function createErrorStream(errorMessage) {
+    const errorStream = new stream_1.Readable({ read() { } });
+    errorStream.emit("error", new Error(errorMessage));
+    errorStream.push(null);
+    return errorStream;
+}

package/dist/voice/types/voice.d.ts CHANGED Viewed

@@ -48,10 +48,10 @@ type VoiceClient = {
     getTranscriptionsStream: () => Stream;
 };
 type TextToSpeech = {
-    synthesize: (text: string, options: Record<string, unknown>) => Promise<{
+    synthesize: (text: string, options: Record<string, unknown>) => {
         ref: string;
         stream: Readable;
-    }>;
+    };
 };
 type SpeechToText = {
     transcribe: (stream: Stream) => Promise<SpeechResult>;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/apiserver",
-  "version": "0.9.35",
+  "version": "0.9.39",
   "description": "APIServer for Fonoster",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -73,5 +73,5 @@
     "@types/uuid": "^10.0.0",
     "@types/validator": "^13.12.0"
   },
-  "gitHead": "051baac1de8595a8d657022ee588d8cbb59f32ef"
+  "gitHead": "ff9bc63efb101ceb4c6c4e08b7b11f720336454c"
 }

/package/dist/voice/tts/{isSsml.d.ts → utils/isSsml.d.ts} RENAMED Viewed

File without changes

/package/dist/voice/tts/{isSsml.js → utils/isSsml.js} RENAMED Viewed

File without changes

/package/dist/voice/tts/{streamToBuffer.d.ts → utils/streamToBuffer.d.ts} RENAMED Viewed

File without changes

/package/dist/voice/tts/{streamToBuffer.js → utils/streamToBuffer.js} RENAMED Viewed

File without changes