@fonoster/apiserver 0.9.35 → 0.9.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/applications/createGetFnUtil.d.ts +3 -3
- package/dist/core/httpBridge.js +12 -2
- package/dist/voice/VoiceClientImpl.d.ts +1 -1
- package/dist/voice/VoiceClientImpl.js +15 -11
- package/dist/voice/stt/Deepgram.js +57 -8
- package/dist/voice/tts/AbstractTextToSpeech.d.ts +7 -2
- package/dist/voice/tts/AbstractTextToSpeech.js +30 -0
- package/dist/voice/tts/Azure.d.ts +3 -10
- package/dist/voice/tts/Azure.js +38 -49
- package/dist/voice/tts/Deepgram.d.ts +3 -10
- package/dist/voice/tts/Deepgram.js +11 -64
- package/dist/voice/tts/ElevenLabs.d.ts +3 -10
- package/dist/voice/tts/ElevenLabs.js +11 -64
- package/dist/voice/tts/Google.d.ts +3 -10
- package/dist/voice/tts/Google.js +13 -30
- package/dist/voice/tts/types.d.ts +27 -1
- package/dist/voice/tts/utils/createChunkedSynthesisStream.d.ts +28 -0
- package/dist/voice/tts/utils/createChunkedSynthesisStream.js +78 -0
- package/dist/voice/tts/utils/createErrorStream.d.ts +21 -0
- package/dist/voice/tts/utils/createErrorStream.js +28 -0
- package/dist/voice/types/voice.d.ts +2 -2
- package/package.json +2 -2
- /package/dist/voice/tts/{isSsml.d.ts → utils/isSsml.d.ts} +0 -0
- /package/dist/voice/tts/{isSsml.js → utils/isSsml.js} +0 -0
- /package/dist/voice/tts/{streamToBuffer.d.ts → utils/streamToBuffer.d.ts} +0 -0
- /package/dist/voice/tts/{streamToBuffer.js → utils/streamToBuffer.js} +0 -0
|
@@ -5,22 +5,22 @@ declare function createGetFnUtil(prisma: Prisma): (ref: string) => Promise<{
|
|
|
5
5
|
};
|
|
6
6
|
textToSpeech: {
|
|
7
7
|
ref: string;
|
|
8
|
-
config: import("@prisma/client/runtime/library").JsonValue;
|
|
9
8
|
credentials: string | null;
|
|
9
|
+
config: import("@prisma/client/runtime/library").JsonValue;
|
|
10
10
|
productRef: string;
|
|
11
11
|
applicationRef: string;
|
|
12
12
|
};
|
|
13
13
|
speechToText: {
|
|
14
14
|
ref: string;
|
|
15
|
-
config: import("@prisma/client/runtime/library").JsonValue;
|
|
16
15
|
credentials: string | null;
|
|
16
|
+
config: import("@prisma/client/runtime/library").JsonValue;
|
|
17
17
|
productRef: string;
|
|
18
18
|
applicationRef: string;
|
|
19
19
|
};
|
|
20
20
|
intelligence: {
|
|
21
21
|
ref: string;
|
|
22
|
-
config: import("@prisma/client/runtime/library").JsonValue;
|
|
23
22
|
credentials: string | null;
|
|
23
|
+
config: import("@prisma/client/runtime/library").JsonValue;
|
|
24
24
|
productRef: string;
|
|
25
25
|
applicationRef: string;
|
|
26
26
|
};
|
package/dist/core/httpBridge.js
CHANGED
|
@@ -32,8 +32,13 @@ function httpBridge(identityConfig, params) {
|
|
|
32
32
|
}
|
|
33
33
|
res.setHeader("content-type", CONTENT_TYPE);
|
|
34
34
|
stream.on("error", (error) => {
|
|
35
|
-
logger.error(`
|
|
36
|
-
res.
|
|
35
|
+
logger.error(`error reading file: ${error.message}`);
|
|
36
|
+
if (!res.headersSent) {
|
|
37
|
+
res.status(500).send("Error reading file!");
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
res.end();
|
|
41
|
+
}
|
|
37
42
|
});
|
|
38
43
|
stream.on("end", () => {
|
|
39
44
|
res.end();
|
|
@@ -61,6 +66,11 @@ function httpBridge(identityConfig, params) {
|
|
|
61
66
|
streamMap.set(id, stream);
|
|
62
67
|
},
|
|
63
68
|
removeStream: (id) => {
|
|
69
|
+
logger.verbose(`removing stream with id: ${id}`);
|
|
70
|
+
const stream = streamMap.get(id);
|
|
71
|
+
if (stream) {
|
|
72
|
+
stream.destroy();
|
|
73
|
+
}
|
|
64
74
|
streamMap.delete(id);
|
|
65
75
|
},
|
|
66
76
|
getStream: (id) => {
|
|
@@ -52,7 +52,7 @@ declare class VoiceClientImpl implements VoiceClient {
|
|
|
52
52
|
startSpeechGather(callback: (stream: {
|
|
53
53
|
speech: string;
|
|
54
54
|
responseTime: number;
|
|
55
|
-
}) => void):
|
|
55
|
+
}) => void): void;
|
|
56
56
|
startDtmfGather(sessionRef: string, callback: (event: {
|
|
57
57
|
digit: string;
|
|
58
58
|
}) => void): Promise<void>;
|
|
@@ -106,7 +106,6 @@ class VoiceClientImpl {
|
|
|
106
106
|
}
|
|
107
107
|
catch (e) {
|
|
108
108
|
logger.error("authz service error", e);
|
|
109
|
-
// TODO: Play a different sound
|
|
110
109
|
yield ari.channels.answer({ channelId });
|
|
111
110
|
yield ari.channels.play({ channelId, media: "sound:unavailable" });
|
|
112
111
|
yield new Promise((resolve) => setTimeout(resolve, 2000));
|
|
@@ -186,7 +185,13 @@ class VoiceClientImpl {
|
|
|
186
185
|
}
|
|
187
186
|
synthesize(text, options) {
|
|
188
187
|
return __awaiter(this, void 0, void 0, function* () {
|
|
189
|
-
const { ref, stream } =
|
|
188
|
+
const { ref, stream } = this.tts.synthesize(text, options);
|
|
189
|
+
stream.on("error", (error) => __awaiter(this, void 0, void 0, function* () {
|
|
190
|
+
logger.error(`stream error for ref ${ref}: ${error.message}`, {
|
|
191
|
+
errorDetails: error.stack || "No stack trace"
|
|
192
|
+
});
|
|
193
|
+
this.filesServer.removeStream(ref);
|
|
194
|
+
}));
|
|
190
195
|
this.filesServer.addStream(ref, stream);
|
|
191
196
|
return ref;
|
|
192
197
|
});
|
|
@@ -203,15 +208,14 @@ class VoiceClientImpl {
|
|
|
203
208
|
});
|
|
204
209
|
}
|
|
205
210
|
startSpeechGather(callback) {
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
});
|
|
211
|
+
const out = this.stt.streamTranscribe(this.transcriptionsStream);
|
|
212
|
+
out.on("data", callback);
|
|
213
|
+
out.on("error", (error) => __awaiter(this, void 0, void 0, function* () {
|
|
214
|
+
logger.error("speech recognition error", { error });
|
|
215
|
+
const { sessionRef: channelId } = this.config;
|
|
216
|
+
const { ari } = this;
|
|
217
|
+
ari.channels.hangup({ channelId });
|
|
218
|
+
}));
|
|
215
219
|
}
|
|
216
220
|
startDtmfGather(sessionRef, callback) {
|
|
217
221
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -83,15 +83,34 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
83
83
|
streamTranscribe(stream) {
|
|
84
84
|
const connection = this.client.listen.live(buildTranscribeConfig(this.engineConfig.config));
|
|
85
85
|
const out = new stream_1.Stream();
|
|
86
|
+
// Add error handler immediately to catch any connection errors
|
|
87
|
+
connection.on(LiveTranscriptionEvents.Error, (err) => {
|
|
88
|
+
logger.error("error on Deepgram connection", { err });
|
|
89
|
+
// Emit error properly for handling upstream
|
|
90
|
+
out.emit("error", new Error("Speech recognition service error"));
|
|
91
|
+
try {
|
|
92
|
+
connection.destroy();
|
|
93
|
+
}
|
|
94
|
+
catch (destroyErr) {
|
|
95
|
+
logger.error("error destroying connection", { destroyErr });
|
|
96
|
+
}
|
|
97
|
+
});
|
|
86
98
|
connection.on(LiveTranscriptionEvents.Open, () => {
|
|
87
99
|
stream.on("data", (chunk) => {
|
|
88
|
-
|
|
100
|
+
try {
|
|
101
|
+
connection.send(chunk);
|
|
102
|
+
}
|
|
103
|
+
catch (err) {
|
|
104
|
+
logger.error("error sending chunk to Deepgram", { err });
|
|
105
|
+
}
|
|
89
106
|
});
|
|
90
107
|
connection.on(LiveTranscriptionEvents.Transcript, (data) => {
|
|
91
|
-
|
|
108
|
+
var _a, _b, _c;
|
|
109
|
+
if (!((_c = (_b = (_a = data.channel) === null || _a === void 0 ? void 0 : _a.alternatives) === null || _b === void 0 ? void 0 : _b[0]) === null || _c === void 0 ? void 0 : _c.transcript) ||
|
|
110
|
+
!data.speech_final) {
|
|
92
111
|
return;
|
|
93
112
|
}
|
|
94
|
-
const words = data.channel.alternatives[0].words;
|
|
113
|
+
const words = data.channel.alternatives[0].words || [];
|
|
95
114
|
const responseTime = words.length > 0
|
|
96
115
|
? (words.reduce((acc, word) => acc + (word.end - word.start), 0) *
|
|
97
116
|
1000) /
|
|
@@ -106,10 +125,30 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
106
125
|
responseTime
|
|
107
126
|
});
|
|
108
127
|
});
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
128
|
+
});
|
|
129
|
+
// Handle stream errors and cleanup
|
|
130
|
+
stream.on("error", (err) => {
|
|
131
|
+
logger.warn("error on input stream", { err });
|
|
132
|
+
// Instead of emitting an error, just end the stream with a message
|
|
133
|
+
out.emit("data", {
|
|
134
|
+
speech: "Error with audio input stream",
|
|
135
|
+
responseTime: 0
|
|
112
136
|
});
|
|
137
|
+
out.emit("end");
|
|
138
|
+
try {
|
|
139
|
+
connection.destroy();
|
|
140
|
+
}
|
|
141
|
+
catch (destroyErr) {
|
|
142
|
+
logger.warn("error destroying connection", { destroyErr });
|
|
143
|
+
}
|
|
144
|
+
});
|
|
145
|
+
stream.on("end", () => {
|
|
146
|
+
try {
|
|
147
|
+
connection.destroy();
|
|
148
|
+
}
|
|
149
|
+
catch (err) {
|
|
150
|
+
logger.error("error destroying connection on stream end", { err });
|
|
151
|
+
}
|
|
113
152
|
});
|
|
114
153
|
return out;
|
|
115
154
|
}
|
|
@@ -143,10 +182,20 @@ class Deepgram extends AbstractSpeechToText_1.AbstractSpeechToText {
|
|
|
143
182
|
});
|
|
144
183
|
});
|
|
145
184
|
stream.on("end", () => {
|
|
146
|
-
|
|
185
|
+
try {
|
|
186
|
+
connection.destroy();
|
|
187
|
+
}
|
|
188
|
+
catch (destroyErr) {
|
|
189
|
+
logger.error("error destroying connection", { destroyErr });
|
|
190
|
+
}
|
|
147
191
|
});
|
|
148
192
|
stream.on("error", (err) => {
|
|
149
|
-
|
|
193
|
+
try {
|
|
194
|
+
connection.destroy();
|
|
195
|
+
}
|
|
196
|
+
catch (destroyErr) {
|
|
197
|
+
logger.error("error destroying connection", { destroyErr });
|
|
198
|
+
}
|
|
150
199
|
reject(err);
|
|
151
200
|
});
|
|
152
201
|
});
|
|
@@ -23,13 +23,18 @@ declare abstract class AbstractTextToSpeech<E, S extends SynthOptions = SynthOpt
|
|
|
23
23
|
abstract readonly engineName: E;
|
|
24
24
|
protected abstract OUTPUT_FORMAT: "wav" | "sln16";
|
|
25
25
|
protected abstract CACHING_FIELDS: string[];
|
|
26
|
-
abstract synthesize(text: string, options: S):
|
|
26
|
+
abstract synthesize(text: string, options: S): {
|
|
27
27
|
ref: string;
|
|
28
28
|
stream: Readable;
|
|
29
|
-
}
|
|
29
|
+
};
|
|
30
30
|
static getConfigValidationSchema(): z.Schema;
|
|
31
31
|
static getCredentialsValidationSchema(): z.Schema;
|
|
32
32
|
protected createMediaReference(): string;
|
|
33
33
|
getName(): E;
|
|
34
|
+
protected logSynthesisRequest(text: string, options: S): void;
|
|
35
|
+
protected safeSynthesize(ref: string, synthesisFunction: () => Promise<Readable>): Promise<{
|
|
36
|
+
ref: string;
|
|
37
|
+
stream: Readable;
|
|
38
|
+
}>;
|
|
34
39
|
}
|
|
35
40
|
export { AbstractTextToSpeech };
|
|
@@ -1,8 +1,21 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
2
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
12
|
exports.AbstractTextToSpeech = void 0;
|
|
13
|
+
const logger_1 = require("@fonoster/logger");
|
|
4
14
|
const uuid_1 = require("uuid");
|
|
5
15
|
const MethodNotImplementedError_1 = require("../errors/MethodNotImplementedError");
|
|
16
|
+
const createErrorStream_1 = require("./utils/createErrorStream");
|
|
17
|
+
const isSsml_1 = require("./utils/isSsml");
|
|
18
|
+
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
6
19
|
class AbstractTextToSpeech {
|
|
7
20
|
static getConfigValidationSchema() {
|
|
8
21
|
throw new MethodNotImplementedError_1.MethodNotImplementedError();
|
|
@@ -16,5 +29,22 @@ class AbstractTextToSpeech {
|
|
|
16
29
|
getName() {
|
|
17
30
|
return this.engineName;
|
|
18
31
|
}
|
|
32
|
+
logSynthesisRequest(text, options) {
|
|
33
|
+
logger.verbose(`synthesize [input: ${text}, isSsml=${(0, isSsml_1.isSsml)(text)} options: ${JSON.stringify(options)}]`);
|
|
34
|
+
}
|
|
35
|
+
safeSynthesize(ref, synthesisFunction) {
|
|
36
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
37
|
+
try {
|
|
38
|
+
const stream = yield synthesisFunction();
|
|
39
|
+
return { ref, stream };
|
|
40
|
+
}
|
|
41
|
+
catch (error) {
|
|
42
|
+
return {
|
|
43
|
+
ref,
|
|
44
|
+
stream: (0, createErrorStream_1.createErrorStream)(`${this.engineName} synthesis failed: ${error.message}`)
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
});
|
|
48
|
+
}
|
|
19
49
|
}
|
|
20
50
|
exports.AbstractTextToSpeech = AbstractTextToSpeech;
|
|
@@ -19,25 +19,18 @@
|
|
|
19
19
|
import { Readable } from "stream";
|
|
20
20
|
import * as z from "zod";
|
|
21
21
|
import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
|
|
22
|
-
import { SynthOptions } from "./types";
|
|
22
|
+
import { AzureTTSConfig, SynthOptions } from "./types";
|
|
23
23
|
declare const ENGINE_NAME = "tts.azure";
|
|
24
|
-
type AzureTTSConfig = {
|
|
25
|
-
[key: string]: Record<string, string>;
|
|
26
|
-
credentials: {
|
|
27
|
-
subscriptionKey: string;
|
|
28
|
-
serviceRegion: string;
|
|
29
|
-
};
|
|
30
|
-
};
|
|
31
24
|
declare class Azure extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
32
25
|
config: AzureTTSConfig;
|
|
33
26
|
readonly engineName = "tts.azure";
|
|
34
27
|
protected readonly OUTPUT_FORMAT = "sln16";
|
|
35
28
|
protected readonly CACHING_FIELDS: string[];
|
|
36
29
|
constructor(config: AzureTTSConfig);
|
|
37
|
-
synthesize(text: string, options: SynthOptions):
|
|
30
|
+
synthesize(text: string, options: SynthOptions): {
|
|
38
31
|
ref: string;
|
|
39
32
|
stream: Readable;
|
|
40
|
-
}
|
|
33
|
+
};
|
|
41
34
|
static getConfigValidationSchema(): z.Schema;
|
|
42
35
|
static getCredentialsValidationSchema(): z.Schema;
|
|
43
36
|
}
|
package/dist/voice/tts/Azure.js
CHANGED
|
@@ -43,34 +43,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
43
43
|
};
|
|
44
44
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
45
|
exports.ENGINE_NAME = exports.Azure = void 0;
|
|
46
|
-
/**
|
|
47
|
-
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
48
|
-
* http://github.com/fonoster/fonoster
|
|
49
|
-
*
|
|
50
|
-
* This file is part of Fonoster
|
|
51
|
-
*
|
|
52
|
-
* Licensed under the MIT License (the "License");
|
|
53
|
-
* you may not use this file except in compliance with
|
|
54
|
-
* the License. You may obtain a copy of the License at
|
|
55
|
-
*
|
|
56
|
-
* https://opensource.org/licenses/MIT
|
|
57
|
-
*
|
|
58
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
59
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
60
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
61
|
-
* See the License for the specific language governing permissions and
|
|
62
|
-
* limitations under the License.
|
|
63
|
-
*/
|
|
64
|
-
const stream_1 = require("stream");
|
|
65
46
|
const common_1 = require("@fonoster/common");
|
|
66
|
-
const logger_1 = require("@fonoster/logger");
|
|
67
47
|
const sdk = __importStar(require("microsoft-cognitiveservices-speech-sdk"));
|
|
68
48
|
const z = __importStar(require("zod"));
|
|
69
49
|
const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
|
|
70
|
-
const
|
|
50
|
+
const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
|
|
51
|
+
const isSsml_1 = require("./utils/isSsml");
|
|
71
52
|
const ENGINE_NAME = "tts.azure";
|
|
72
53
|
exports.ENGINE_NAME = ENGINE_NAME;
|
|
73
|
-
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
74
54
|
class Azure extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
75
55
|
constructor(config) {
|
|
76
56
|
super();
|
|
@@ -80,36 +60,45 @@ class Azure extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
|
80
60
|
this.config = config;
|
|
81
61
|
}
|
|
82
62
|
synthesize(text, options) {
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
63
|
+
this.logSynthesisRequest(text, options);
|
|
64
|
+
const ref = this.createMediaReference();
|
|
65
|
+
const { subscriptionKey, serviceRegion } = this.config.credentials;
|
|
66
|
+
const voice = options.voice || this.config.config.voice;
|
|
67
|
+
const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
|
|
68
|
+
speechConfig.speechSynthesisVoiceName = voice;
|
|
69
|
+
speechConfig.speechSynthesisOutputFormat =
|
|
70
|
+
sdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm;
|
|
71
|
+
const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
|
|
90
72
|
const synthesizer = new sdk.SpeechSynthesizer(speechConfig);
|
|
91
|
-
const isSSML = (0, isSsml_1.isSsml)(
|
|
73
|
+
const isSSML = (0, isSsml_1.isSsml)(chunkText);
|
|
92
74
|
const func = isSSML ? "speakSsmlAsync" : "speakTextAsync";
|
|
93
|
-
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
75
|
+
try {
|
|
76
|
+
const audioData = yield new Promise((resolve, reject) => {
|
|
77
|
+
const audioChunks = [];
|
|
78
|
+
synthesizer[func](chunkText, (result) => {
|
|
79
|
+
if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
|
|
80
|
+
audioChunks.push(Buffer.from(result.audioData));
|
|
81
|
+
resolve(Buffer.concat(audioChunks));
|
|
82
|
+
}
|
|
83
|
+
else {
|
|
84
|
+
reject(new Error("Speech synthesis canceled: " + result.errorDetails));
|
|
85
|
+
}
|
|
86
|
+
synthesizer.close();
|
|
87
|
+
}, (err) => {
|
|
88
|
+
synthesizer.close();
|
|
89
|
+
reject(new Error(err));
|
|
90
|
+
});
|
|
107
91
|
});
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
92
|
+
// Ignore the first 44 bytes of the response to avoid the WAV header
|
|
93
|
+
return audioData.subarray(44);
|
|
94
|
+
}
|
|
95
|
+
catch (error) {
|
|
96
|
+
// Make sure synthesizer is closed in case of error
|
|
97
|
+
synthesizer.close();
|
|
98
|
+
throw error;
|
|
99
|
+
}
|
|
100
|
+
}));
|
|
101
|
+
return { ref, stream };
|
|
113
102
|
}
|
|
114
103
|
static getConfigValidationSchema() {
|
|
115
104
|
return z.object({
|
|
@@ -20,14 +20,8 @@ import { Readable } from "stream";
|
|
|
20
20
|
import { DeepgramClient } from "@deepgram/sdk";
|
|
21
21
|
import * as z from "zod";
|
|
22
22
|
import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
|
|
23
|
-
import { SynthOptions } from "./types";
|
|
23
|
+
import { DeepgramTtsConfig, SynthOptions } from "./types";
|
|
24
24
|
declare const ENGINE_NAME = "tts.deepgram";
|
|
25
|
-
type DeepgramTtsConfig = {
|
|
26
|
-
[key: string]: Record<string, string>;
|
|
27
|
-
credentials: {
|
|
28
|
-
apiKey: string;
|
|
29
|
-
};
|
|
30
|
-
};
|
|
31
25
|
declare class Deepgram extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
32
26
|
client: DeepgramClient;
|
|
33
27
|
engineConfig: DeepgramTtsConfig;
|
|
@@ -37,11 +31,10 @@ declare class Deepgram extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
|
37
31
|
protected readonly AUDIO_ENCODING: "linear16";
|
|
38
32
|
protected readonly SAMPLE_RATE_HERTZ = 16000;
|
|
39
33
|
constructor(config: DeepgramTtsConfig);
|
|
40
|
-
synthesize(text: string, options: SynthOptions):
|
|
34
|
+
synthesize(text: string, options: SynthOptions): {
|
|
41
35
|
ref: string;
|
|
42
36
|
stream: Readable;
|
|
43
|
-
}
|
|
44
|
-
private doSynthesize;
|
|
37
|
+
};
|
|
45
38
|
static getConfigValidationSchema(): z.Schema;
|
|
46
39
|
static getCredentialsValidationSchema(): z.Schema;
|
|
47
40
|
}
|
|
@@ -43,36 +43,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
43
43
|
};
|
|
44
44
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
45
|
exports.ENGINE_NAME = exports.Deepgram = void 0;
|
|
46
|
-
/**
|
|
47
|
-
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
48
|
-
* http://github.com/fonoster/fonoster
|
|
49
|
-
*
|
|
50
|
-
* This file is part of Fonoster
|
|
51
|
-
*
|
|
52
|
-
* Licensed under the MIT License (the "License");
|
|
53
|
-
* you may not use this file except in compliance with
|
|
54
|
-
* the License. You may obtain a copy of the License at
|
|
55
|
-
*
|
|
56
|
-
* https://opensource.org/licenses/MIT
|
|
57
|
-
*
|
|
58
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
59
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
60
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
61
|
-
* See the License for the specific language governing permissions and
|
|
62
|
-
* limitations under the License.
|
|
63
|
-
*/
|
|
64
|
-
const stream_1 = require("stream");
|
|
65
46
|
const sdk_1 = require("@deepgram/sdk");
|
|
66
47
|
const common_1 = require("@fonoster/common");
|
|
67
|
-
const logger_1 = require("@fonoster/logger");
|
|
68
48
|
const z = __importStar(require("zod"));
|
|
69
|
-
const textChunksByFirstNaturalPause_1 = require("../handlers/utils/textChunksByFirstNaturalPause");
|
|
70
49
|
const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
|
|
71
|
-
const
|
|
72
|
-
const streamToBuffer_1 = require("./streamToBuffer");
|
|
50
|
+
const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
|
|
51
|
+
const streamToBuffer_1 = require("./utils/streamToBuffer");
|
|
73
52
|
const ENGINE_NAME = "tts.deepgram";
|
|
74
53
|
exports.ENGINE_NAME = ENGINE_NAME;
|
|
75
|
-
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
76
54
|
class Deepgram extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
77
55
|
constructor(config) {
|
|
78
56
|
super();
|
|
@@ -85,51 +63,20 @@ class Deepgram extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
|
85
63
|
this.engineConfig = config;
|
|
86
64
|
}
|
|
87
65
|
synthesize(text, options) {
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
let nextIndexToPush = 0;
|
|
96
|
-
function observeQueue() {
|
|
97
|
-
if (nextIndexToPush < results.length &&
|
|
98
|
-
results[nextIndexToPush] !== undefined) {
|
|
99
|
-
stream.push(results[nextIndexToPush]);
|
|
100
|
-
nextIndexToPush++;
|
|
101
|
-
setImmediate(observeQueue);
|
|
102
|
-
}
|
|
103
|
-
else if (nextIndexToPush < results.length) {
|
|
104
|
-
setTimeout(observeQueue, 10);
|
|
105
|
-
}
|
|
106
|
-
else {
|
|
107
|
-
stream.push(null);
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
observeQueue();
|
|
111
|
-
chunks.forEach((text, index) => {
|
|
112
|
-
this.doSynthesize(text, voice)
|
|
113
|
-
.then((synthesizedText) => {
|
|
114
|
-
results[index] = synthesizedText;
|
|
115
|
-
})
|
|
116
|
-
.catch((error) => {
|
|
117
|
-
stream.emit("error", error);
|
|
118
|
-
});
|
|
119
|
-
});
|
|
120
|
-
return { ref, stream };
|
|
121
|
-
});
|
|
122
|
-
}
|
|
123
|
-
doSynthesize(text, voice) {
|
|
124
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
125
|
-
const response = yield this.client.speak.request({ text }, {
|
|
126
|
-
model: voice || common_1.DeepgramVoice.AURA_ASTERIA_EN,
|
|
66
|
+
this.logSynthesisRequest(text, options);
|
|
67
|
+
const { voice } = this.engineConfig.config;
|
|
68
|
+
const ref = this.createMediaReference();
|
|
69
|
+
const selectedVoice = voice || common_1.DeepgramVoice.AURA_ASTERIA_EN;
|
|
70
|
+
const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
|
|
71
|
+
const response = yield this.client.speak.request({ text: chunkText }, {
|
|
72
|
+
model: selectedVoice,
|
|
127
73
|
encoding: this.AUDIO_ENCODING,
|
|
128
74
|
sample_rate: this.SAMPLE_RATE_HERTZ,
|
|
129
75
|
container: "none"
|
|
130
76
|
});
|
|
131
77
|
return (yield (0, streamToBuffer_1.streamToBuffer)(yield response.getStream()));
|
|
132
|
-
});
|
|
78
|
+
}));
|
|
79
|
+
return { ref, stream };
|
|
133
80
|
}
|
|
134
81
|
static getConfigValidationSchema() {
|
|
135
82
|
return z.object({
|
|
@@ -20,14 +20,8 @@ import { Readable } from "stream";
|
|
|
20
20
|
import { ElevenLabsClient } from "elevenlabs";
|
|
21
21
|
import * as z from "zod";
|
|
22
22
|
import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
|
|
23
|
-
import { SynthOptions } from "./types";
|
|
23
|
+
import { ElevenLabsTtsConfig, SynthOptions } from "./types";
|
|
24
24
|
declare const ENGINE_NAME = "tts.elevenlabs";
|
|
25
|
-
type ElevenLabsTtsConfig = {
|
|
26
|
-
[key: string]: Record<string, string>;
|
|
27
|
-
credentials: {
|
|
28
|
-
apiKey: string;
|
|
29
|
-
};
|
|
30
|
-
};
|
|
31
25
|
declare class ElevenLabs extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
32
26
|
client: ElevenLabsClient;
|
|
33
27
|
engineConfig: ElevenLabsTtsConfig;
|
|
@@ -35,11 +29,10 @@ declare class ElevenLabs extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
|
35
29
|
protected readonly OUTPUT_FORMAT = "sln16";
|
|
36
30
|
protected readonly CACHING_FIELDS: string[];
|
|
37
31
|
constructor(config: ElevenLabsTtsConfig);
|
|
38
|
-
synthesize(text: string, options: SynthOptions):
|
|
32
|
+
synthesize(text: string, options: SynthOptions): {
|
|
39
33
|
ref: string;
|
|
40
34
|
stream: Readable;
|
|
41
|
-
}
|
|
42
|
-
private doSynthesize;
|
|
35
|
+
};
|
|
43
36
|
static getConfigValidationSchema(): z.Schema;
|
|
44
37
|
static getCredentialsValidationSchema(): z.Schema;
|
|
45
38
|
}
|
|
@@ -43,35 +43,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
43
43
|
};
|
|
44
44
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
45
|
exports.ElevenLabs = exports.ENGINE_NAME = void 0;
|
|
46
|
-
/**
|
|
47
|
-
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
48
|
-
* http://github.com/fonoster/fonoster
|
|
49
|
-
*
|
|
50
|
-
* This file is part of Fonoster
|
|
51
|
-
*
|
|
52
|
-
* Licensed under the MIT License (the "License");
|
|
53
|
-
* you may not use this file except in compliance with
|
|
54
|
-
* the License. You may obtain a copy of the License at
|
|
55
|
-
*
|
|
56
|
-
* https://opensource.org/licenses/MIT
|
|
57
|
-
*
|
|
58
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
59
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
60
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
61
|
-
* See the License for the specific language governing permissions and
|
|
62
|
-
* limitations under the License.
|
|
63
|
-
*/
|
|
64
|
-
const stream_1 = require("stream");
|
|
65
|
-
const logger_1 = require("@fonoster/logger");
|
|
66
46
|
const elevenlabs_1 = require("elevenlabs");
|
|
67
47
|
const z = __importStar(require("zod"));
|
|
68
|
-
const textChunksByFirstNaturalPause_1 = require("../handlers/utils/textChunksByFirstNaturalPause"); // Assuming this is the chunking function
|
|
69
48
|
const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
|
|
70
|
-
const
|
|
71
|
-
const streamToBuffer_1 = require("./streamToBuffer");
|
|
49
|
+
const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
|
|
50
|
+
const streamToBuffer_1 = require("./utils/streamToBuffer");
|
|
72
51
|
const ENGINE_NAME = "tts.elevenlabs";
|
|
73
52
|
exports.ENGINE_NAME = ENGINE_NAME;
|
|
74
|
-
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
75
53
|
class ElevenLabs extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
76
54
|
constructor(config) {
|
|
77
55
|
super();
|
|
@@ -82,55 +60,24 @@ class ElevenLabs extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
|
82
60
|
this.engineConfig = config;
|
|
83
61
|
}
|
|
84
62
|
synthesize(text, options) {
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
const chunks = (0, textChunksByFirstNaturalPause_1.textChunksByFirstNaturalPause)(text);
|
|
90
|
-
const stream = new stream_1.Readable({ read() { } });
|
|
91
|
-
const results = new Array(chunks.length);
|
|
92
|
-
let nextIndexToPush = 0;
|
|
93
|
-
function observeQueue() {
|
|
94
|
-
if (nextIndexToPush < results.length &&
|
|
95
|
-
results[nextIndexToPush] !== undefined) {
|
|
96
|
-
stream.push(results[nextIndexToPush]);
|
|
97
|
-
nextIndexToPush++;
|
|
98
|
-
setImmediate(observeQueue);
|
|
99
|
-
}
|
|
100
|
-
else if (nextIndexToPush < results.length) {
|
|
101
|
-
setTimeout(observeQueue, 10);
|
|
102
|
-
}
|
|
103
|
-
else {
|
|
104
|
-
stream.push(null);
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
observeQueue();
|
|
108
|
-
chunks.forEach((text, index) => {
|
|
109
|
-
this.doSynthesize({ text, voice, model })
|
|
110
|
-
.then((synthesizedText) => {
|
|
111
|
-
results[index] = synthesizedText;
|
|
112
|
-
})
|
|
113
|
-
.catch((error) => {
|
|
114
|
-
stream.emit("error", error);
|
|
115
|
-
});
|
|
116
|
-
});
|
|
117
|
-
return { ref, stream };
|
|
118
|
-
});
|
|
119
|
-
}
|
|
120
|
-
doSynthesize(params) {
|
|
121
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
122
|
-
const { text, voice, model } = params;
|
|
63
|
+
this.logSynthesisRequest(text, options);
|
|
64
|
+
const { voice, model } = this.engineConfig.config;
|
|
65
|
+
const ref = this.createMediaReference();
|
|
66
|
+
const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
|
|
123
67
|
const response = yield this.client.generate({
|
|
124
68
|
stream: true,
|
|
125
69
|
voice,
|
|
126
|
-
text,
|
|
70
|
+
text: chunkText,
|
|
127
71
|
model_id: model !== null && model !== void 0 ? model : "eleven_flash_v2_5",
|
|
128
72
|
output_format: "pcm_16000",
|
|
129
73
|
// TODO: Make this configurable
|
|
130
74
|
optimize_streaming_latency: 2
|
|
75
|
+
}, {
|
|
76
|
+
maxRetries: 3
|
|
131
77
|
});
|
|
132
78
|
return (yield (0, streamToBuffer_1.streamToBuffer)(response));
|
|
133
|
-
});
|
|
79
|
+
}));
|
|
80
|
+
return { ref, stream };
|
|
134
81
|
}
|
|
135
82
|
static getConfigValidationSchema() {
|
|
136
83
|
return z.object({});
|
|
@@ -20,15 +20,8 @@ import { Readable } from "stream";
|
|
|
20
20
|
import { TextToSpeechClient } from "@google-cloud/text-to-speech";
|
|
21
21
|
import * as z from "zod";
|
|
22
22
|
import { AbstractTextToSpeech } from "./AbstractTextToSpeech";
|
|
23
|
-
import { SynthOptions } from "./types";
|
|
23
|
+
import { GoogleTtsConfig, SynthOptions } from "./types";
|
|
24
24
|
declare const ENGINE_NAME = "tts.google";
|
|
25
|
-
type GoogleTtsConfig = {
|
|
26
|
-
[key: string]: Record<string, string>;
|
|
27
|
-
credentials: {
|
|
28
|
-
client_email: string;
|
|
29
|
-
private_key: string;
|
|
30
|
-
};
|
|
31
|
-
};
|
|
32
25
|
declare class Google extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
33
26
|
client: TextToSpeechClient;
|
|
34
27
|
engineConfig: GoogleTtsConfig;
|
|
@@ -38,10 +31,10 @@ declare class Google extends AbstractTextToSpeech<typeof ENGINE_NAME> {
|
|
|
38
31
|
protected readonly AUDIO_ENCODING: "LINEAR16";
|
|
39
32
|
protected readonly SAMPLE_RATE_HERTZ = 16000;
|
|
40
33
|
constructor(config: GoogleTtsConfig);
|
|
41
|
-
synthesize(text: string, options: SynthOptions):
|
|
34
|
+
synthesize(text: string, options: SynthOptions): {
|
|
42
35
|
ref: string;
|
|
43
36
|
stream: Readable;
|
|
44
|
-
}
|
|
37
|
+
};
|
|
45
38
|
static getConfigValidationSchema(): z.Schema;
|
|
46
39
|
static getCredentialsValidationSchema(): z.Schema;
|
|
47
40
|
}
|
package/dist/voice/tts/Google.js
CHANGED
|
@@ -43,34 +43,14 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
43
43
|
};
|
|
44
44
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
45
|
exports.Google = exports.ENGINE_NAME = void 0;
|
|
46
|
-
/**
|
|
47
|
-
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
48
|
-
* http://github.com/fonoster/fonoster
|
|
49
|
-
*
|
|
50
|
-
* This file is part of Fonoster
|
|
51
|
-
*
|
|
52
|
-
* Licensed under the MIT License (the "License");
|
|
53
|
-
* you may not use this file except in compliance with
|
|
54
|
-
* the License. You may obtain a copy of the License at
|
|
55
|
-
*
|
|
56
|
-
* https://opensource.org/licenses/MIT
|
|
57
|
-
*
|
|
58
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
59
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
60
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
61
|
-
* See the License for the specific language governing permissions and
|
|
62
|
-
* limitations under the License.
|
|
63
|
-
*/
|
|
64
|
-
const stream_1 = require("stream");
|
|
65
46
|
const common_1 = require("@fonoster/common");
|
|
66
|
-
const logger_1 = require("@fonoster/logger");
|
|
67
47
|
const text_to_speech_1 = require("@google-cloud/text-to-speech");
|
|
68
48
|
const z = __importStar(require("zod"));
|
|
69
49
|
const AbstractTextToSpeech_1 = require("./AbstractTextToSpeech");
|
|
70
|
-
const
|
|
50
|
+
const createChunkedSynthesisStream_1 = require("./utils/createChunkedSynthesisStream");
|
|
51
|
+
const isSsml_1 = require("./utils/isSsml");
|
|
71
52
|
const ENGINE_NAME = "tts.google";
|
|
72
53
|
exports.ENGINE_NAME = ENGINE_NAME;
|
|
73
|
-
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
74
54
|
class Google extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
75
55
|
constructor(config) {
|
|
76
56
|
super();
|
|
@@ -83,12 +63,13 @@ class Google extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
|
83
63
|
this.engineConfig = config;
|
|
84
64
|
}
|
|
85
65
|
synthesize(text, options) {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
66
|
+
this.logSynthesisRequest(text, options);
|
|
67
|
+
const ref = this.createMediaReference();
|
|
68
|
+
const { voice } = this.engineConfig.config;
|
|
69
|
+
const lang = `${voice.split("-")[0]}-${voice.split("-")[1]}`;
|
|
70
|
+
const stream = (0, createChunkedSynthesisStream_1.createChunkedSynthesisStream)(text, (chunkText) => __awaiter(this, void 0, void 0, function* () {
|
|
90
71
|
const request = {
|
|
91
|
-
input: (0, isSsml_1.isSsml)(
|
|
72
|
+
input: (0, isSsml_1.isSsml)(chunkText) ? { ssml: chunkText } : { text: chunkText },
|
|
92
73
|
audioConfig: {
|
|
93
74
|
audioEncoding: this.AUDIO_ENCODING,
|
|
94
75
|
sampleRateHertz: this.SAMPLE_RATE_HERTZ
|
|
@@ -99,9 +80,11 @@ class Google extends AbstractTextToSpeech_1.AbstractTextToSpeech {
|
|
|
99
80
|
}
|
|
100
81
|
};
|
|
101
82
|
const [response] = yield this.client.synthesizeSpeech(request);
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
83
|
+
const audioContent = response.audioContent;
|
|
84
|
+
// Ignore the first 44 bytes of the response to avoid the WAV header
|
|
85
|
+
return audioContent.subarray(44);
|
|
86
|
+
}));
|
|
87
|
+
return { ref, stream };
|
|
105
88
|
}
|
|
106
89
|
static getConfigValidationSchema() {
|
|
107
90
|
return z.object({
|
|
@@ -19,4 +19,30 @@
|
|
|
19
19
|
type SynthOptions = {
|
|
20
20
|
voice: string;
|
|
21
21
|
};
|
|
22
|
-
|
|
22
|
+
type DeepgramTtsConfig = {
|
|
23
|
+
[key: string]: Record<string, string>;
|
|
24
|
+
credentials: {
|
|
25
|
+
apiKey: string;
|
|
26
|
+
};
|
|
27
|
+
};
|
|
28
|
+
type ElevenLabsTtsConfig = {
|
|
29
|
+
[key: string]: Record<string, string>;
|
|
30
|
+
credentials: {
|
|
31
|
+
apiKey: string;
|
|
32
|
+
};
|
|
33
|
+
};
|
|
34
|
+
type GoogleTtsConfig = {
|
|
35
|
+
[key: string]: Record<string, string>;
|
|
36
|
+
credentials: {
|
|
37
|
+
client_email: string;
|
|
38
|
+
private_key: string;
|
|
39
|
+
};
|
|
40
|
+
};
|
|
41
|
+
type AzureTTSConfig = {
|
|
42
|
+
[key: string]: Record<string, string>;
|
|
43
|
+
credentials: {
|
|
44
|
+
subscriptionKey: string;
|
|
45
|
+
serviceRegion: string;
|
|
46
|
+
};
|
|
47
|
+
};
|
|
48
|
+
export { SynthOptions, AzureTTSConfig, DeepgramTtsConfig, ElevenLabsTtsConfig, GoogleTtsConfig };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
import { Readable } from "stream";
|
|
20
|
+
/**
|
|
21
|
+
* Creates a readable stream that processes text in chunks for better streaming performance.
|
|
22
|
+
* This utility ensures that chunks are processed in parallel but streamed in the correct order.
|
|
23
|
+
*
|
|
24
|
+
* @param text - The text to be synthesized
|
|
25
|
+
* @param synthesizeChunk - Function that processes each chunk and returns a Buffer or Readable
|
|
26
|
+
* @returns A readable stream containing the synthesized audio
|
|
27
|
+
*/
|
|
28
|
+
export declare function createChunkedSynthesisStream(text: string, synthesizeChunk: (text: string, index: number) => Promise<Buffer | Readable>): Readable;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createChunkedSynthesisStream = createChunkedSynthesisStream;
|
|
4
|
+
/**
|
|
5
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
6
|
+
* http://github.com/fonoster/fonoster
|
|
7
|
+
*
|
|
8
|
+
* This file is part of Fonoster
|
|
9
|
+
*
|
|
10
|
+
* Licensed under the MIT License (the "License");
|
|
11
|
+
* you may not use this file except in compliance with
|
|
12
|
+
* the License. You may obtain a copy of the License at
|
|
13
|
+
*
|
|
14
|
+
* https://opensource.org/licenses/MIT
|
|
15
|
+
*
|
|
16
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
* See the License for the specific language governing permissions and
|
|
20
|
+
* limitations under the License.
|
|
21
|
+
*/
|
|
22
|
+
const stream_1 = require("stream");
|
|
23
|
+
const logger_1 = require("@fonoster/logger");
|
|
24
|
+
const textChunksByFirstNaturalPause_1 = require("../../handlers/utils/textChunksByFirstNaturalPause");
|
|
25
|
+
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
26
|
+
/**
|
|
27
|
+
* Creates a readable stream that processes text in chunks for better streaming performance.
|
|
28
|
+
* This utility ensures that chunks are processed in parallel but streamed in the correct order.
|
|
29
|
+
*
|
|
30
|
+
* @param text - The text to be synthesized
|
|
31
|
+
* @param synthesizeChunk - Function that processes each chunk and returns a Buffer or Readable
|
|
32
|
+
* @returns A readable stream containing the synthesized audio
|
|
33
|
+
*/
|
|
34
|
+
function createChunkedSynthesisStream(text, synthesizeChunk) {
|
|
35
|
+
const chunks = (0, textChunksByFirstNaturalPause_1.textChunksByFirstNaturalPause)(text);
|
|
36
|
+
const stream = new stream_1.Readable({ read() { } });
|
|
37
|
+
if (chunks.length === 0) {
|
|
38
|
+
logger.verbose("no text chunks to synthesize, returning empty stream");
|
|
39
|
+
stream.push(null);
|
|
40
|
+
return stream;
|
|
41
|
+
}
|
|
42
|
+
logger.verbose(`processing ${chunks.length} text chunks for synthesis`);
|
|
43
|
+
const results = new Array(chunks.length);
|
|
44
|
+
let nextIndexToPush = 0;
|
|
45
|
+
let hasError = false;
|
|
46
|
+
function observeQueue() {
|
|
47
|
+
if (nextIndexToPush < results.length &&
|
|
48
|
+
results[nextIndexToPush] !== undefined) {
|
|
49
|
+
stream.push(results[nextIndexToPush]);
|
|
50
|
+
nextIndexToPush++;
|
|
51
|
+
setImmediate(observeQueue);
|
|
52
|
+
}
|
|
53
|
+
else if (nextIndexToPush < results.length) {
|
|
54
|
+
setTimeout(observeQueue, 10);
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
stream.push(null);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
observeQueue();
|
|
61
|
+
chunks.forEach((chunkText, index) => {
|
|
62
|
+
synthesizeChunk(chunkText, index)
|
|
63
|
+
.then((synthesizedText) => {
|
|
64
|
+
if (!hasError) {
|
|
65
|
+
results[index] = synthesizedText;
|
|
66
|
+
}
|
|
67
|
+
})
|
|
68
|
+
.catch((error) => {
|
|
69
|
+
if (!hasError) {
|
|
70
|
+
hasError = true;
|
|
71
|
+
logger.error(`chunk synthesis failed: ${error.message}`);
|
|
72
|
+
stream.emit("error", new Error(`Synthesis failed: ${error.message}`));
|
|
73
|
+
stream.push(null);
|
|
74
|
+
}
|
|
75
|
+
});
|
|
76
|
+
});
|
|
77
|
+
return stream;
|
|
78
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
import { Readable } from "stream";
|
|
20
|
+
/**
 * Builds a Readable stream that carries no data and is used to report a
 * failure: an Error created from `errorMessage` is raised through the
 * stream's "error" event.
 *
 * @param errorMessage - Message of the Error reported by the stream
 * @returns The Readable stream that reports the error
 */
declare function createErrorStream(errorMessage: string): Readable;
|
|
21
|
+
export { createErrorStream };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.createErrorStream = createErrorStream;
|
|
4
|
+
/**
|
|
5
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
6
|
+
* http://github.com/fonoster/fonoster
|
|
7
|
+
*
|
|
8
|
+
* This file is part of Fonoster
|
|
9
|
+
*
|
|
10
|
+
* Licensed under the MIT License (the "License");
|
|
11
|
+
* you may not use this file except in compliance with
|
|
12
|
+
* the License. You may obtain a copy of the License at
|
|
13
|
+
*
|
|
14
|
+
* https://opensource.org/licenses/MIT
|
|
15
|
+
*
|
|
16
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
* See the License for the specific language governing permissions and
|
|
20
|
+
* limitations under the License.
|
|
21
|
+
*/
|
|
22
|
+
const stream_1 = require("stream");
|
|
23
|
+
function createErrorStream(errorMessage) {
|
|
24
|
+
const errorStream = new stream_1.Readable({ read() { } });
|
|
25
|
+
errorStream.emit("error", new Error(errorMessage));
|
|
26
|
+
errorStream.push(null);
|
|
27
|
+
return errorStream;
|
|
28
|
+
}
|
|
@@ -48,10 +48,10 @@ type VoiceClient = {
|
|
|
48
48
|
getTranscriptionsStream: () => Stream;
|
|
49
49
|
};
|
|
50
50
|
type TextToSpeech = {
|
|
51
|
-
synthesize: (text: string, options: Record<string, unknown>) =>
|
|
51
|
+
synthesize: (text: string, options: Record<string, unknown>) => {
|
|
52
52
|
ref: string;
|
|
53
53
|
stream: Readable;
|
|
54
|
-
}
|
|
54
|
+
};
|
|
55
55
|
};
|
|
56
56
|
type SpeechToText = {
|
|
57
57
|
transcribe: (stream: Stream) => Promise<SpeechResult>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/apiserver",
|
|
3
|
-
"version": "0.9.35",
|
|
3
|
+
"version": "0.9.39",
|
|
4
4
|
"description": "APIServer for Fonoster",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -73,5 +73,5 @@
|
|
|
73
73
|
"@types/uuid": "^10.0.0",
|
|
74
74
|
"@types/validator": "^13.12.0"
|
|
75
75
|
},
|
|
76
|
-
"gitHead": "
|
|
76
|
+
"gitHead": "ff9bc63efb101ceb4c6c4e08b7b11f720336454c"
|
|
77
77
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|