@pico-brief/speech-services-parallel 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +372 -0
- package/dist/KeyManager.d.ts +41 -0
- package/dist/KeyManager.d.ts.map +1 -0
- package/dist/KeyManager.js +61 -0
- package/dist/KeyManager.js.map +1 -0
- package/dist/audioFormat.d.ts +24 -0
- package/dist/audioFormat.d.ts.map +1 -0
- package/dist/audioFormat.js +52 -0
- package/dist/audioFormat.js.map +1 -0
- package/dist/clientFactory.d.ts +26 -0
- package/dist/clientFactory.d.ts.map +1 -0
- package/dist/clientFactory.js +28 -0
- package/dist/clientFactory.js.map +1 -0
- package/dist/concurrency.d.ts +25 -0
- package/dist/concurrency.d.ts.map +1 -0
- package/dist/concurrency.js +84 -0
- package/dist/concurrency.js.map +1 -0
- package/dist/errors.d.ts +24 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +48 -0
- package/dist/errors.js.map +1 -0
- package/dist/helpers.d.ts +48 -0
- package/dist/helpers.d.ts.map +1 -0
- package/dist/helpers.js +73 -0
- package/dist/helpers.js.map +1 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/merge.d.ts +35 -0
- package/dist/merge.d.ts.map +1 -0
- package/dist/merge.js +37 -0
- package/dist/merge.js.map +1 -0
- package/dist/retry.d.ts +38 -0
- package/dist/retry.d.ts.map +1 -0
- package/dist/retry.js +68 -0
- package/dist/retry.js.map +1 -0
- package/dist/synthesizeParallel.d.ts +32 -0
- package/dist/synthesizeParallel.d.ts.map +1 -0
- package/dist/synthesizeParallel.js +144 -0
- package/dist/synthesizeParallel.js.map +1 -0
- package/dist/transcribeParallel.d.ts +35 -0
- package/dist/transcribeParallel.d.ts.map +1 -0
- package/dist/transcribeParallel.js +131 -0
- package/dist/transcribeParallel.js.map +1 -0
- package/dist/types.d.ts +169 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +9 -0
- package/dist/types.js.map +1 -0
- package/package.json +51 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parallel text-to-speech synthesis with audio concatenation.
|
|
3
|
+
*
|
|
4
|
+
* Each text chunk is synthesized independently (with retry and credential
|
|
5
|
+
* rotation), written to a temporary file to keep memory usage low, and then
|
|
6
|
+
* concatenated into a single audio buffer via ffmpeg (or `Buffer.concat` as
|
|
7
|
+
* a fallback). The result includes per-chunk metadata with timing offsets.
|
|
8
|
+
*/
|
|
9
|
+
import fs from "fs";
|
|
10
|
+
import path from "path";
|
|
11
|
+
import os from "os";
|
|
12
|
+
import { exec } from "child_process";
|
|
13
|
+
import { promisify } from "util";
|
|
14
|
+
import { getAudioDuration } from "@pico-brief/audio-duration";
|
|
15
|
+
import { KeyManager } from "./KeyManager.js";
|
|
16
|
+
import { createClientFromCredential } from "./clientFactory.js";
|
|
17
|
+
import { withRetry } from "./retry.js";
|
|
18
|
+
import { mapWithConcurrency } from "./concurrency.js";
|
|
19
|
+
import { generateRandomString, bufferToArrayBuffer } from "./helpers.js";
|
|
20
|
+
import { detectAudioFormat, audioFormatToExtension } from "./audioFormat.js";
|
|
21
|
+
const commander = promisify(exec);
|
|
22
|
+
/** Directory for temporary chunk audio files. */
|
|
23
|
+
const workdir = os.tmpdir();
|
|
24
|
+
/**
 * Matches a locale prefix at the start of a voice name: a 2–3 letter language,
 * an optional 4-letter script, and a 2-letter or 3-digit region, followed by
 * `-` or the end of the string (e.g. "en-US-JennyNeural", "sr-Latn-RS-Voice").
 */
const VOICE_LOCALE_PATTERN = /^[a-z]{2,3}(?:-[A-Z][a-z]{3})?-(?:[A-Z]{2}|\d{3})(?=-|$)/;

/**
 * Extracts the locale prefix from a voice name, if the name starts with one.
 *
 * Voice identifiers that merely contain dashes (UUIDs, "aura-asteria-en", …)
 * do not match and yield `undefined`, so the caller can fall back to the
 * configured languages instead of reporting a made-up language.
 *
 * @param voice - Voice name or ID reported by the provider.
 * @returns The locale prefix (e.g. "en-US"), or `undefined` when there is none.
 */
function localeFromVoiceName(voice) {
    if (typeof voice !== "string") {
        return undefined;
    }
    return VOICE_LOCALE_PATTERN.exec(voice)?.[0];
}

/**
 * Synthesizes multiple text chunks into a single audio buffer in parallel.
 *
 * Every chunk is synthesized through `withRetry` (sharing one `KeyManager`
 * built from `params.credentials`), written to a temporary file under the OS
 * temp directory, and then joined: via ffmpeg's concat demuxer when
 * `ffmpegPath` is set, otherwise via a plain `Buffer.concat` (a warning is
 * logged in that case). All temporary files are removed in a `finally` block,
 * whether the call succeeds or fails.
 *
 * @param params - Configuration including provider, credentials, text chunks, and options.
 *   Top-level `gender`, `voice`, `languages` and `providerOptions` act as defaults;
 *   per-chunk values override them (`providerOptions` are shallow-merged).
 *   `retryTimeoutMs` defaults to 5 minutes.
 * @returns `{ audio, format, chunks }` — the combined audio buffer, the format
 *   reported for the first chunk (`"mp3"` when there are no chunks), and
 *   per-chunk metadata with cumulative start times. An empty `chunks` input
 *   yields an empty buffer without invoking ffmpeg.
 *
 * @example
 * ```ts
 * const result = await synthesizeParallel({
 *     provider: "openai",
 *     credentials: [{ apiKey: "sk-..." }],
 *     chunks: [{ text: "Hello" }, { text: "World" }],
 *     ffmpegPath: "ffmpeg",
 * });
 * fs.writeFileSync("output.mp3", result.audio);
 * ```
 */
export async function synthesizeParallel(params) {
    const { chunks, ffmpegPath } = params;
    const retryTimeoutMs = params.retryTimeoutMs ?? 5 * 60 * 1000;
    const { onProgress, signal, maxConcurrency } = params;
    const provider = params.provider;
    const defaultGender = params.gender;
    const defaultVoice = params.voice;
    const defaultLanguages = params.languages;
    const defaultProviderOptions = params.providerOptions;
    const keyManager = new KeyManager(params.credentials);
    const tmpPrefix = path.join(workdir, generateRandomString());
    const tmpFiles = [];
    try {
        // Synthesize all chunks with optional concurrency limiting.
        // Each chunk is written to disk immediately to avoid holding all audio in memory.
        let completed = 0;
        const chunkMetas = await mapWithConcurrency(chunks, async (input, i) => {
            // Merge per-chunk provider options on top of the defaults
            const resolvedProviderOptions = input.providerOptions
                ? { ...defaultProviderOptions, ...input.providerOptions }
                : defaultProviderOptions;
            const result = await withRetry({ keyManager, retryTimeoutMs, signal, operationName: "Synthesis" }, (credential) => {
                const client = createClientFromCredential(provider, credential);
                return client.synthesize({
                    provider,
                    text: input.text,
                    gender: input.gender ?? defaultGender,
                    voice: input.voice ?? defaultVoice,
                    languages: input.languages ?? defaultLanguages,
                    providerOptions: resolvedProviderOptions,
                });
            });
            // Compute duration while the audio buffer is still in scope
            const duration = getAudioDuration(bufferToArrayBuffer(result.audio));
            const ext = audioFormatToExtension(detectAudioFormat(result.audio));
            const chunkFile = `${tmpPrefix}_chunk_${i}.${ext}`;
            // Register the path before writing so a partially written file is still cleaned up
            tmpFiles.push(chunkFile);
            fs.writeFileSync(chunkFile, result.audio);
            onProgress?.(++completed, chunks.length);
            return {
                index: i,
                duration,
                voice: result.voice,
                format: result.format,
                ext,
                chunkFile,
            };
        }, maxConcurrency);
        // Sort by original index to ensure correct ordering
        chunkMetas.sort((a, b) => a.index - b.index);
        // Build per-chunk result metadata with cumulative start times
        let timeAcc = 0;
        const chunkResults = chunkMetas.map(({ index, duration, voice, format }) => {
            const startTime = timeAcc;
            timeAcc += duration;
            // Prefer a locale embedded in the voice name (e.g. "en-US-JennyNeural" → "en-US"),
            // otherwise fall back to the first language configured for the chunk
            const resolvedLanguages = chunks[index].languages ?? params.languages;
            const language = localeFromVoiceName(voice) ?? resolvedLanguages?.[0];
            return { chunkIndex: index, startTime, duration, voice, language, format, provider };
        });
        const format = chunkMetas[0]?.format ?? "mp3";
        // Nothing to join: avoid dereferencing chunkMetas[0] and skip ffmpeg entirely
        if (chunkMetas.length === 0) {
            return { audio: Buffer.alloc(0), format, chunks: chunkResults };
        }
        // Concatenate all chunk audio files into a single output
        let combinedAudio;
        if (ffmpegPath) {
            // Use ffmpeg's concat demuxer for seamless joining.
            // The extension detected for the first chunk names the output file.
            const outExt = chunkMetas[0].ext;
            const manifestFile = `${tmpPrefix}_manifest.txt`;
            // The manifest wraps each path in single quotes; a literal quote inside
            // a path has to be written as '\'' (close quote, escaped quote, reopen).
            const manifestContent = chunkMetas
                .map((m) => `file '${m.chunkFile.replace(/'/g, "'\\''")}'`)
                .join("\n");
            tmpFiles.push(manifestFile);
            fs.writeFileSync(manifestFile, manifestContent);
            const outFile = `${tmpPrefix}_out.${outExt}`;
            tmpFiles.push(outFile);
            // NOTE(review): the command is a shell string; `ffmpegPath` and the temp
            // paths are interpolated inside double quotes without further escaping.
            await commander(`"${ffmpegPath}" -f concat -safe 0 -i "${manifestFile}" -c copy "${outFile}"`);
            combinedAudio = fs.readFileSync(outFile);
        }
        else {
            // Fallback: naive buffer concatenation (may produce audible glitches)
            console.warn("ffmpegPath not set; using Buffer.concat — audio may not be seamlessly joined");
            combinedAudio = Buffer.concat(chunkMetas.map((m) => fs.readFileSync(m.chunkFile)));
        }
        return { audio: combinedAudio, format, chunks: chunkResults };
    }
    finally {
        // Clean up all temporary files regardless of success or failure
        for (const f of tmpFiles) {
            try {
                fs.unlinkSync(f);
            }
            catch { /* ignore cleanup errors */ }
        }
    }
}
|
|
144
|
+
//# sourceMappingURL=synthesizeParallel.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesizeParallel.js","sourceRoot":"","sources":["../src/synthesizeParallel.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,IAAI,EAAE,MAAM,eAAe,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AACjC,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AACzE,OAAO,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAS7E,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;AAElC,iDAAiD;AACjD,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;AAE5B;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,MAAgC;IACrE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,CAAC;IACtC,MAAM,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;IAC9D,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;IAEtD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IACjC,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC;IACpC,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC;IAClC,MAAM,gBAAgB,GAAG,MAAM,CAAC,SAAS,CAAC;IAC1C,MAAM,sBAAsB,GAAG,MAAM,CAAC,eAAe,CAAC;IAEtD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAuB,MAAM,CAAC,WAAqC,CAAC,CAAC;IAEtG,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,oBAAoB,EAAE,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,CAAC;QACD,4DAA4D;QAC5D,kFAAkF;QAClF,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,UAAU,GAAG,MAAM,kBAAkB,CACvC,MAAM,EACN,KAAK,EAAE,KAA2B,EAAE,CAAS,EAAE,EAAE;YAC7C,0DAA0D;YAC1D,MAAM,uBAAuB,GAAG,KAAK,CAAC,eAAe;gBACjD,CAAC,CAAC,EAAE,GAAG,sBAAsB,EAAE,GAAG,KAAK,CAAC,eAAe,EAAE;gBACzD,CAAC,CAAC,sBAAsB,CAAC;YAE7B,MAAM,MAAM,GAAG,MAAM,SAAS,CAC1B,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,EAAE,aAAa,EAAE,WAAW,EAAE,EAClE,CAAC,UAAU,EAAE,EAAE;gBACX,MAAM,MAAM,GAAG,0BAA0B,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;gBAChE,OAAO,MAAM,CAAC,UAAU,CAAC;oBACrB,QAAQ;oBACR,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,aAAa;oBACrC,KAAK,EAAE,KAAK
,CAAC,KAAK,IAAI,YAAY;oBAClC,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,gBAAgB;oBAC9C,eAAe,EAAE,uBAAuB;iBACZ,CAAC,CAAC;YACtC,CAAC,CACJ,CAAC;YAEF,4DAA4D;YAC5D,MAAM,QAAQ,GAAG,gBAAgB,CAAC,mBAAmB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YAErE,sDAAsD;YACtD,MAAM,GAAG,GAAG,sBAAsB,CAAC,iBAAiB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACpE,MAAM,SAAS,GAAG,GAAG,SAAS,UAAU,CAAC,IAAI,GAAG,EAAE,CAAC;YACnD,EAAE,CAAC,aAAa,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1C,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAEzB,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAEzC,OAAO;gBACH,KAAK,EAAE,CAAC;gBACR,QAAQ;gBACR,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,SAAS;aACZ,CAAC;QACN,CAAC,EACD,cAAc,CACjB,CAAC;QAEF,oDAAoD;QACpD,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE7C,8DAA8D;QAC9D,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,MAAM,YAAY,GAA4B,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;YAChG,MAAM,SAAS,GAAG,OAAO,CAAC;YAC1B,OAAO,IAAI,QAAQ,CAAC;YAEpB,+EAA+E;YAC/E,mEAAmE;YACnE,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,iBAAiB,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC;YACtE,MAAM,QAAQ,GACV,UAAU,CAAC,MAAM,IAAI,CAAC;gBAClB,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;gBAClC,CAAC,CAAC,iBAAiB,EAAE,CAAC,CAAC,CAAC,CAAC;YAEjC,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QACzF,CAAC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,KAAK,CAAC;QAE9C,yDAAyD;QACzD,IAAI,aAAqB,CAAC;QAC1B,IAAI,UAAU,EAAE,CAAC;YACb,mDAAmD;YACnD,MAAM,MAAM,GAAG,sBAAsB,CAAC,iBAAiB,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;YACnG,MAAM,YAAY,GAAG,GAAG,SAAS,eAAe,CAAC;YACjD,qEAAqE;YACrE,MAAM,eAAe,GAAG,UAAU;iBAC7B,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,SAAS,GAAG,CAAC;iBACjC,IAAI,CAAC,IAAI,CAAC,CAAC;YAChB,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;YAChD,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAE5B,MAAM,O
AAO,GAAG,GAAG,SAAS,QAAQ,MAAM,EAAE,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvB,MAAM,SAAS,CAAC,IAAI,UAAU,2BAA2B,YAAY,cAAc,OAAO,GAAG,CAAC,CAAC;YAC/F,aAAa,GAAG,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACJ,sEAAsE;YACtE,OAAO,CAAC,IAAI,CAAC,8EAA8E,CAAC,CAAC;YAC7F,aAAa,GAAG,MAAM,CAAC,MAAM,CACzB,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CACpD,CAAC;QACN,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;IAClE,CAAC;YAAS,CAAC;QACP,gEAAgE;QAChE,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACvB,IAAI,CAAC;gBAAC,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC,CAAC,2BAA2B,CAAC,CAAC;QACnE,CAAC;IACL,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parallel audio transcription with automatic chunking.
|
|
3
|
+
*
|
|
4
|
+
* Long audio files are split into overlapping chunks via ffmpeg, each chunk is
|
|
5
|
+
* transcribed in parallel (with retry and credential rotation), and the results
|
|
6
|
+
* are merged back into a single {@link TranscribeResult} with word-level timing.
|
|
7
|
+
*
|
|
8
|
+
* Short audio (≤ one chunk) is transcribed directly without any splitting.
|
|
9
|
+
*/
|
|
10
|
+
import type { TranscribeResult } from "@pico-brief/speech-services";
|
|
11
|
+
import type { TranscribeParallelParams } from "./types.js";
|
|
12
|
+
/**
 * Transcribes an audio buffer with the chosen provider, chunking long audio.
 *
 * Audio longer than `targetChunkDuration` (300 s unless overridden) is cut into
 * overlapping slices with ffmpeg. Each slice is transcribed with retry,
 * credential rotation and an optional concurrency limit, and the per-slice
 * results are merged so that words in the overlap regions appear only once.
 * Audio that fits in a single chunk is sent to the provider as-is.
 *
 * @param params - Provider, credentials, audio data, and tuning options.
 * @returns The merged transcription result.
 *
 * @example
 * ```ts
 * const result = await transcribeParallel({
 *     provider: "openai",
 *     credentials: [{ apiKey: "sk-..." }],
 *     audio: fs.readFileSync("interview.mp3"),
 *     ffmpegPath: "ffmpeg",
 * });
 * console.log(result.text);
 * ```
 */
export declare function transcribeParallel(params: TranscribeParallelParams): Promise<TranscribeResult>;
|
|
35
|
+
//# sourceMappingURL=transcribeParallel.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribeParallel.d.ts","sourceRoot":"","sources":["../src/transcribeParallel.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAQH,OAAO,KAAK,EAAE,gBAAgB,EAAoB,MAAM,6BAA6B,CAAC;AAStF,OAAO,KAAK,EAAE,wBAAwB,EAAwB,MAAM,YAAY,CAAC;AAOjF;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,wBAAwB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAoGpG"}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parallel audio transcription with automatic chunking.
|
|
3
|
+
*
|
|
4
|
+
* Long audio files are split into overlapping chunks via ffmpeg, each chunk is
|
|
5
|
+
* transcribed in parallel (with retry and credential rotation), and the results
|
|
6
|
+
* are merged back into a single {@link TranscribeResult} with word-level timing.
|
|
7
|
+
*
|
|
8
|
+
* Short audio (≤ one chunk) is transcribed directly without any splitting.
|
|
9
|
+
*/
|
|
10
|
+
import fs from "fs";
|
|
11
|
+
import path from "path";
|
|
12
|
+
import os from "os";
|
|
13
|
+
import { exec } from "child_process";
|
|
14
|
+
import { promisify } from "util";
|
|
15
|
+
import { getAudioDuration } from "@pico-brief/audio-duration";
|
|
16
|
+
import { KeyManager } from "./KeyManager.js";
|
|
17
|
+
import { createClientFromCredential } from "./clientFactory.js";
|
|
18
|
+
import { mergeTranscribeResults } from "./merge.js";
|
|
19
|
+
import { withRetry } from "./retry.js";
|
|
20
|
+
import { mapWithConcurrency } from "./concurrency.js";
|
|
21
|
+
import { generateRandomString, formatTimestamp, bufferToArrayBuffer } from "./helpers.js";
|
|
22
|
+
import { detectAudioFormat, audioFormatToExtension } from "./audioFormat.js";
|
|
23
|
+
const commander = promisify(exec);
|
|
24
|
+
/** Directory for temporary chunk files created during splitting. */
|
|
25
|
+
const workdir = os.tmpdir();
|
|
26
|
+
/**
 * Transcribes an audio buffer in parallel using the specified provider.
 *
 * The total duration is divided by `targetChunkDuration` (default 300 s) and
 * rounded to get the number of chunks. With a single chunk the audio is sent to
 * the provider directly. Otherwise the audio is written to a temporary file,
 * sliced with ffmpeg into pieces that extend `chunkOverlap` seconds (default 15)
 * past each side of the range the chunk "owns", transcribed with retry and
 * optional concurrency limiting, and merged via `mergeTranscribeResults`.
 * Temporary files are removed in a `finally` block.
 *
 * If the chunk count cannot be computed as a finite number (for example
 * `targetChunkDuration` is 0, or the duration is NaN), the audio is treated as
 * a single chunk instead of attempting to split it.
 *
 * @param params - Configuration including provider, credentials, audio data, and options.
 *   `retryTimeoutMs` defaults to 5 minutes. `onProgress` is invoked after each
 *   chunk on the multi-chunk path.
 * @returns The provider's result for single-chunk audio, or the merged result
 *   of all chunks.
 * @throws Error when the audio needs splitting but `ffmpegPath` is not set.
 *
 * @example
 * ```ts
 * const result = await transcribeParallel({
 *     provider: "openai",
 *     credentials: [{ apiKey: "sk-..." }],
 *     audio: fs.readFileSync("interview.mp3"),
 *     ffmpegPath: "ffmpeg",
 * });
 * console.log(result.text);
 * ```
 */
export async function transcribeParallel(params) {
    const { audio, languages, ffmpegPath } = params;
    const targetChunkDuration = params.targetChunkDuration ?? 300;
    const chunkOverlap = params.chunkOverlap ?? 15;
    const retryTimeoutMs = params.retryTimeoutMs ?? 5 * 60 * 1000;
    const { onProgress, signal, maxConcurrency } = params;
    const provider = params.provider;
    const providerOptions = params.providerOptions;
    const keyManager = new KeyManager(params.credentials);
    /**
     * Transcribes a single audio buffer using the provider with automatic retry
     * and credential rotation.
     */
    const transcribeBuffer = (buffer) => {
        return withRetry({ keyManager, retryTimeoutMs, signal, operationName: "Transcription" }, (credential) => {
            const client = createClientFromCredential(provider, credential);
            return client.transcribe({
                provider,
                audio: buffer,
                languages,
                providerOptions,
            });
        });
    };
    // Compute total duration and determine how many chunks we need.
    // A non-finite ratio (division by zero, NaN duration) would otherwise make
    // the chunk loop below run forever or not at all, so fall back to one chunk.
    const totalDuration = getAudioDuration(bufferToArrayBuffer(audio));
    const chunkRatio = totalDuration / targetChunkDuration;
    const numChunks = Number.isFinite(chunkRatio) ? Math.max(1, Math.round(chunkRatio)) : 1;
    // Single chunk — transcribe directly, no ffmpeg splitting needed
    if (numChunks === 1) {
        return transcribeBuffer(audio);
    }
    // Splitting shells out to ffmpeg; fail with a clear message rather than an opaque shell error
    if (!ffmpegPath) {
        throw new Error("ffmpegPath is required to split audio into multiple chunks");
    }
    // Multiple chunks — split the audio file and transcribe each piece
    const ext = audioFormatToExtension(detectAudioFormat(audio));
    const tmpPrefix = path.join(workdir, generateRandomString());
    const srcFile = `${tmpPrefix}_src.${ext}`;
    const tmpFiles = [srcFile];
    try {
        fs.writeFileSync(srcFile, audio);
        // D = ideal duration of each chunk (before adding overlap)
        const D = totalDuration / numChunks;
        // Build chunk file paths and ownership boundaries
        const chunkFiles = [];
        const chunkMetas = [];
        for (let i = 0; i < numChunks; i++) {
            const chunkFile = `${tmpPrefix}_chunk_${i}.${ext}`;
            tmpFiles.push(chunkFile);
            chunkFiles.push(chunkFile);
            // Each chunk "owns" a time range; the last chunk's range is open-ended.
            // The ranges are handed to the merge step together with each result.
            const ownedStart = i * D;
            const ownedEnd = i < numChunks - 1 ? (i + 1) * D : Infinity;
            chunkMetas.push({ ownedStart, ownedEnd });
        }
        // Slice all chunks in parallel via ffmpeg (each writes to a separate file).
        // Slices extend beyond the owned range by `chunkOverlap` on each side so
        // that words straddling chunk boundaries are captured by both neighbors.
        await Promise.all(chunkFiles.map((chunkFile, i) => {
            const sliceStart = Math.max(0, i * D - chunkOverlap);
            const sliceEnd = Math.min(totalDuration, (i + 1) * D + chunkOverlap);
            const startTs = formatTimestamp(sliceStart);
            const endTs = formatTimestamp(sliceEnd);
            return commander(`"${ffmpegPath}" -ss ${startTs} -to ${endTs} -i "${srcFile}" -c copy "${chunkFile}"`);
        }));
        // Transcribe each chunk with optional concurrency limiting
        let completed = 0;
        const chunkBoundaries = await mapWithConcurrency(chunkFiles, async (chunkFile, i) => {
            const chunkBuffer = fs.readFileSync(chunkFile);
            const result = await transcribeBuffer(chunkBuffer);
            onProgress?.(++completed, numChunks);
            return { result, ownedStart: chunkMetas[i].ownedStart, ownedEnd: chunkMetas[i].ownedEnd };
        }, maxConcurrency);
        // Merge all chunk results, deduplicating words at overlap boundaries
        return mergeTranscribeResults(chunkBoundaries);
    }
    finally {
        // Clean up all temporary files regardless of success or failure
        for (const f of tmpFiles) {
            try {
                fs.unlinkSync(f);
            }
            catch { /* ignore cleanup errors */ }
        }
    }
}
|
|
131
|
+
//# sourceMappingURL=transcribeParallel.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribeParallel.js","sourceRoot":"","sources":["../src/transcribeParallel.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,IAAI,EAAE,MAAM,eAAe,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AACjC,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,0BAA0B,EAAE,MAAM,oBAAoB,CAAC;AAChE,OAAO,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAEpD,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAC1F,OAAO,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAG7E,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;AAElC,oEAAoE;AACpE,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;AAE5B;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,MAAgC;IACrE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,MAAM,CAAC;IAChD,MAAM,mBAAmB,GAAG,MAAM,CAAC,mBAAmB,IAAI,GAAG,CAAC;IAC9D,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC;IAC/C,MAAM,cAAc,GAAG,MAAM,CAAC,cAAc,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;IAC9D,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;IAEtD,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IACjC,MAAM,eAAe,GAAG,MAAM,CAAC,eAAe,CAAC;IAE/C,MAAM,UAAU,GAAG,IAAI,UAAU,CAAuB,MAAM,CAAC,WAAqC,CAAC,CAAC;IAEtG;;;OAGG;IACH,MAAM,gBAAgB,GAAG,CAAC,MAAc,EAA6B,EAAE;QACnE,OAAO,SAAS,CACZ,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,EAAE,aAAa,EAAE,eAAe,EAAE,EACtE,CAAC,UAAU,EAAE,EAAE;YACX,MAAM,MAAM,GAAG,0BAA0B,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YAChE,OAAO,MAAM,CAAC,UAAU,CAAC;gBACrB,QAAQ;gBACR,KAAK,EAAE,MAAM;gBACb,SAAS;gBACT,eAAe;aACa,CAAC,CAAC;QACtC,CAAC,CACJ,CAAC;IACN,CAAC,CAAC;IAEF,+DAA+D;IAC/D,MAAM,aAAa,GAAG,gBAAgB,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC,CAAC;IACnE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,mBAAmB,CAAC,CAAC,CAAC;IAE/E,iEAAiE;IACjE,IAAI,SAAS,KAAK,CAAC;QAAE,OAAO,gBAAgB,CAAC,KAAK,CAAC,CAAC;IAEpD,mEAAmE;IACnE,MAAM,GAAG,GAAG,sBAAsB,CAAC,iBAAiB,CAAC,KAAK,CAAC
,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,oBAAoB,EAAE,CAAC,CAAC;IAC7D,MAAM,OAAO,GAAG,GAAG,SAAS,QAAQ,GAAG,EAAE,CAAC;IAC1C,MAAM,QAAQ,GAAa,CAAC,OAAO,CAAC,CAAC;IAErC,IAAI,CAAC;QACD,EAAE,CAAC,aAAa,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEjC,2DAA2D;QAC3D,MAAM,CAAC,GAAG,aAAa,GAAG,SAAS,CAAC;QAEpC,kDAAkD;QAClD,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,UAAU,GAA+C,EAAE,CAAC;QAElE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,SAAS,GAAG,GAAG,SAAS,UAAU,CAAC,IAAI,GAAG,EAAE,CAAC;YACnD,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACzB,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAE3B,yEAAyE;YACzE,iEAAiE;YACjE,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,CAAC;YACzB,MAAM,QAAQ,GAAG,CAAC,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;YAC5D,UAAU,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,4EAA4E;QAC5E,yEAAyE;QACzE,yEAAyE;QACzE,MAAM,OAAO,CAAC,GAAG,CACb,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE;YAC5B,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;YACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,CAAC;YACrE,MAAM,OAAO,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC5C,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;YACxC,OAAO,SAAS,CAAC,IAAI,UAAU,SAAS,OAAO,QAAQ,KAAK,QAAQ,OAAO,cAAc,SAAS,GAAG,CAAC,CAAC;QAC3G,CAAC,CAAC,CACL,CAAC;QAEF,2DAA2D;QAC3D,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,eAAe,GAAG,MAAM,kBAAkB,CAC5C,UAAU,EACV,KAAK,EAAE,SAAS,EAAE,CAAC,EAA0B,EAAE;YAC3C,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;YAC/C,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC,WAAW,CAAC,CAAC;YACnD,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;YACrC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC9F,CAAC,EACD,cAAc,CACjB,CAAC;QAEF,qEAAqE;QACrE,OAAO,sBAAsB,CAAC,eAAe,CAAC,CAAC;IACnD,CAAC;YAAS,CAAC;QACP,gEAAgE;QAChE,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACvB,IAAI,CAAC;gBAAC,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAAC,CAAC;YA
AC,MAAM,CAAC,CAAC,2BAA2B,CAAC,CAAC;QACnE,CAAC;IACL,CAAC;AACL,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the parallel speech services API.
|
|
3
|
+
*
|
|
4
|
+
* This file defines the parameter and result types for {@link transcribeParallel}
|
|
5
|
+
* and {@link synthesizeParallel}. Discriminated unions are used so that TypeScript
|
|
6
|
+
* can narrow the credential and option types based on the chosen `provider`.
|
|
7
|
+
*/
|
|
8
|
+
import type { AssemblyAIConfig, AssemblyAITranscribeOptions, AzureConfig, AzureTranscribeOptions, AzureSynthesizeOptions, CartesiaConfig, CartesiaSynthesizeOptions, DeepgramConfig, DeepgramTranscribeOptions, DeepgramSynthesizeOptions, ElevenLabsConfig, ElevenLabsTranscribeOptions, ElevenLabsSynthesizeOptions, GoogleConfig, GoogleTranscribeOptions, GoogleSynthesizeOptions, OpenAIConfig, OpenAITranscribeOptions, OpenAISynthesizeOptions, PlayHTConfig, PlayHTSynthesizeOptions, RevAIConfig, RevAITranscribeOptions, SpeechmaticsConfig, SpeechmaticsTranscribeOptions } from "@pico-brief/speech-services";
|
|
9
|
+
/**
 * Credential/config type for every supported speech-to-text provider,
 * keyed by provider name.
 */
export interface TranscribeProviderConfigMap {
    assemblyai: AssemblyAIConfig;
    azure: AzureConfig;
    deepgram: DeepgramConfig;
    elevenlabs: ElevenLabsConfig;
    google: GoogleConfig;
    openai: OpenAIConfig;
    revai: RevAIConfig;
    speechmatics: SpeechmaticsConfig;
}
|
|
20
|
+
/**
 * Credential/config type for every supported text-to-speech provider,
 * keyed by provider name.
 */
export interface SynthesizeProviderConfigMap {
    azure: AzureConfig;
    cartesia: CartesiaConfig;
    deepgram: DeepgramConfig;
    elevenlabs: ElevenLabsConfig;
    google: GoogleConfig;
    openai: OpenAIConfig;
    playht: PlayHTConfig;
}
|
|
30
|
+
/**
 * Provider-specific transcription options type for every supported
 * speech-to-text provider, keyed by provider name.
 */
export interface TranscribeProviderOptionsMap {
    assemblyai: AssemblyAITranscribeOptions;
    azure: AzureTranscribeOptions;
    deepgram: DeepgramTranscribeOptions;
    elevenlabs: ElevenLabsTranscribeOptions;
    google: GoogleTranscribeOptions;
    openai: OpenAITranscribeOptions;
    revai: RevAITranscribeOptions;
    speechmatics: SpeechmaticsTranscribeOptions;
}
|
|
41
|
+
/**
 * Provider-specific synthesis options type for every supported
 * text-to-speech provider, keyed by provider name.
 */
export interface SynthesizeProviderOptionsMap {
    azure: AzureSynthesizeOptions;
    cartesia: CartesiaSynthesizeOptions;
    deepgram: DeepgramSynthesizeOptions;
    elevenlabs: ElevenLabsSynthesizeOptions;
    google: GoogleSynthesizeOptions;
    openai: OpenAISynthesizeOptions;
    playht: PlayHTSynthesizeOptions;
}
|
|
51
|
+
/**
 * Fields of {@link TranscribeParallelParams} that do not depend on the
 * selected provider.
 */
type TranscribeParallelBase = {
    /** The raw audio to transcribe. */
    audio: Buffer;
    /** Optional language hints forwarded to the provider (e.g. `["en"]`). */
    languages?: string[];
    /** Desired length of each audio chunk, in seconds. @default 300 */
    targetChunkDuration?: number;
    /**
     * Seconds of audio shared between neighbouring chunks so that words at a
     * boundary are not lost. @default 15
     */
    chunkOverlap?: number;
    /** Path to (or command name of) the ffmpeg binary used for splitting audio. */
    ffmpegPath: string;
    /** How long, in milliseconds, transient failures keep being retried. @default 300000 */
    retryTimeoutMs?: number;
    /** Upper bound on chunks transcribed at the same time. Unlimited by default. */
    maxConcurrency?: number;
    /** Abort signal for cancelling the operation early. */
    signal?: AbortSignal;
    /** Invoked whenever a chunk finishes; `completed` counts up to `total`. */
    onProgress?: (completed: number, total: number) => void;
};
|
|
72
|
+
/**
 * Parameters accepted by {@link transcribeParallel}.
 *
 * The type is a discriminated union over `provider`: choosing, say, `"openai"`
 * narrows `credentials` to `OpenAIConfig[]` and `providerOptions` to
 * `OpenAITranscribeOptions`.
 */
export type TranscribeParallelParams = {
    [Provider in keyof TranscribeProviderConfigMap]: TranscribeParallelBase & {
        /** Which speech-to-text provider to use. */
        provider: Provider;
        /** Credential objects for that provider; supplying several enables automatic rotation. */
        credentials: TranscribeProviderConfigMap[Provider][];
        /** Additional provider-specific options handed to the underlying client. */
        providerOptions?: TranscribeProviderOptionsMap[Provider];
    };
}[keyof TranscribeProviderConfigMap];
|
|
88
|
+
/** A single text chunk to be synthesized, with optional per-chunk overrides of the top-level defaults. */
|
|
89
|
+
export interface SynthesizeChunkInput {
|
|
90
|
+
/** The text content to convert to speech. This is the only required field. */
|
|
91
|
+
text: string;
|
|
92
|
+
/** Override the default gender for this chunk. */
|
|
93
|
+
gender?: "male" | "female";
|
|
94
|
+
/** Override the default voice ID/name for this chunk. */
|
|
95
|
+
voice?: string;
|
|
96
|
+
/** Override the default languages for this chunk. */
|
|
97
|
+
languages?: string[];
|
|
98
|
+
/** Override the default provider options for this chunk. Merged on top of the top-level `providerOptions`. Typed loosely as `Record<string, unknown>`, so it is not narrowed by the selected `provider`. */
|
|
99
|
+
providerOptions?: Record<string, unknown>;
|
|
100
|
+
}
|
|
101
|
+
/** Metadata about a single synthesized chunk within the combined audio; reported in the `chunks` array of {@link SynthesizeParallelResult}. */
|
|
102
|
+
export interface SynthesizeChunkResult {
|
|
103
|
+
/** Zero-based index of this chunk in the original input array. */
|
|
104
|
+
chunkIndex: number;
|
|
105
|
+
/** Offset of this chunk within the combined audio, in seconds. */
|
|
106
|
+
startTime: number;
|
|
107
|
+
/** Duration of this chunk in seconds. */
|
|
108
|
+
duration: number;
|
|
109
|
+
/** The voice ID or name that was actually used. */
|
|
110
|
+
voice: string;
|
|
111
|
+
/** Language derived from the voice name or from the `languages` array. Optional, so it may be absent. */
|
|
112
|
+
language?: string;
|
|
113
|
+
/** Audio format of this chunk (e.g. `"mp3"`). */
|
|
114
|
+
format: string;
|
|
115
|
+
/** Name of the provider that produced this chunk. */
|
|
116
|
+
provider: string;
|
|
117
|
+
}
|
|
118
|
+
/** The combined result returned by {@link synthesizeParallel}: one audio buffer plus per-chunk metadata. */
|
|
119
|
+
export interface SynthesizeParallelResult {
|
|
120
|
+
/** The concatenated audio data for all chunks. */
|
|
121
|
+
audio: Buffer;
|
|
122
|
+
/** Audio format of the combined output (e.g. `"mp3"`). */
|
|
123
|
+
format: string;
|
|
124
|
+
/** Per-chunk metadata including timing offsets and voices used; see {@link SynthesizeChunkResult}. */
|
|
125
|
+
chunks: SynthesizeChunkResult[];
|
|
126
|
+
}
|
|
127
|
+
/** Provider-agnostic fields shared by every variant of {@link SynthesizeParallelParams}; each variant intersects this type with its provider-specific fields. */
|
|
128
|
+
type SynthesizeParallelBase = {
|
|
129
|
+
/** Array of text chunks to synthesize; each entry may carry its own per-chunk overrides. */
|
|
130
|
+
chunks: SynthesizeChunkInput[];
|
|
131
|
+
/** Default voice gender applied to chunks that don't specify their own. */
|
|
132
|
+
gender?: "male" | "female";
|
|
133
|
+
/** Default voice ID/name applied to chunks that don't specify their own. */
|
|
134
|
+
voice?: string;
|
|
135
|
+
/** Default language hints applied to chunks that don't specify their own. */
|
|
136
|
+
languages?: string[];
|
|
137
|
+
/** Optional path to ffmpeg for concatenating chunk audio files. If omitted, `Buffer.concat` is used as a fallback. */
|
|
138
|
+
ffmpegPath?: string;
|
|
139
|
+
/** Maximum time in milliseconds (five minutes by default) to keep retrying transient failures. @default 300000 */
|
|
140
|
+
retryTimeoutMs?: number;
|
|
141
|
+
/** Maximum number of chunks to synthesize concurrently. Unlimited by default. */
|
|
142
|
+
maxConcurrency?: number;
|
|
143
|
+
/** Optional `AbortSignal` used to cancel the operation early. */
|
|
144
|
+
signal?: AbortSignal;
|
|
145
|
+
/** Progress callback invoked each time a chunk finishes. `completed` counts up to `total`. */
|
|
146
|
+
onProgress?: (completed: number, total: number) => void;
|
|
147
|
+
};
|
|
148
|
+
/**
|
|
149
|
+
* Parameters accepted by {@link synthesizeParallel}.
|
|
150
|
+
*
|
|
151
|
+
* This is a discriminated union with one variant per key of `SynthesizeProviderConfigMap` — setting `provider` to e.g. `"elevenlabs"` narrows
|
|
152
|
+
* `credentials` to `ElevenLabsConfig[]` and `providerOptions` to `ElevenLabsSynthesizeOptions`.
|
|
153
|
+
*/
|
|
154
|
+
export type SynthesizeParallelParams = {
|
|
155
|
+
[P in keyof SynthesizeProviderConfigMap]: SynthesizeParallelBase & {
|
|
156
|
+
/** The text-to-speech provider to use. This literal is the discriminant that selects the union variant. */
|
|
157
|
+
provider: P;
|
|
158
|
+
/** One or more credential objects for the chosen provider. Multiple credentials enable automatic rotation. */
|
|
159
|
+
credentials: SynthesizeProviderConfigMap[P][];
|
|
160
|
+
/** Optional extra provider-specific options passed through to the underlying client. */
|
|
161
|
+
providerOptions?: SynthesizeProviderOptionsMap[P];
|
|
162
|
+
};
|
|
163
|
+
}[keyof SynthesizeProviderConfigMap];
|
|
164
|
+
/** @internal Union of all transcription provider credential types, i.e. every value type of `TranscribeProviderConfigMap`. */
|
|
165
|
+
export type TranscribeCredential = TranscribeProviderConfigMap[keyof TranscribeProviderConfigMap];
|
|
166
|
+
/** @internal Union of all synthesis provider credential types, i.e. every value type of `SynthesizeProviderConfigMap`. */
|
|
167
|
+
export type SynthesizeCredential = SynthesizeProviderConfigMap[keyof SynthesizeProviderConfigMap];
|
|
168
|
+
export {};
|
|
169
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EACR,gBAAgB,EAChB,2BAA2B,EAC3B,WAAW,EACX,sBAAsB,EACtB,sBAAsB,EACtB,cAAc,EACd,yBAAyB,EACzB,cAAc,EACd,yBAAyB,EACzB,yBAAyB,EACzB,gBAAgB,EAChB,2BAA2B,EAC3B,2BAA2B,EAC3B,YAAY,EACZ,uBAAuB,EACvB,uBAAuB,EACvB,YAAY,EACZ,uBAAuB,EACvB,uBAAuB,EACvB,YAAY,EACZ,uBAAuB,EACvB,WAAW,EACX,sBAAsB,EACtB,kBAAkB,EAClB,6BAA6B,EAChC,MAAM,6BAA6B,CAAC;AAIrC,2EAA2E;AAC3E,MAAM,WAAW,2BAA2B;IACxC,KAAK,EAAE,WAAW,CAAC;IACnB,UAAU,EAAE,gBAAgB,CAAC;IAC7B,QAAQ,EAAE,cAAc,CAAC;IACzB,UAAU,EAAE,gBAAgB,CAAC;IAC7B,MAAM,EAAE,YAAY,CAAC;IACrB,MAAM,EAAE,YAAY,CAAC;IACrB,KAAK,EAAE,WAAW,CAAC;IACnB,YAAY,EAAE,kBAAkB,CAAC;CACpC;AAED,uEAAuE;AACvE,MAAM,WAAW,2BAA2B;IACxC,KAAK,EAAE,WAAW,CAAC;IACnB,QAAQ,EAAE,cAAc,CAAC;IACzB,QAAQ,EAAE,cAAc,CAAC;IACzB,UAAU,EAAE,gBAAgB,CAAC;IAC7B,MAAM,EAAE,YAAY,CAAC;IACrB,MAAM,EAAE,YAAY,CAAC;IACrB,MAAM,EAAE,YAAY,CAAC;CACxB;AAID,mFAAmF;AACnF,MAAM,WAAW,4BAA4B;IACzC,KAAK,EAAE,sBAAsB,CAAC;IAC9B,UAAU,EAAE,2BAA2B,CAAC;IACxC,QAAQ,EAAE,yBAAyB,CAAC;IACpC,UAAU,EAAE,2BAA2B,CAAC;IACxC,MAAM,EAAE,uBAAuB,CAAC;IAChC,MAAM,EAAE,uBAAuB,CAAC;IAChC,KAAK,EAAE,sBAAsB,CAAC;IAC9B,YAAY,EAAE,6BAA6B,CAAC;CAC/C;AAED,+EAA+E;AAC/E,MAAM,WAAW,4BAA4B;IACzC,KAAK,EAAE,sBAAsB,CAAC;IAC9B,QAAQ,EAAE,yBAAyB,CAAC;IACpC,QAAQ,EAAE,yBAAyB,CAAC;IACpC,UAAU,EAAE,2BAA2B,CAAC;IACxC,MAAM,EAAE,uBAAuB,CAAC;IAChC,MAAM,EAAE,uBAAuB,CAAC;IAChC,MAAM,EAAE,uBAAuB,CAAC;CACnC;AAID,8EAA8E;AAC9E,KAAK,sBAAsB,GAAG;IAC1B,oCAAoC;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,uDAAuD;IACvD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,mEAAmE;IACnE,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,mGAAmG;IACnG,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iFAAiF;IACjF,UAAU,EAAE,MAAM,CAAC;IACnB,wFAAwF;IACxF,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,wFAAwF;IACxF,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,4CAA4C;IAC5C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,2EAA2E;IAC3E,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CAC3D,CAAC;AAEF;;;;;GAKG;AACH,MAAM,MAAM,wBAAwB,GAAG;KAClC,CAAC,IAAI,MAAM,2BAA2B,GAAG,sB
AAsB,GAAG;QAC/D,0CAA0C;QAC1C,QAAQ,EAAE,CAAC,CAAC;QACZ,8GAA8G;QAC9G,WAAW,EAAE,2BAA2B,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9C,+EAA+E;QAC/E,eAAe,CAAC,EAAE,4BAA4B,CAAC,CAAC,CAAC,CAAC;KACrD;CACJ,CAAC,MAAM,2BAA2B,CAAC,CAAC;AAIrC,gFAAgF;AAChF,MAAM,WAAW,oBAAoB;IACjC,6CAA6C;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,kDAAkD;IAClD,MAAM,CAAC,EAAE,MAAM,GAAG,QAAQ,CAAC;IAC3B,yDAAyD;IACzD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,qDAAqD;IACrD,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,8GAA8G;IAC9G,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC7C;AAED,2EAA2E;AAC3E,MAAM,WAAW,qBAAqB;IAClC,kEAAkE;IAClE,UAAU,EAAE,MAAM,CAAC;IACnB,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAC;IAClB,yCAAyC;IACzC,QAAQ,EAAE,MAAM,CAAC;IACjB,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,0EAA0E;IAC1E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,MAAM,EAAE,MAAM,CAAC;IACf,6CAA6C;IAC7C,QAAQ,EAAE,MAAM,CAAC;CACpB;AAED,kEAAkE;AAClE,MAAM,WAAW,wBAAwB;IACrC,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd,0DAA0D;IAC1D,MAAM,EAAE,MAAM,CAAC;IACf,mEAAmE;IACnE,MAAM,EAAE,qBAAqB,EAAE,CAAC;CACnC;AAED,8EAA8E;AAC9E,KAAK,sBAAsB,GAAG;IAC1B,0CAA0C;IAC1C,MAAM,EAAE,oBAAoB,EAAE,CAAC;IAC/B,2EAA2E;IAC3E,MAAM,CAAC,EAAE,MAAM,GAAG,QAAQ,CAAC;IAC3B,4EAA4E;IAC5E,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,6GAA6G;IAC7G,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wFAAwF;IACxF,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,wFAAwF;IACxF,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,4CAA4C;IAC5C,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,2EAA2E;IAC3E,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CAC3D,CAAC;AAEF;;;;;GAKG;AACH,MAAM,MAAM,wBAAwB,GAAG;KAClC,CAAC,IAAI,MAAM,2BAA2B,GAAG,sBAAsB,GAAG;QAC/D,0CAA0C;QAC1C,QAAQ,EAAE,CAAC,CAAC;QACZ,8GAA8G;QAC9G,WAAW,EAAE,2BAA2B,CAAC,CAAC,CAAC,EAAE,CAAC;QAC9C,+EAA+E;QAC/E,eAAe,CAAC,EAAE,4BAA4B,CAAC,CAAC,CAAC,CAAC;KACrD;CACJ,CAAC,MAAM,2BAA2B,CAAC,CAAC;AAIrC,sEAAsE;AACtE,MAAM,MAAM,oBAAoB,GAAG,2BAA2B,CAAC,MAAM,2BAA2B,CAAC,CAAC;AAClG,kEAAkE;AAClE,MAAM,MAAM,oBAAoB,GAAG,2BAA2B,CAAC,MAAM,2BAA2B,CAAC,CAAC"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the parallel speech services API.
|
|
3
|
+
*
|
|
4
|
+
* This file defines the parameter and result types for {@link transcribeParallel}
|
|
5
|
+
* and {@link synthesizeParallel}. Discriminated unions are used so that TypeScript
|
|
6
|
+
* can narrow the credential and option types based on the chosen `provider`.
|
|
7
|
+
*/
|
|
8
|
+
export {};
|
|
9
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG"}
|
package/package.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@pico-brief/speech-services-parallel",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Parallel STT/TTS orchestration with credential rotation, built on @pico-brief/speech-services",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"dist"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "tsc",
|
|
19
|
+
"dev": "tsc --watch",
|
|
20
|
+
"test": "npm run build && tsx --test test/*.test.ts",
|
|
21
|
+
"prepublishOnly": "npm run build"
|
|
22
|
+
},
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "https://github.com/PicoBrief/speech-services-parallel.git"
|
|
26
|
+
},
|
|
27
|
+
"engines": {
|
|
28
|
+
"node": ">=18.0.0"
|
|
29
|
+
},
|
|
30
|
+
"keywords": [
|
|
31
|
+
"speech-to-text",
|
|
32
|
+
"text-to-speech",
|
|
33
|
+
"stt",
|
|
34
|
+
"tts",
|
|
35
|
+
"parallel",
|
|
36
|
+
"credential-rotation",
|
|
37
|
+
"transcription",
|
|
38
|
+
"speech-synthesis"
|
|
39
|
+
],
|
|
40
|
+
"license": "MIT",
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"@pico-brief/audio-duration": "^1.0.0",
|
|
43
|
+
"@pico-brief/speech-services": "^1.0.0"
|
|
44
|
+
},
|
|
45
|
+
"devDependencies": {
|
|
46
|
+
"@types/node": "^22.0.0",
|
|
47
|
+
"dotenv": "^17.3.1",
|
|
48
|
+
"tsx": "^4.21.0",
|
|
49
|
+
"typescript": "^5.0.0"
|
|
50
|
+
}
|
|
51
|
+
}
|