@pico-brief/speech-services 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +177 -0
- package/dist/audio-sampler.d.ts +9 -0
- package/dist/audio-sampler.d.ts.map +1 -0
- package/dist/audio-sampler.js +135 -0
- package/dist/audio-sampler.js.map +1 -0
- package/dist/client.d.ts +3 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +23 -0
- package/dist/client.js.map +1 -0
- package/dist/detect-locale.d.ts +3 -0
- package/dist/detect-locale.d.ts.map +1 -0
- package/dist/detect-locale.js +73 -0
- package/dist/detect-locale.js.map +1 -0
- package/dist/errors.d.ts +7 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +13 -0
- package/dist/errors.js.map +1 -0
- package/dist/fetch-voices.d.ts +3 -0
- package/dist/fetch-voices.d.ts.map +1 -0
- package/dist/fetch-voices.js +50 -0
- package/dist/fetch-voices.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -0
- package/dist/providers/assemblyai/index.d.ts +2 -0
- package/dist/providers/assemblyai/index.d.ts.map +1 -0
- package/dist/providers/assemblyai/index.js +2 -0
- package/dist/providers/assemblyai/index.js.map +1 -0
- package/dist/providers/assemblyai/transcribe.d.ts +3 -0
- package/dist/providers/assemblyai/transcribe.d.ts.map +1 -0
- package/dist/providers/assemblyai/transcribe.js +92 -0
- package/dist/providers/assemblyai/transcribe.js.map +1 -0
- package/dist/providers/assemblyai/types.d.ts +18 -0
- package/dist/providers/assemblyai/types.d.ts.map +1 -0
- package/dist/providers/assemblyai/types.js +2 -0
- package/dist/providers/assemblyai/types.js.map +1 -0
- package/dist/providers/azure/batch-transcribe.d.ts +3 -0
- package/dist/providers/azure/batch-transcribe.d.ts.map +1 -0
- package/dist/providers/azure/batch-transcribe.js +118 -0
- package/dist/providers/azure/batch-transcribe.js.map +1 -0
- package/dist/providers/azure/detect-languages.d.ts +3 -0
- package/dist/providers/azure/detect-languages.d.ts.map +1 -0
- package/dist/providers/azure/detect-languages.js +15 -0
- package/dist/providers/azure/detect-languages.js.map +1 -0
- package/dist/providers/azure/fetch-voices.d.ts +3 -0
- package/dist/providers/azure/fetch-voices.d.ts.map +1 -0
- package/dist/providers/azure/fetch-voices.js +22 -0
- package/dist/providers/azure/fetch-voices.js.map +1 -0
- package/dist/providers/azure/helpers.d.ts +2 -0
- package/dist/providers/azure/helpers.d.ts.map +1 -0
- package/dist/providers/azure/helpers.js +9 -0
- package/dist/providers/azure/helpers.js.map +1 -0
- package/dist/providers/azure/index.d.ts +5 -0
- package/dist/providers/azure/index.d.ts.map +1 -0
- package/dist/providers/azure/index.js +5 -0
- package/dist/providers/azure/index.js.map +1 -0
- package/dist/providers/azure/synthesize.d.ts +3 -0
- package/dist/providers/azure/synthesize.d.ts.map +1 -0
- package/dist/providers/azure/synthesize.js +57 -0
- package/dist/providers/azure/synthesize.js.map +1 -0
- package/dist/providers/azure/transcribe.d.ts +5 -0
- package/dist/providers/azure/transcribe.d.ts.map +1 -0
- package/dist/providers/azure/transcribe.js +75 -0
- package/dist/providers/azure/transcribe.js.map +1 -0
- package/dist/providers/azure/types.d.ts +48 -0
- package/dist/providers/azure/types.d.ts.map +1 -0
- package/dist/providers/azure/types.js +2 -0
- package/dist/providers/azure/types.js.map +1 -0
- package/dist/providers/cartesia/fetch-voices.d.ts +3 -0
- package/dist/providers/cartesia/fetch-voices.d.ts.map +1 -0
- package/dist/providers/cartesia/fetch-voices.js +37 -0
- package/dist/providers/cartesia/fetch-voices.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +3 -0
- package/dist/providers/cartesia/index.d.ts.map +1 -0
- package/dist/providers/cartesia/index.js +3 -0
- package/dist/providers/cartesia/index.js.map +1 -0
- package/dist/providers/cartesia/synthesize.d.ts +3 -0
- package/dist/providers/cartesia/synthesize.d.ts.map +1 -0
- package/dist/providers/cartesia/synthesize.js +54 -0
- package/dist/providers/cartesia/synthesize.js.map +1 -0
- package/dist/providers/cartesia/types.d.ts +14 -0
- package/dist/providers/cartesia/types.d.ts.map +1 -0
- package/dist/providers/cartesia/types.js +3 -0
- package/dist/providers/cartesia/types.js.map +1 -0
- package/dist/providers/deepgram/fetch-voices.d.ts +3 -0
- package/dist/providers/deepgram/fetch-voices.d.ts.map +1 -0
- package/dist/providers/deepgram/fetch-voices.js +27 -0
- package/dist/providers/deepgram/fetch-voices.js.map +1 -0
- package/dist/providers/deepgram/index.d.ts +4 -0
- package/dist/providers/deepgram/index.d.ts.map +1 -0
- package/dist/providers/deepgram/index.js +4 -0
- package/dist/providers/deepgram/index.js.map +1 -0
- package/dist/providers/deepgram/synthesize.d.ts +3 -0
- package/dist/providers/deepgram/synthesize.d.ts.map +1 -0
- package/dist/providers/deepgram/synthesize.js +31 -0
- package/dist/providers/deepgram/synthesize.js.map +1 -0
- package/dist/providers/deepgram/transcribe.d.ts +3 -0
- package/dist/providers/deepgram/transcribe.d.ts.map +1 -0
- package/dist/providers/deepgram/transcribe.js +53 -0
- package/dist/providers/deepgram/transcribe.js.map +1 -0
- package/dist/providers/deepgram/types.d.ts +39 -0
- package/dist/providers/deepgram/types.d.ts.map +1 -0
- package/dist/providers/deepgram/types.js +2 -0
- package/dist/providers/deepgram/types.js.map +1 -0
- package/dist/providers/elevenlabs/fetch-voices.d.ts +3 -0
- package/dist/providers/elevenlabs/fetch-voices.d.ts.map +1 -0
- package/dist/providers/elevenlabs/fetch-voices.js +27 -0
- package/dist/providers/elevenlabs/fetch-voices.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +4 -0
- package/dist/providers/elevenlabs/index.d.ts.map +1 -0
- package/dist/providers/elevenlabs/index.js +4 -0
- package/dist/providers/elevenlabs/index.js.map +1 -0
- package/dist/providers/elevenlabs/synthesize.d.ts +3 -0
- package/dist/providers/elevenlabs/synthesize.d.ts.map +1 -0
- package/dist/providers/elevenlabs/synthesize.js +43 -0
- package/dist/providers/elevenlabs/synthesize.js.map +1 -0
- package/dist/providers/elevenlabs/transcribe.d.ts +3 -0
- package/dist/providers/elevenlabs/transcribe.d.ts.map +1 -0
- package/dist/providers/elevenlabs/transcribe.js +50 -0
- package/dist/providers/elevenlabs/transcribe.js.map +1 -0
- package/dist/providers/elevenlabs/types.d.ts +24 -0
- package/dist/providers/elevenlabs/types.d.ts.map +1 -0
- package/dist/providers/elevenlabs/types.js +2 -0
- package/dist/providers/elevenlabs/types.js.map +1 -0
- package/dist/providers/google/fetch-voices.d.ts +3 -0
- package/dist/providers/google/fetch-voices.d.ts.map +1 -0
- package/dist/providers/google/fetch-voices.js +28 -0
- package/dist/providers/google/fetch-voices.js.map +1 -0
- package/dist/providers/google/helpers.d.ts +10 -0
- package/dist/providers/google/helpers.d.ts.map +1 -0
- package/dist/providers/google/helpers.js +15 -0
- package/dist/providers/google/helpers.js.map +1 -0
- package/dist/providers/google/index.d.ts +4 -0
- package/dist/providers/google/index.d.ts.map +1 -0
- package/dist/providers/google/index.js +4 -0
- package/dist/providers/google/index.js.map +1 -0
- package/dist/providers/google/synthesize.d.ts +3 -0
- package/dist/providers/google/synthesize.d.ts.map +1 -0
- package/dist/providers/google/synthesize.js +35 -0
- package/dist/providers/google/synthesize.js.map +1 -0
- package/dist/providers/google/transcribe.d.ts +3 -0
- package/dist/providers/google/transcribe.d.ts.map +1 -0
- package/dist/providers/google/transcribe.js +117 -0
- package/dist/providers/google/transcribe.js.map +1 -0
- package/dist/providers/google/types.d.ts +43 -0
- package/dist/providers/google/types.d.ts.map +1 -0
- package/dist/providers/google/types.js +3 -0
- package/dist/providers/google/types.js.map +1 -0
- package/dist/providers/openai/fetch-voices.d.ts +3 -0
- package/dist/providers/openai/fetch-voices.d.ts.map +1 -0
- package/dist/providers/openai/fetch-voices.js +14 -0
- package/dist/providers/openai/fetch-voices.js.map +1 -0
- package/dist/providers/openai/index.d.ts +4 -0
- package/dist/providers/openai/index.d.ts.map +1 -0
- package/dist/providers/openai/index.js +4 -0
- package/dist/providers/openai/index.js.map +1 -0
- package/dist/providers/openai/synthesize.d.ts +3 -0
- package/dist/providers/openai/synthesize.d.ts.map +1 -0
- package/dist/providers/openai/synthesize.js +37 -0
- package/dist/providers/openai/synthesize.js.map +1 -0
- package/dist/providers/openai/transcribe.d.ts +3 -0
- package/dist/providers/openai/transcribe.d.ts.map +1 -0
- package/dist/providers/openai/transcribe.js +58 -0
- package/dist/providers/openai/transcribe.js.map +1 -0
- package/dist/providers/openai/types.d.ts +18 -0
- package/dist/providers/openai/types.d.ts.map +1 -0
- package/dist/providers/openai/types.js +2 -0
- package/dist/providers/openai/types.js.map +1 -0
- package/dist/providers/playht/fetch-voices.d.ts +3 -0
- package/dist/providers/playht/fetch-voices.d.ts.map +1 -0
- package/dist/providers/playht/fetch-voices.js +25 -0
- package/dist/providers/playht/fetch-voices.js.map +1 -0
- package/dist/providers/playht/index.d.ts +3 -0
- package/dist/providers/playht/index.d.ts.map +1 -0
- package/dist/providers/playht/index.js +3 -0
- package/dist/providers/playht/index.js.map +1 -0
- package/dist/providers/playht/synthesize.d.ts +3 -0
- package/dist/providers/playht/synthesize.d.ts.map +1 -0
- package/dist/providers/playht/synthesize.js +41 -0
- package/dist/providers/playht/synthesize.js.map +1 -0
- package/dist/providers/playht/types.d.ts +11 -0
- package/dist/providers/playht/types.d.ts.map +1 -0
- package/dist/providers/playht/types.js +2 -0
- package/dist/providers/playht/types.js.map +1 -0
- package/dist/providers/revai/index.d.ts +2 -0
- package/dist/providers/revai/index.d.ts.map +1 -0
- package/dist/providers/revai/index.js +2 -0
- package/dist/providers/revai/index.js.map +1 -0
- package/dist/providers/revai/transcribe.d.ts +3 -0
- package/dist/providers/revai/transcribe.d.ts.map +1 -0
- package/dist/providers/revai/transcribe.js +97 -0
- package/dist/providers/revai/transcribe.js.map +1 -0
- package/dist/providers/revai/types.d.ts +23 -0
- package/dist/providers/revai/types.d.ts.map +1 -0
- package/dist/providers/revai/types.js +2 -0
- package/dist/providers/revai/types.js.map +1 -0
- package/dist/providers/speechmatics/detect-languages.d.ts +3 -0
- package/dist/providers/speechmatics/detect-languages.d.ts.map +1 -0
- package/dist/providers/speechmatics/detect-languages.js +24 -0
- package/dist/providers/speechmatics/detect-languages.js.map +1 -0
- package/dist/providers/speechmatics/helpers.d.ts +4 -0
- package/dist/providers/speechmatics/helpers.d.ts.map +1 -0
- package/dist/providers/speechmatics/helpers.js +57 -0
- package/dist/providers/speechmatics/helpers.js.map +1 -0
- package/dist/providers/speechmatics/index.d.ts +3 -0
- package/dist/providers/speechmatics/index.d.ts.map +1 -0
- package/dist/providers/speechmatics/index.js +3 -0
- package/dist/providers/speechmatics/index.js.map +1 -0
- package/dist/providers/speechmatics/transcribe.d.ts +3 -0
- package/dist/providers/speechmatics/transcribe.d.ts.map +1 -0
- package/dist/providers/speechmatics/transcribe.js +61 -0
- package/dist/providers/speechmatics/transcribe.js.map +1 -0
- package/dist/providers/speechmatics/types.d.ts +27 -0
- package/dist/providers/speechmatics/types.d.ts.map +1 -0
- package/dist/providers/speechmatics/types.js +2 -0
- package/dist/providers/speechmatics/types.js.map +1 -0
- package/dist/synthesize.d.ts +4 -0
- package/dist/synthesize.d.ts.map +1 -0
- package/dist/synthesize.js +73 -0
- package/dist/synthesize.js.map +1 -0
- package/dist/transcribe.d.ts +3 -0
- package/dist/transcribe.d.ts.map +1 -0
- package/dist/transcribe.js +55 -0
- package/dist/transcribe.js.map +1 -0
- package/dist/types.d.ts +361 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +19 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +101 -0
- package/dist/utils.js.map +1 -0
- package/dist/voice-cache.d.ts +9 -0
- package/dist/voice-cache.d.ts.map +1 -0
- package/dist/voice-cache.js +21 -0
- package/dist/voice-cache.js.map +1 -0
- package/dist/voice-resolver.d.ts +7 -0
- package/dist/voice-resolver.d.ts.map +1 -0
- package/dist/voice-resolver.js +82 -0
- package/dist/voice-resolver.js.map +1 -0
- package/package.json +100 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { isUrl, normalizeLanguageCode, poll } from "../../utils.js";
|
|
3
|
+
import { BASE_URL } from "./types.js";
|
|
4
|
+
export async function transcribe(config, audio, languages, options = {}) {
|
|
5
|
+
const { speechModel = "universal", pollInterval = 3000, timeout = 300_000, } = options;
|
|
6
|
+
// Step 1: Get an audio URL
|
|
7
|
+
let audioUrl;
|
|
8
|
+
if (typeof audio === "string" && isUrl(audio)) {
|
|
9
|
+
audioUrl = audio;
|
|
10
|
+
}
|
|
11
|
+
else if (Buffer.isBuffer(audio)) {
|
|
12
|
+
audioUrl = await uploadAudio(config, audio);
|
|
13
|
+
}
|
|
14
|
+
else {
|
|
15
|
+
throw new SpeechServiceError("audio must be a Buffer or a URL string", "INVALID_INPUT", "assemblyai");
|
|
16
|
+
}
|
|
17
|
+
// Step 2: Submit transcription
|
|
18
|
+
const body = {
|
|
19
|
+
audio_url: audioUrl,
|
|
20
|
+
speech_model: speechModel,
|
|
21
|
+
};
|
|
22
|
+
if (languages && languages.length > 1) {
|
|
23
|
+
body.language_codes = languages.map((l) => l.replace("-", "_").toLowerCase());
|
|
24
|
+
}
|
|
25
|
+
else if (languages && languages.length === 1) {
|
|
26
|
+
body.language_code = languages[0].replace("-", "_").toLowerCase();
|
|
27
|
+
}
|
|
28
|
+
else {
|
|
29
|
+
body.language_detection = true;
|
|
30
|
+
}
|
|
31
|
+
const submitResponse = await fetch(`${BASE_URL}/v2/transcript`, {
|
|
32
|
+
method: "POST",
|
|
33
|
+
headers: {
|
|
34
|
+
Authorization: config.apiKey,
|
|
35
|
+
"Content-Type": "application/json",
|
|
36
|
+
},
|
|
37
|
+
body: JSON.stringify(body),
|
|
38
|
+
});
|
|
39
|
+
if (!submitResponse.ok) {
|
|
40
|
+
const errorText = await submitResponse.text();
|
|
41
|
+
throw new SpeechServiceError(`AssemblyAI transcription submission failed: ${errorText}`, "API_ERROR", "assemblyai", submitResponse.status);
|
|
42
|
+
}
|
|
43
|
+
const submitResult = (await submitResponse.json());
|
|
44
|
+
if (submitResult.status === "error") {
|
|
45
|
+
throw new SpeechServiceError(`AssemblyAI transcription failed: ${submitResult.error}`, "TRANSCRIPTION_FAILED", "assemblyai");
|
|
46
|
+
}
|
|
47
|
+
// Step 3: Poll until complete
|
|
48
|
+
const transcriptId = submitResult.id;
|
|
49
|
+
const finalResult = await poll(async () => {
|
|
50
|
+
const res = await fetch(`${BASE_URL}/v2/transcript/${transcriptId}`, {
|
|
51
|
+
headers: { Authorization: config.apiKey },
|
|
52
|
+
});
|
|
53
|
+
if (!res.ok) {
|
|
54
|
+
throw new SpeechServiceError(`AssemblyAI polling failed: ${res.status}`, "API_ERROR", "assemblyai", res.status);
|
|
55
|
+
}
|
|
56
|
+
return res.json();
|
|
57
|
+
}, (result) => result.status === "completed" || result.status === "error", pollInterval, timeout, "assemblyai");
|
|
58
|
+
if (finalResult.status === "error") {
|
|
59
|
+
throw new SpeechServiceError(`AssemblyAI transcription failed: ${finalResult.error}`, "TRANSCRIPTION_FAILED", "assemblyai");
|
|
60
|
+
}
|
|
61
|
+
// Step 4: Normalize the result
|
|
62
|
+
const words = (finalResult.words ?? []).map((w) => ({
|
|
63
|
+
text: w.text,
|
|
64
|
+
startTime: w.start / 1000,
|
|
65
|
+
endTime: w.end / 1000,
|
|
66
|
+
confidence: w.confidence,
|
|
67
|
+
speaker: w.speaker ?? undefined,
|
|
68
|
+
}));
|
|
69
|
+
return {
|
|
70
|
+
text: finalResult.text ?? "",
|
|
71
|
+
words,
|
|
72
|
+
language: normalizeLanguageCode(finalResult.language_code ?? ""),
|
|
73
|
+
duration: finalResult.audio_duration ?? 0,
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
async function uploadAudio(config, audio) {
|
|
77
|
+
const response = await fetch(`${BASE_URL}/v2/upload`, {
|
|
78
|
+
method: "POST",
|
|
79
|
+
headers: {
|
|
80
|
+
Authorization: config.apiKey,
|
|
81
|
+
"Content-Type": "application/octet-stream",
|
|
82
|
+
},
|
|
83
|
+
body: audio,
|
|
84
|
+
});
|
|
85
|
+
if (!response.ok) {
|
|
86
|
+
const errorText = await response.text();
|
|
87
|
+
throw new SpeechServiceError(`AssemblyAI upload failed: ${errorText}`, "UPLOAD_FAILED", "assemblyai", response.status);
|
|
88
|
+
}
|
|
89
|
+
const result = (await response.json());
|
|
90
|
+
return result.upload_url;
|
|
91
|
+
}
|
|
92
|
+
//# sourceMappingURL=transcribe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.js","sourceRoot":"","sources":["../../../src/providers/assemblyai/transcribe.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,qBAAqB,EAAE,IAAI,EAAE,MAAM,gBAAgB,CAAC;AACpE,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAGtC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAwB,EACxB,KAAsB,EACtB,SAA+B,EAC/B,UAAuC,EAAE;IAEzC,MAAM,EACF,WAAW,GAAG,WAAW,EACzB,YAAY,GAAG,IAAI,EACnB,OAAO,GAAG,OAAO,GACpB,GAAG,OAAO,CAAC;IAEZ,2BAA2B;IAC3B,IAAI,QAAgB,CAAC;IACrB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5C,QAAQ,GAAG,KAAK,CAAC;IACrB,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,QAAQ,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAChD,CAAC;SAAM,CAAC;QACJ,MAAM,IAAI,kBAAkB,CACxB,wCAAwC,EACxC,eAAe,EACf,YAAY,CACf,CAAC;IACN,CAAC;IAED,+BAA+B;IAC/B,MAAM,IAAI,GAA4B;QAClC,SAAS,EAAE,QAAQ;QACnB,YAAY,EAAE,WAAW;KAC5B,CAAC;IAEF,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IAClF,CAAC;SAAM,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7C,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;IACtE,CAAC;SAAM,CAAC;QACJ,IAAI,CAAC,kBAAkB,GAAG,IAAI,CAAC;IACnC,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,gBAAgB,EAAE;QAC5D,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,aAAa,EAAE,MAAM,CAAC,MAAM;YAC5B,cAAc,EAAE,kBAAkB;SACrC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC7B,CAAC,CAAC;IAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;QACrB,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,kBAAkB,CACxB,+CAA+C,SAAS,EAAE,EAC1D,WAAW,EACX,YAAY,EACZ,cAAc,CAAC,MAAM,CACxB,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAmD,CAAC;IAErG,IAAI,YAAY,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;QAClC,MAAM,IAAI,kBAAkB,CACxB,oCAAoC,YAAY,CAAC,KAAK,EAAE,EACxD,sBAAsB,EACtB,YAAY,CACf,CAAC;IACN,CAAC;IAED,8BAA8B;IAC9B,MAAM,YAAY,GAAG,YAAY,CAAC,EAAE,CAAC;IAErC,MAA
M,WAAW,GAAG,MAAM,IAAI,CAC1B,KAAK,IAAI,EAAE;QACP,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,kBAAkB,YAAY,EAAE,EAAE;YACjE,OAAO,EAAE,EAAE,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE;SAC5C,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACV,MAAM,IAAI,kBAAkB,CACxB,8BAA8B,GAAG,CAAC,MAAM,EAAE,EAC1C,WAAW,EACX,YAAY,EACZ,GAAG,CAAC,MAAM,CACb,CAAC;QACN,CAAC;QACD,OAAO,GAAG,CAAC,IAAI,EAA2C,CAAC;IAC/D,CAAC,EACD,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EACtE,YAAY,EACZ,OAAO,EACP,YAAY,CACf,CAAC;IAEF,IAAI,WAAW,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;QACjC,MAAM,IAAI,kBAAkB,CACxB,oCAAoC,WAAW,CAAC,KAAK,EAAE,EACvD,sBAAsB,EACtB,YAAY,CACf,CAAC;IACN,CAAC;IAED,+BAA+B;IAC/B,MAAM,KAAK,GAAsB,CAAC,WAAW,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACnE,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;QACzB,OAAO,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;QACrB,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,SAAS;KAClC,CAAC,CAAC,CAAC;IAEJ,OAAO;QACH,IAAI,EAAE,WAAW,CAAC,IAAI,IAAI,EAAE;QAC5B,KAAK;QACL,QAAQ,EAAE,qBAAqB,CAAC,WAAW,CAAC,aAAa,IAAI,EAAE,CAAC;QAChE,QAAQ,EAAE,WAAW,CAAC,cAAc,IAAI,CAAC;KAC5C,CAAC;AACN,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,MAAwB,EAAE,KAAa;IAC9D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,QAAQ,YAAY,EAAE;QAClD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,aAAa,EAAE,MAAM,CAAC,MAAM;YAC5B,cAAc,EAAE,0BAA0B;SAC7C;QACD,IAAI,EAAE,KAAK;KACd,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,6BAA6B,SAAS,EAAE,EACxC,eAAe,EACf,YAAY,EACZ,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA2B,CAAC;IACjE,OAAO,MAAM,CAAC,UAAU,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
export declare const BASE_URL = "https://api.assemblyai.com";
|
|
2
|
+
/**
 * JSON shape of an AssemblyAI transcript resource as consumed by this package.
 */
export interface AssemblyAITranscriptResponse {
    /** Transcript identifier. */
    id: string;
    /** Job state. */
    status: "queued" | "processing" | "completed" | "error";
    /** Full transcript text; nullable. */
    text: null | string;
    /** Word-level entries; nullable. */
    words: null | Array<AssemblyAIWord>;
    /** Language code reported for the transcript. */
    language_code: string;
    /** Audio length; nullable. NOTE(review): unit is not stated here — presumably seconds, confirm. */
    audio_duration: null | number;
    /** Optional error description. */
    error?: string;
}
|
|
11
|
+
/**
 * A single word entry inside an AssemblyAI transcript response.
 */
export interface AssemblyAIWord {
    /** The recognized word. */
    text: string;
    /** Word start offset. NOTE(review): presumably milliseconds — confirm against the provider docs. */
    start: number;
    /** Word end offset, in the same unit as `start`. */
    end: number;
    /** Recognition confidence for this word. */
    confidence: number;
    /** Speaker label; nullable. */
    speaker: null | string;
}
|
|
18
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/providers/assemblyai/types.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,QAAQ,+BAA+B,CAAC;AAErD,MAAM,WAAW,4BAA4B;IACzC,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,QAAQ,GAAG,YAAY,GAAG,WAAW,GAAG,OAAO,CAAC;IACxD,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,KAAK,EAAE,cAAc,EAAE,GAAG,IAAI,CAAC;IAC/B,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,KAAK,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,cAAc;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/providers/assemblyai/types.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,QAAQ,GAAG,4BAA4B,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { AzureConfig, AzureTranscribeOptions, TranscribeResult } from "../../types.js";
|
|
2
|
+
/**
 * Azure batch transcription entry point.
 *
 * @param config    Azure provider configuration.
 * @param audio     Audio input, typed as a Buffer or a string.
 * @param languages Optional list of locales.
 * @param options   Azure transcription options.
 * @returns A promise resolving to the normalized transcription result.
 */
export declare function transcribeBatch(
    config: AzureConfig,
    audio: Buffer | string,
    languages: string[] | undefined,
    options: AzureTranscribeOptions,
): Promise<TranscribeResult>;
|
|
3
|
+
//# sourceMappingURL=batch-transcribe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batch-transcribe.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/batch-transcribe.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,WAAW,EACX,sBAAsB,EACtB,gBAAgB,EAEnB,MAAM,gBAAgB,CAAC;AAKxB,wBAAsB,eAAe,CACjC,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,SAAS,EAAE,MAAM,EAAE,GAAG,SAAS,EAC/B,OAAO,EAAE,sBAAsB,GAChC,OAAO,CAAC,gBAAgB,CAAC,CAmL3B"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { isUrl, poll } from "../../utils.js";
|
|
3
|
+
export async function transcribeBatch(config, audio, languages, options) {
|
|
4
|
+
const { profanityFilter = "none", pollInterval = 5000, timeout = 300_000, } = options;
|
|
5
|
+
// Batch mode requires a URL
|
|
6
|
+
if (Buffer.isBuffer(audio)) {
|
|
7
|
+
throw new SpeechServiceError('Azure batch transcription requires a URL. Pass a URL string or use mode: "fast" for Buffer inputs.', "INVALID_INPUT", "azure");
|
|
8
|
+
}
|
|
9
|
+
if (typeof audio !== "string" || !isUrl(audio)) {
|
|
10
|
+
throw new SpeechServiceError("audio must be a public URL for batch transcription", "INVALID_INPUT", "azure");
|
|
11
|
+
}
|
|
12
|
+
const profanityMap = {
|
|
13
|
+
none: "None",
|
|
14
|
+
masked: "Masked",
|
|
15
|
+
removed: "Removed",
|
|
16
|
+
};
|
|
17
|
+
// Submit batch job
|
|
18
|
+
const submitUrl = `https://${config.region}.api.cognitive.microsoft.com/speechtotext/transcriptions:submit?api-version=2025-10-15`;
|
|
19
|
+
const primaryLocale = languages?.[0] ?? "en-US";
|
|
20
|
+
const body = {
|
|
21
|
+
contentUrls: [audio],
|
|
22
|
+
locale: primaryLocale,
|
|
23
|
+
displayName: `transcription-${Date.now()}`,
|
|
24
|
+
properties: {
|
|
25
|
+
wordLevelTimestampsEnabled: true,
|
|
26
|
+
punctuationMode: "DictatedAndAutomatic",
|
|
27
|
+
profanityFilterMode: profanityMap[profanityFilter] ?? "None",
|
|
28
|
+
timeToLiveHours: 48,
|
|
29
|
+
},
|
|
30
|
+
};
|
|
31
|
+
// Azure batch language identification: 2-10 candidate locales, no duplicate base languages
|
|
32
|
+
if (!languages || languages.length !== 1) {
|
|
33
|
+
let candidateLocales;
|
|
34
|
+
if (languages && languages.length > 1) {
|
|
35
|
+
candidateLocales = languages.slice(0, 10);
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
candidateLocales = ["en-US", "es-ES", "fr-FR", "de-DE", "zh-CN", "ja-JP"];
|
|
39
|
+
}
|
|
40
|
+
if (!candidateLocales.includes(primaryLocale)) {
|
|
41
|
+
candidateLocales = [primaryLocale, ...candidateLocales.slice(0, 9)];
|
|
42
|
+
}
|
|
43
|
+
body.properties.languageIdentification = {
|
|
44
|
+
candidateLocales,
|
|
45
|
+
mode: "Continuous",
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
const submitResponse = await fetch(submitUrl, {
|
|
49
|
+
method: "POST",
|
|
50
|
+
headers: {
|
|
51
|
+
"Ocp-Apim-Subscription-Key": config.subscriptionKey,
|
|
52
|
+
"Content-Type": "application/json",
|
|
53
|
+
},
|
|
54
|
+
body: JSON.stringify(body),
|
|
55
|
+
});
|
|
56
|
+
if (!submitResponse.ok) {
|
|
57
|
+
const errorText = await submitResponse.text();
|
|
58
|
+
throw new SpeechServiceError(`Azure batch transcription submission failed: ${errorText}`, "API_ERROR", "azure", submitResponse.status);
|
|
59
|
+
}
|
|
60
|
+
const submitResult = (await submitResponse.json());
|
|
61
|
+
const transcriptionUrl = submitResult.self;
|
|
62
|
+
// Poll for completion
|
|
63
|
+
const finalStatus = await poll(async () => {
|
|
64
|
+
const res = await fetch(transcriptionUrl, {
|
|
65
|
+
headers: { "Ocp-Apim-Subscription-Key": config.subscriptionKey },
|
|
66
|
+
});
|
|
67
|
+
if (!res.ok) {
|
|
68
|
+
throw new SpeechServiceError(`Azure batch polling failed: ${res.status}`, "API_ERROR", "azure", res.status);
|
|
69
|
+
}
|
|
70
|
+
return res.json();
|
|
71
|
+
}, (result) => result.status === "Succeeded" || result.status === "Failed", pollInterval, timeout, "azure");
|
|
72
|
+
if (finalStatus.status === "Failed") {
|
|
73
|
+
throw new SpeechServiceError("Azure batch transcription failed", "TRANSCRIPTION_FAILED", "azure");
|
|
74
|
+
}
|
|
75
|
+
// Fetch the result files
|
|
76
|
+
const filesUrl = finalStatus.links?.files;
|
|
77
|
+
if (!filesUrl) {
|
|
78
|
+
throw new SpeechServiceError("Azure batch transcription returned no file links", "API_ERROR", "azure");
|
|
79
|
+
}
|
|
80
|
+
const filesResponse = await fetch(filesUrl, {
|
|
81
|
+
headers: { "Ocp-Apim-Subscription-Key": config.subscriptionKey },
|
|
82
|
+
});
|
|
83
|
+
const filesResult = (await filesResponse.json());
|
|
84
|
+
const transcriptionFile = filesResult.values.find((f) => f.kind === "Transcription");
|
|
85
|
+
if (!transcriptionFile) {
|
|
86
|
+
throw new SpeechServiceError("Azure batch transcription returned no transcription file", "API_ERROR", "azure");
|
|
87
|
+
}
|
|
88
|
+
const contentResponse = await fetch(transcriptionFile.links.contentUrl);
|
|
89
|
+
const content = (await contentResponse.json());
|
|
90
|
+
// Normalize: Azure batch uses ticks (100-nanosecond units)
|
|
91
|
+
const TICKS_PER_SECOND = 10_000_000;
|
|
92
|
+
const words = [];
|
|
93
|
+
let detectedLanguage = primaryLocale;
|
|
94
|
+
for (const phrase of content.recognizedPhrases ?? []) {
|
|
95
|
+
if (!detectedLanguage && phrase.locale) {
|
|
96
|
+
detectedLanguage = phrase.locale;
|
|
97
|
+
}
|
|
98
|
+
const best = phrase.nBest?.[0];
|
|
99
|
+
if (best?.words) {
|
|
100
|
+
for (const word of best.words) {
|
|
101
|
+
words.push({
|
|
102
|
+
text: word.word,
|
|
103
|
+
startTime: word.offsetInTicks / TICKS_PER_SECOND,
|
|
104
|
+
endTime: (word.offsetInTicks + word.durationInTicks) / TICKS_PER_SECOND,
|
|
105
|
+
confidence: word.confidence,
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
const text = content.combinedRecognizedPhrases?.map((p) => p.display).join(" ") ?? "";
|
|
111
|
+
return {
|
|
112
|
+
text,
|
|
113
|
+
words,
|
|
114
|
+
language: detectedLanguage,
|
|
115
|
+
duration: (content.durationMilliseconds ?? 0) / 1000,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
//# sourceMappingURL=batch-transcribe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"batch-transcribe.js","sourceRoot":"","sources":["../../../src/providers/azure/batch-transcribe.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,gBAAgB,CAAC;AAG7C,MAAM,CAAC,KAAK,UAAU,eAAe,CACjC,MAAmB,EACnB,KAAsB,EACtB,SAA+B,EAC/B,OAA+B;IAE/B,MAAM,EACF,eAAe,GAAG,MAAM,EACxB,YAAY,GAAG,IAAI,EACnB,OAAO,GAAG,OAAO,GACpB,GAAG,OAAO,CAAC;IAEZ,4BAA4B;IAC5B,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,kBAAkB,CACxB,oGAAoG,EACpG,eAAe,EACf,OAAO,CACV,CAAC;IACN,CAAC;IAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,kBAAkB,CACxB,oDAAoD,EACpD,eAAe,EACf,OAAO,CACV,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAA2B;QACzC,IAAI,EAAE,MAAM;QACZ,MAAM,EAAE,QAAQ;QAChB,OAAO,EAAE,SAAS;KACrB,CAAC;IAEF,mBAAmB;IACnB,MAAM,SAAS,GAAG,WAAW,MAAM,CAAC,MAAM,wFAAwF,CAAC;IAEnI,MAAM,aAAa,GAAG,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,OAAO,CAAC;IAEhD,MAAM,IAAI,GAA4B;QAClC,WAAW,EAAE,CAAC,KAAK,CAAC;QACpB,MAAM,EAAE,aAAa;QACrB,WAAW,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,EAAE;QAC1C,UAAU,EAAE;YACR,0BAA0B,EAAE,IAAI;YAChC,eAAe,EAAE,sBAAsB;YACvC,mBAAmB,EAAE,YAAY,CAAC,eAAe,CAAC,IAAI,MAAM;YAC5D,eAAe,EAAE,EAAE;SACtB;KACJ,CAAC;IAEF,2FAA2F;IAC3F,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvC,IAAI,gBAA0B,CAAC;QAE/B,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,gBAAgB,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9C,CAAC;aAAM,CAAC;YACJ,gBAAgB,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAC9E,CAAC;QAED,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YAC5C,gBAAgB,GAAG,CAAC,aAAa,EAAE,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACxE,CAAC;QAEA,IAAI,CAAC,UAAsC,CAAC,sBAAsB,GAAG;YAClE,gBAAgB;YAChB,IAAI,EAAE,YAAY;SACrB,CAAC;IACN,CAAC;IAED,MAAM,cAAc,GAAG,MAAM,KAAK,CAAC,SAAS,EAAE;QAC1C,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,2BAA2B,EAAE,MAAM,CAAC,eAAe;YACnD,cAAc,EAAE,kBAAkB;SACrC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;KAC7B,CAAC,CAAC;IAEH,IAAI,CAAC,cAAc,CAA
C,EAAE,EAAE,CAAC;QACrB,MAAM,SAAS,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,kBAAkB,CACxB,gDAAgD,SAAS,EAAE,EAC3D,WAAW,EACX,OAAO,EACP,cAAc,CAAC,MAAM,CACxB,CAAC;IACN,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAqB,CAAC;IACvE,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC;IAE3C,sBAAsB;IACtB,MAAM,WAAW,GAAG,MAAM,IAAI,CAC1B,KAAK,IAAI,EAAE;QACP,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,gBAAgB,EAAE;YACtC,OAAO,EAAE,EAAE,2BAA2B,EAAE,MAAM,CAAC,eAAe,EAAE;SACnE,CAAC,CAAC;QACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACV,MAAM,IAAI,kBAAkB,CACxB,+BAA+B,GAAG,CAAC,MAAM,EAAE,EAC3C,WAAW,EACX,OAAO,EACP,GAAG,CAAC,MAAM,CACb,CAAC;QACN,CAAC;QACD,OAAO,GAAG,CAAC,IAAI,EAA4D,CAAC;IAChF,CAAC,EACD,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,MAAM,KAAK,QAAQ,EACvE,YAAY,EACZ,OAAO,EACP,OAAO,CACV,CAAC;IAEF,IAAI,WAAW,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QAClC,MAAM,IAAI,kBAAkB,CACxB,kCAAkC,EAClC,sBAAsB,EACtB,OAAO,CACV,CAAC;IACN,CAAC;IAED,yBAAyB;IACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,EAAE,KAAK,CAAC;IAC1C,IAAI,CAAC,QAAQ,EAAE,CAAC;QACZ,MAAM,IAAI,kBAAkB,CACxB,kDAAkD,EAClD,WAAW,EACX,OAAO,CACV,CAAC;IACN,CAAC;IAED,MAAM,aAAa,GAAG,MAAM,KAAK,CAAC,QAAQ,EAAE;QACxC,OAAO,EAAE,EAAE,2BAA2B,EAAE,MAAM,CAAC,eAAe,EAAE;KACnE,CAAC,CAAC;IACH,MAAM,WAAW,GAAG,CAAC,MAAM,aAAa,CAAC,IAAI,EAAE,CAE9C,CAAC;IAEF,MAAM,iBAAiB,GAAG,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,eAAe,CAAC,CAAC;IACrF,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACrB,MAAM,IAAI,kBAAkB,CACxB,0DAA0D,EAC1D,WAAW,EACX,OAAO,CACV,CAAC;IACN,CAAC;IAED,MAAM,eAAe,GAAG,MAAM,KAAK,CAAC,iBAAiB,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACxE,MAAM,OAAO,GAAG,CAAC,MAAM,eAAe,CAAC,IAAI,EAAE,CAAkC,CAAC;IAEhF,2DAA2D;IAC3D,MAAM,gBAAgB,GAAG,UAAU,CAAC;IACpC,MAAM,KAAK,GAAsB,EAAE,CAAC;IACpC,IAAI,gBAAgB,GAAG,aAAa,CAAC;IAErC,KAAK,MAAM,MAAM,IAAI,OAAO,CAAC,iBAAiB,IAAI,EAAE,EAAE,CAAC;QACnD,IAAI,CAAC,gBAAgB,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YACrC,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC;QACrC,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,IAAI,EAAE,KAAK,EAAE,CAAC;YACd,KAAK,MA
AM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC;oBACP,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,SAAS,EAAE,IAAI,CAAC,aAAa,GAAG,gBAAgB;oBAChD,OAAO,EAAE,CAAC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC,GAAG,gBAAgB;oBACvE,UAAU,EAAE,IAAI,CAAC,UAAU;iBAC9B,CAAC,CAAC;YACP,CAAC;QACL,CAAC;IACL,CAAC;IAED,MAAM,IAAI,GACN,OAAO,CAAC,yBAAyB,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IAE7E,OAAO;QACH,IAAI;QACJ,KAAK;QACL,QAAQ,EAAE,gBAAgB;QAC1B,QAAQ,EAAE,CAAC,OAAO,CAAC,oBAAoB,IAAI,CAAC,CAAC,GAAG,IAAI;KACvD,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detect-languages.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/detect-languages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAIlD,wBAAsB,eAAe,CACjC,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,MAAM,GAAG,MAAM,GACvB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAa9B"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { getBaseLanguage } from "../../utils.js";
|
|
2
|
+
import { runFastTranscription } from "./transcribe.js";
|
|
3
|
+
export async function detectLanguages(config, audio) {
|
|
4
|
+
// Run fast transcription with no locales -> triggers multilingual model
|
|
5
|
+
const result = await runFastTranscription(config, audio, undefined);
|
|
6
|
+
const counts = new Map();
|
|
7
|
+
for (const phrase of result.phrases ?? []) {
|
|
8
|
+
if (phrase.locale) {
|
|
9
|
+
const base = getBaseLanguage(phrase.locale);
|
|
10
|
+
counts.set(base, (counts.get(base) ?? 0) + 1);
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
return counts;
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=detect-languages.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detect-languages.js","sourceRoot":"","sources":["../../../src/providers/azure/detect-languages.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AAEvD,MAAM,CAAC,KAAK,UAAU,eAAe,CACjC,MAAmB,EACnB,KAAsB;IAEtB,wEAAwE;IACxE,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;IAEpE,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC;QACxC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC5C,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClD,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/fetch-voices.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAI7D,wBAAsB,WAAW,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CA4B3E"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
export async function fetchVoices(config) {
|
|
3
|
+
const url = `https://${config.region}.tts.speech.microsoft.com/cognitiveservices/voices/list`;
|
|
4
|
+
const response = await fetch(url, {
|
|
5
|
+
headers: { "Ocp-Apim-Subscription-Key": config.subscriptionKey },
|
|
6
|
+
});
|
|
7
|
+
if (!response.ok) {
|
|
8
|
+
const errorText = await response.text();
|
|
9
|
+
throw new SpeechServiceError(`Azure voice listing failed: ${errorText}`, "API_ERROR", "azure", response.status);
|
|
10
|
+
}
|
|
11
|
+
const voices = (await response.json());
|
|
12
|
+
return voices.map((v) => ({
|
|
13
|
+
id: v.ShortName,
|
|
14
|
+
name: v.DisplayName,
|
|
15
|
+
gender: v.Gender?.toLowerCase() === "male" ? "male"
|
|
16
|
+
: v.Gender?.toLowerCase() === "female" ? "female"
|
|
17
|
+
: undefined,
|
|
18
|
+
locale: v.Locale,
|
|
19
|
+
provider: "azure",
|
|
20
|
+
}));
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=fetch-voices.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.js","sourceRoot":"","sources":["../../../src/providers/azure/fetch-voices.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AAGrD,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,MAAmB;IACjD,MAAM,GAAG,GAAG,WAAW,MAAM,CAAC,MAAM,yDAAyD,CAAC;IAE9F,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC9B,OAAO,EAAE,EAAE,2BAA2B,EAAE,MAAM,CAAC,eAAe,EAAE;KACnE,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,+BAA+B,SAAS,EAAE,EAC1C,WAAW,EACX,OAAO,EACP,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA0B,CAAC;IAEhE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACtB,EAAE,EAAE,CAAC,CAAC,SAAS;QACf,IAAI,EAAE,CAAC,CAAC,WAAW;QACnB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,WAAW,EAAE,KAAK,MAAM,CAAC,CAAC,CAAC,MAAe;YACxD,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,WAAW,EAAE,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAiB;gBAC1D,CAAC,CAAC,SAAS;QACf,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,OAAgB;KAC7B,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/helpers.ts"],"names":[],"mappings":"AAAA,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAO9C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../../src/providers/azure/helpers.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,SAAS,CAAC,IAAY;IAClC,OAAO,IAAI;SACN,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/azure/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { AzureConfig, AzureSynthesizeOptions, SynthesizeResult } from "../../types.js";
|
|
2
|
+
/**
 * Synthesize speech through the Azure text-to-speech REST endpoint.
 *
 * The implementation wraps `text` (XML-escaped) in an SSML document and POSTs it
 * to `https://{config.region}.tts.speech.microsoft.com/cognitiveservices/v1`.
 *
 * @param config - Azure configuration; `region` and `subscriptionKey` are read.
 * @param text - Text to speak.
 * @param voice - Voice name used for `<voice name="…">`, e.g. "en-US-JennyNeural".
 * @param language - Value for `xml:lang`; when undefined it is taken from the
 *   first two dash-separated parts of `voice`.
 * @param options - Optional `outputFormat` (default
 *   "audio-24khz-160kbitrate-mono-mp3"), `speed` (default "-5%"), `pitch` and `style`.
 * @returns The audio bytes, the detected format and the voice that was used.
 * @throws SpeechServiceError with code "API_ERROR" when the HTTP response is not ok.
 */
export declare function synthesize(config: AzureConfig, text: string, voice: string, language: string | undefined, options?: AzureSynthesizeOptions): Promise<SynthesizeResult>;
|
|
3
|
+
//# sourceMappingURL=synthesize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/synthesize.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAK5F,wBAAsB,UAAU,CAC5B,MAAM,EAAE,WAAW,EACnB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,OAAO,GAAE,sBAA2B,GACrC,OAAO,CAAC,gBAAgB,CAAC,CAqE3B"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { detectFormatFromString } from "../../utils.js";
|
|
3
|
+
import { escapeXml } from "./helpers.js";
|
|
4
|
+
export async function synthesize(config, text, voice, language, options = {}) {
|
|
5
|
+
const { outputFormat = "audio-24khz-160kbitrate-mono-mp3", speed = "-5%", pitch, style, } = options;
|
|
6
|
+
// Infer language from voice name if not provided (e.g., "en-US-JennyNeural" -> "en-US")
|
|
7
|
+
const lang = language ?? voice.split("-").slice(0, 2).join("-");
|
|
8
|
+
// Build SSML
|
|
9
|
+
const escapedText = escapeXml(text);
|
|
10
|
+
let ssml;
|
|
11
|
+
if (style) {
|
|
12
|
+
ssml = [
|
|
13
|
+
`<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${lang}">`,
|
|
14
|
+
` <voice name="${voice}">`,
|
|
15
|
+
` <mstts:express-as style="${style}" styledegree="2">`,
|
|
16
|
+
` <prosody rate="${speed}"${pitch ? ` pitch="${pitch}"` : ""}>`,
|
|
17
|
+
` ${escapedText}`,
|
|
18
|
+
` </prosody>`,
|
|
19
|
+
` </mstts:express-as>`,
|
|
20
|
+
` </voice>`,
|
|
21
|
+
`</speak>`,
|
|
22
|
+
].join("\n");
|
|
23
|
+
}
|
|
24
|
+
else {
|
|
25
|
+
ssml = [
|
|
26
|
+
`<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="${lang}">`,
|
|
27
|
+
` <voice name="${voice}">`,
|
|
28
|
+
` <prosody rate="${speed}"${pitch ? ` pitch="${pitch}"` : ""}>`,
|
|
29
|
+
` ${escapedText}`,
|
|
30
|
+
` </prosody>`,
|
|
31
|
+
` </voice>`,
|
|
32
|
+
`</speak>`,
|
|
33
|
+
].join("\n");
|
|
34
|
+
}
|
|
35
|
+
const url = `https://${config.region}.tts.speech.microsoft.com/cognitiveservices/v1`;
|
|
36
|
+
const response = await fetch(url, {
|
|
37
|
+
method: "POST",
|
|
38
|
+
headers: {
|
|
39
|
+
"Ocp-Apim-Subscription-Key": config.subscriptionKey,
|
|
40
|
+
"Content-Type": "application/ssml+xml",
|
|
41
|
+
"X-Microsoft-OutputFormat": outputFormat,
|
|
42
|
+
"User-Agent": "SpeechServices/1.0",
|
|
43
|
+
},
|
|
44
|
+
body: ssml,
|
|
45
|
+
});
|
|
46
|
+
if (!response.ok) {
|
|
47
|
+
const errorText = await response.text();
|
|
48
|
+
throw new SpeechServiceError(`Azure TTS failed: ${errorText}`, "API_ERROR", "azure", response.status);
|
|
49
|
+
}
|
|
50
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
51
|
+
return {
|
|
52
|
+
audio: Buffer.from(arrayBuffer),
|
|
53
|
+
format: detectFormatFromString(outputFormat),
|
|
54
|
+
voice,
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
//# sourceMappingURL=synthesize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"synthesize.js","sourceRoot":"","sources":["../../../src/providers/azure/synthesize.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAmB,EACnB,IAAY,EACZ,KAAa,EACb,QAA4B,EAC5B,UAAkC,EAAE;IAEpC,MAAM,EACF,YAAY,GAAG,kCAAkC,EACjD,KAAK,GAAG,KAAK,EACb,KAAK,EACL,KAAK,GACR,GAAG,OAAO,CAAC;IAEZ,wFAAwF;IACxF,MAAM,IAAI,GAAG,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEhE,aAAa;IACb,MAAM,WAAW,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACpC,IAAI,IAAY,CAAC;IAEjB,IAAI,KAAK,EAAE,CAAC;QACR,IAAI,GAAG;YACH,0HAA0H,IAAI,IAAI;YAClI,kBAAkB,KAAK,IAAI;YAC3B,gCAAgC,KAAK,oBAAoB;YACzD,wBAAwB,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,WAAW,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG;YACpE,WAAW,WAAW,EAAE;YACxB,kBAAkB;YAClB,yBAAyB;YACzB,YAAY;YACZ,UAAU;SACb,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG;YACH,8EAA8E,IAAI,IAAI;YACtF,kBAAkB,KAAK,IAAI;YAC3B,sBAAsB,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,WAAW,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG;YAClE,SAAS,WAAW,EAAE;YACtB,gBAAgB;YAChB,YAAY;YACZ,UAAU;SACb,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjB,CAAC;IAED,MAAM,GAAG,GAAG,WAAW,MAAM,CAAC,MAAM,gDAAgD,CAAC;IAErF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC9B,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,2BAA2B,EAAE,MAAM,CAAC,eAAe;YACnD,cAAc,EAAE,sBAAsB;YACtC,0BAA0B,EAAE,YAAY;YACxC,YAAY,EAAE,oBAAoB;SACrC;QACD,IAAI,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,qBAAqB,SAAS,EAAE,EAChC,WAAW,EACX,OAAO,EACP,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;IAEjD,OAAO;QACH,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC;QAC/B,MAAM,EAAE,sBAAsB,CAAC,YAAY,CAAC;QAC5C,KAAK;KACR,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AzureConfig, AzureTranscribeOptions, TranscribeResult } from "../../types.js";
|
|
2
|
+
import type { AzureFastTranscriptionResponse } from "./types.js";
|
|
3
|
+
/**
 * Transcribe audio with Azure Speech.
 *
 * The implementation reads `options.mode` (default "fast"): "batch" lazily
 * imports `./batch-transcribe.js` and delegates to `transcribeBatch`; any other
 * value uses the fast-transcription path.
 *
 * @param config - Azure configuration, forwarded to the selected implementation.
 * @param audio - A Buffer with the audio bytes or a string (the fast path requires a URL string).
 * @param languages - Candidate locales, or undefined to send none.
 * @param options - Optional settings; `mode` selects the implementation.
 * @returns The normalised transcription result.
 */
export declare function transcribe(config: AzureConfig, audio: Buffer | string, languages: string[] | undefined, options?: AzureTranscribeOptions): Promise<TranscribeResult>;
|
|
4
|
+
/**
 * Call the Azure fast-transcription endpoint and return its raw JSON response.
 *
 * @param config - Azure configuration; `region` and `subscriptionKey` are read.
 * @param audio - A Buffer (uploaded as the `audio` form part) or a URL string
 *   (sent as `audioUrl` in the definition). Other strings are rejected.
 * @param languages - Locales sent as `locales`; omitted when undefined or empty.
 * @param profanityFilter - "none", "masked" or "removed" (default "none");
 *   unrecognised values are sent as "None".
 * @returns The parsed response body.
 * @throws SpeechServiceError with code "INVALID_INPUT" for unsupported audio
 *   input, or "API_ERROR" when the HTTP response is not ok.
 */
export declare function runFastTranscription(config: AzureConfig, audio: Buffer | string, languages: string[] | undefined, profanityFilter?: string): Promise<AzureFastTranscriptionResponse>;
|
|
5
|
+
//# sourceMappingURL=transcribe.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/transcribe.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,WAAW,EACX,sBAAsB,EACtB,gBAAgB,EAEnB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,KAAK,EAAE,8BAA8B,EAAE,MAAM,YAAY,CAAC;AAEjE,wBAAsB,UAAU,CAC5B,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,SAAS,EAAE,MAAM,EAAE,GAAG,SAAS,EAC/B,OAAO,GAAE,sBAA2B,GACrC,OAAO,CAAC,gBAAgB,CAAC,CAQ3B;AAID,wBAAsB,oBAAoB,CACtC,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,MAAM,GAAG,MAAM,EACtB,SAAS,EAAE,MAAM,EAAE,GAAG,SAAS,EAC/B,eAAe,GAAE,MAAe,GACjC,OAAO,CAAC,8BAA8B,CAAC,CAoDzC"}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { SpeechServiceError } from "../../errors.js";
|
|
2
|
+
import { isUrl } from "../../utils.js";
|
|
3
|
+
export async function transcribe(config, audio, languages, options = {}) {
|
|
4
|
+
const { mode = "fast" } = options;
|
|
5
|
+
if (mode === "batch") {
|
|
6
|
+
const { transcribeBatch } = await import("./batch-transcribe.js");
|
|
7
|
+
return transcribeBatch(config, audio, languages, options);
|
|
8
|
+
}
|
|
9
|
+
return transcribeFast(config, audio, languages, options);
|
|
10
|
+
}
|
|
11
|
+
// ─── Fast Transcription (shared API call) ───────────────────────────────────
|
|
12
|
+
export async function runFastTranscription(config, audio, languages, profanityFilter = "none") {
|
|
13
|
+
const profanityMap = {
|
|
14
|
+
none: "None",
|
|
15
|
+
masked: "Masked",
|
|
16
|
+
removed: "Removed",
|
|
17
|
+
};
|
|
18
|
+
const definition = {
|
|
19
|
+
profanityFilterMode: profanityMap[profanityFilter] ?? "None",
|
|
20
|
+
};
|
|
21
|
+
if (languages && languages.length > 0) {
|
|
22
|
+
definition.locales = languages;
|
|
23
|
+
}
|
|
24
|
+
const formData = new FormData();
|
|
25
|
+
if (typeof audio === "string" && isUrl(audio)) {
|
|
26
|
+
definition.audioUrl = audio;
|
|
27
|
+
formData.append("definition", JSON.stringify(definition));
|
|
28
|
+
}
|
|
29
|
+
else if (Buffer.isBuffer(audio)) {
|
|
30
|
+
formData.append("audio", new Blob([new Uint8Array(audio)]), "audio");
|
|
31
|
+
formData.append("definition", JSON.stringify(definition));
|
|
32
|
+
}
|
|
33
|
+
else {
|
|
34
|
+
throw new SpeechServiceError("audio must be a Buffer or a URL string", "INVALID_INPUT", "azure");
|
|
35
|
+
}
|
|
36
|
+
const url = `https://${config.region}.api.cognitive.microsoft.com/speechtotext/transcriptions:transcribe?api-version=2025-10-15`;
|
|
37
|
+
const response = await fetch(url, {
|
|
38
|
+
method: "POST",
|
|
39
|
+
headers: {
|
|
40
|
+
"Ocp-Apim-Subscription-Key": config.subscriptionKey,
|
|
41
|
+
},
|
|
42
|
+
body: formData,
|
|
43
|
+
});
|
|
44
|
+
if (!response.ok) {
|
|
45
|
+
const errorText = await response.text();
|
|
46
|
+
throw new SpeechServiceError(`Azure fast transcription failed: ${errorText}`, "API_ERROR", "azure", response.status);
|
|
47
|
+
}
|
|
48
|
+
return (await response.json());
|
|
49
|
+
}
|
|
50
|
+
// ─── Fast Transcription (normalize result) ──────────────────────────────────
|
|
51
|
+
async function transcribeFast(config, audio, languages, options) {
|
|
52
|
+
const result = await runFastTranscription(config, audio, languages, options.profanityFilter);
|
|
53
|
+
const words = [];
|
|
54
|
+
let detectedLanguage = languages?.[0] ?? "";
|
|
55
|
+
for (const phrase of result.phrases ?? []) {
|
|
56
|
+
if (!detectedLanguage && phrase.locale) {
|
|
57
|
+
detectedLanguage = phrase.locale;
|
|
58
|
+
}
|
|
59
|
+
for (const word of phrase.words ?? []) {
|
|
60
|
+
words.push({
|
|
61
|
+
text: word.text,
|
|
62
|
+
startTime: word.offsetMilliseconds / 1000,
|
|
63
|
+
endTime: (word.offsetMilliseconds + word.durationMilliseconds) / 1000,
|
|
64
|
+
});
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
const text = result.combinedPhrases?.map((p) => p.text).join(" ") ?? "";
|
|
68
|
+
return {
|
|
69
|
+
text,
|
|
70
|
+
words,
|
|
71
|
+
language: detectedLanguage,
|
|
72
|
+
duration: (result.durationMilliseconds ?? 0) / 1000,
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
//# sourceMappingURL=transcribe.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transcribe.js","sourceRoot":"","sources":["../../../src/providers/azure/transcribe.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAGvC,MAAM,CAAC,KAAK,UAAU,UAAU,CAC5B,MAAmB,EACnB,KAAsB,EACtB,SAA+B,EAC/B,UAAkC,EAAE;IAEpC,MAAM,EAAE,IAAI,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC;IAElC,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;QACnB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;QAClE,OAAO,eAAe,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAC9D,CAAC;IACD,OAAO,cAAc,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;AAC7D,CAAC;AAED,+EAA+E;AAE/E,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACtC,MAAmB,EACnB,KAAsB,EACtB,SAA+B,EAC/B,kBAA0B,MAAM;IAEhC,MAAM,YAAY,GAA2B;QACzC,IAAI,EAAE,MAAM;QACZ,MAAM,EAAE,QAAQ;QAChB,OAAO,EAAE,SAAS;KACrB,CAAC;IAEF,MAAM,UAAU,GAA4B;QACxC,mBAAmB,EAAE,YAAY,CAAC,eAAe,CAAC,IAAI,MAAM;KAC/D,CAAC;IAEF,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpC,UAAU,CAAC,OAAO,GAAG,SAAS,CAAC;IACnC,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,QAAQ,EAAE,CAAC;IAEhC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5C,UAAU,CAAC,QAAQ,GAAG,KAAK,CAAC;QAC5B,QAAQ,CAAC,MAAM,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;IAC9D,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,QAAQ,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACrE,QAAQ,CAAC,MAAM,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;IAC9D,CAAC;SAAM,CAAC;QACJ,MAAM,IAAI,kBAAkB,CACxB,wCAAwC,EACxC,eAAe,EACf,OAAO,CACV,CAAC;IACN,CAAC;IAED,MAAM,GAAG,GAAG,WAAW,MAAM,CAAC,MAAM,4FAA4F,CAAC;IAEjI,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAC9B,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,2BAA2B,EAAE,MAAM,CAAC,eAAe;SACtD;QACD,IAAI,EAAE,QAAQ;KACjB,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,IAAI,kBAAkB,CACxB,oCAAoC,SAAS,EAAE,EAC/C,WAAW,EACX,OAAO,EACP,QAAQ,CAAC,MAAM,CAClB,CAAC;IACN,CAAC;IAED,OAAO,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAmC,CAA
C;AACrE,CAAC;AAED,+EAA+E;AAE/E,KAAK,UAAU,cAAc,CACzB,MAAmB,EACnB,KAAsB,EACtB,SAA+B,EAC/B,OAA+B;IAE/B,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC;IAE7F,MAAM,KAAK,GAAsB,EAAE,CAAC;IACpC,IAAI,gBAAgB,GAAG,SAAS,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAE5C,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC;QACxC,IAAI,CAAC,gBAAgB,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YACrC,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC;QACrC,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;YACpC,KAAK,CAAC,IAAI,CAAC;gBACP,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,SAAS,EAAE,IAAI,CAAC,kBAAkB,GAAG,IAAI;gBACzC,OAAO,EAAE,CAAC,IAAI,CAAC,kBAAkB,GAAG,IAAI,CAAC,oBAAoB,CAAC,GAAG,IAAI;aACxE,CAAC,CAAC;QACP,CAAC;IACL,CAAC;IAED,MAAM,IAAI,GACN,MAAM,CAAC,eAAe,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IAE/D,OAAO;QACH,IAAI;QACJ,KAAK;QACL,QAAQ,EAAE,gBAAgB;QAC1B,QAAQ,EAAE,CAAC,MAAM,CAAC,oBAAoB,IAAI,CAAC,CAAC,GAAG,IAAI;KACtD,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * JSON body resolved by `runFastTranscription`.
 *
 * Field names ending in `Milliseconds` are numeric millisecond values.
 */
export interface AzureFastTranscriptionResponse {
    /** Duration reported for the whole response. */
    durationMilliseconds: number;
    /** Combined text entries, each optionally tagged with a channel number. */
    combinedPhrases?: {
        text: string;
        channel?: number;
    }[];
    /** Individual phrases with timing and, optionally, locale, confidence and words. */
    phrases?: {
        offsetMilliseconds: number;
        durationMilliseconds: number;
        text: string;
        locale?: string;
        confidence?: number;
        /** Word-level timing inside the phrase. */
        words?: {
            text: string;
            offsetMilliseconds: number;
            durationMilliseconds: number;
        }[];
    }[];
}
|
|
20
|
+
/** One entry of the JSON array returned by the Azure voice-list endpoint. */
export interface AzureVoiceListEntry {
    /** Voice identifier, e.g. "en-US-JennyNeural". */
    ShortName: string;
    /** Human-readable voice name. */
    DisplayName: string;
    /** Locale of the voice. */
    Locale: string;
    /** Gender as reported by the service; may be absent. */
    Gender?: string;
    /** Voice type as reported by the service; may be absent. */
    VoiceType?: string;
}
|
|
27
|
+
/**
 * JSON shape of an Azure batch-transcription result file.
 *
 * NOTE(review): fields ending in `InTicks` presumably use the service's tick
 * unit — confirm the conversion factor against the batch transcription code.
 */
export interface AzureBatchTranscriptionResult {
    /** Duration reported for the whole result; may be absent. */
    durationMilliseconds?: number;
    /** Combined display text entries. */
    combinedRecognizedPhrases?: {
        display: string;
    }[];
    /** Individual recognised phrases with timing and ranked alternatives. */
    recognizedPhrases?: {
        offsetInTicks: number;
        durationInTicks: number;
        text: string;
        locale?: string;
        /** Recognition alternatives, each with display text and optional word details. */
        nBest?: {
            display: string;
            words?: {
                word: string;
                offsetInTicks: number;
                durationInTicks: number;
                confidence: number;
            }[];
        }[];
    }[];
}
|
|
48
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/providers/azure/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,8BAA8B;IAC3C,oBAAoB,EAAE,MAAM,CAAC;IAC7B,eAAe,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IAC5D,OAAO,CAAC,EAAE,KAAK,CAAC;QACZ,kBAAkB,EAAE,MAAM,CAAC;QAC3B,oBAAoB,EAAE,MAAM,CAAC;QAC7B,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,KAAK,CAAC,EAAE,KAAK,CAAC;YACV,IAAI,EAAE,MAAM,CAAC;YACb,kBAAkB,EAAE,MAAM,CAAC;YAC3B,oBAAoB,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;KACN,CAAC,CAAC;CACN;AAED,MAAM,WAAW,mBAAmB;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,6BAA6B;IAC1C,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,yBAAyB,CAAC,EAAE,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACvD,iBAAiB,CAAC,EAAE,KAAK,CAAC;QACtB,aAAa,EAAE,MAAM,CAAC;QACtB,eAAe,EAAE,MAAM,CAAC;QACxB,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,KAAK,CAAC,EAAE,KAAK,CAAC;YACV,OAAO,EAAE,MAAM,CAAC;YAChB,KAAK,CAAC,EAAE,KAAK,CAAC;gBACV,IAAI,EAAE,MAAM,CAAC;gBACb,aAAa,EAAE,MAAM,CAAC;gBACtB,eAAe,EAAE,MAAM,CAAC;gBACxB,UAAU,EAAE,MAAM,CAAC;aACtB,CAAC,CAAC;SACN,CAAC,CAAC;KACN,CAAC,CAAC;CACN"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/providers/azure/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-voices.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/fetch-voices.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAKhE,wBAAsB,WAAW,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CA0C9E"}
|