@speech-sdk/core 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +215 -269
- package/dist/__tests__/e2e/_save-audio.d.ts +25 -2
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +46 -10
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-utils.d.ts +2 -0
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +9 -0
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +137 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +283 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +37 -0
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +16 -0
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/derive-timestamps.d.ts +14 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +38 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +1 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +59 -0
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +18 -1
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +73 -16
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +40 -0
- package/dist/logger.js.map +1 -0
- package/dist/provider-utils.d.ts +8 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -2
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +24 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +23 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +12 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +137 -2
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +24 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +48 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +19 -4
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +83 -13
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +0 -25
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +3 -58
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +38 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +31 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +8 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +75 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +25 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +23 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +11 -2
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +11 -2
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +22 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +17 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +8 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +10 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +12 -3
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +7 -3
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +32 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +57 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +7 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +13 -1
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +3 -12
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +48 -4
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +16 -0
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +10 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +40 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stt-providers/openai/index.d.ts +42 -0
- package/dist/stt-providers/openai/index.d.ts.map +1 -0
- package/dist/stt-providers/openai/index.js +184 -0
- package/dist/stt-providers/openai/index.js.map +1 -0
- package/dist/timestamps.d.ts +23 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/package.json +6 -2
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
const WHITESPACE_CHAR = /^\s$/;
/**
 * Aggregate Resemble's grapheme-level timing into word-level timestamps.
 *
 * Walks `graph_chars` in order. A whitespace entry closes the word being
 * built and is itself discarded. Every other entry (letters and punctuation
 * alike) is appended to the current word, so punctuation stays attached to
 * its neighbour ("Hello," is one word).
 *
 * `graph_times[i]` is read as `[startSeconds, endSeconds]` for
 * `graph_chars[i]`. A word takes the start of its first accepted character
 * and the end of its last accepted character. An entry whose timing tuple is
 * missing, shorter than two items, or non-finite is skipped entirely: it
 * contributes neither text nor timing, so NaN never reaches the output.
 * Empty or missing grapheme entries are skipped the same way.
 *
 * @param alignment Object carrying the parallel `graph_chars` and
 *   `graph_times` arrays. A missing array is treated as empty.
 * @returns Array of `{ text, start, end }` in input order. No entry has an
 *   empty `text`.
 */
export function audioTimestampsToWordTimestamps(alignment) {
    // Tolerate a partially populated payload instead of throwing on `.length`
    // or on indexing into `undefined`.
    const chars = alignment?.graph_chars ?? [];
    const times = alignment?.graph_times ?? [];
    const words = [];
    let buf = "";
    let wordStart = 0;
    let wordEnd = 0;
    // Single flush path shared by the whitespace boundary and end-of-input,
    // so both apply the same "never emit an empty word" guard.
    const flush = () => {
        if (buf.length > 0) {
            words.push({ text: buf, start: wordStart, end: wordEnd });
        }
        buf = "";
    };
    for (let i = 0; i < chars.length; i++) {
        const c = chars[i] ?? "";
        if (c === "") {
            // Nothing to accumulate; do not let it open a word either.
            continue;
        }
        if (WHITESPACE_CHAR.test(c)) {
            flush();
            continue;
        }
        const t = times[i];
        if (!t || t.length < 2) {
            continue;
        }
        const s = t[0];
        const e = t[1];
        if (!(Number.isFinite(s) && Number.isFinite(e))) {
            continue;
        }
        if (buf.length === 0) {
            // First accepted character of a new word fixes its start time.
            wordStart = s;
        }
        buf += c;
        wordEnd = e;
    }
    flush();
    return words;
}
|
|
57
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAoBA,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,+BAA+B,CAC7C,SAAkC;IAElC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAErC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,MAAM,EAAE,CAAC;gBACX,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC1D,GAAG,GAAG,EAAE,CAAC;gBACT,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,SAAS;QACX,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,SAAS,GAAG,CAAC,CAAC;YACd,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,GAAG,IAAI,CAAC,CAAC;QACT,OAAO,GAAG,CAAC,CAAC;IACd,CAAC;IAED,IAAI,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface ResembleSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -11,7 +12,10 @@ export declare class ResembleSpeechProvider implements SpeechProvider<string, st
|
|
|
11
12
|
readonly id: "default";
|
|
12
13
|
readonly releaseDate: "2025-09-04";
|
|
13
14
|
readonly languages: readonly ["en", "ar", "da", "de", "el", "es", "fi", "fr", "he", "hi", "it", "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh"];
|
|
14
|
-
readonly features: readonly ["streaming", "open-source", "inline-voice-cloning"
|
|
15
|
+
readonly features: readonly ["streaming", "open-source", "inline-voice-cloning", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "native";
|
|
18
|
+
}];
|
|
15
19
|
}];
|
|
16
20
|
private readonly apiKey;
|
|
17
21
|
private readonly baseURL;
|
|
@@ -24,10 +28,12 @@ export declare class ResembleSpeechProvider implements SpeechProvider<string, st
|
|
|
24
28
|
providerOptions?: Record<string, unknown>;
|
|
25
29
|
abortSignal?: AbortSignal;
|
|
26
30
|
headers?: Record<string, string>;
|
|
31
|
+
includeTimestamps?: boolean;
|
|
27
32
|
}): Promise<{
|
|
28
33
|
audio: string;
|
|
29
34
|
mediaType: string;
|
|
30
35
|
providerMetadata?: Record<string, unknown>;
|
|
36
|
+
timestamps?: WordTimestamp[];
|
|
31
37
|
}>;
|
|
32
38
|
stream(options: {
|
|
33
39
|
modelId: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAMzD,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM;;;;;;;;OAoCJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAM1C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;IA8CI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAqCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;CAYjC;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
+
import { audioTimestampsToWordTimestamps, } from "./alignment.js";
|
|
2
3
|
export class ResembleSpeechProvider {
|
|
3
4
|
id = "resemble";
|
|
4
5
|
defaultModel = "default";
|
|
@@ -31,7 +32,12 @@ export class ResembleSpeechProvider {
|
|
|
31
32
|
"tr",
|
|
32
33
|
"zh",
|
|
33
34
|
],
|
|
34
|
-
features: [
|
|
35
|
+
features: [
|
|
36
|
+
"streaming",
|
|
37
|
+
"open-source",
|
|
38
|
+
"inline-voice-cloning",
|
|
39
|
+
{ id: "timestamps", mode: "native" },
|
|
40
|
+
],
|
|
35
41
|
},
|
|
36
42
|
];
|
|
37
43
|
apiKey;
|
|
@@ -61,10 +67,16 @@ export class ResembleSpeechProvider {
|
|
|
61
67
|
signal: options.abortSignal,
|
|
62
68
|
});
|
|
63
69
|
await handleErrorResponse(response, `resemble/${options.modelId}`);
|
|
70
|
+
// Resemble always returns `audio_timestamps`; gate the projection on
|
|
71
|
+
// the caller's opt-in rather than the presence of the field.
|
|
64
72
|
const json = (await response.json());
|
|
73
|
+
const timestamps = options.includeTimestamps && json.audio_timestamps
|
|
74
|
+
? audioTimestampsToWordTimestamps(json.audio_timestamps)
|
|
75
|
+
: undefined;
|
|
65
76
|
return {
|
|
66
77
|
audio: json.audio_content,
|
|
67
78
|
mediaType: "audio/wav",
|
|
79
|
+
timestamps,
|
|
68
80
|
};
|
|
69
81
|
}
|
|
70
82
|
async stream(options) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,+BAA+B,GAEhC,MAAM,gBAAgB,CAAC;AAQxB,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,UAAU,CAAC;IAChB,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,SAAS;YACb,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,QAAQ,EAAE;gBACR,WAAW;gBACX,aAAa;gBACb,sBAAsB;gBACtB,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE;aACrC;SACF;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,+BAA+B,CAAC;QACjE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAMC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,aAAa,CAAC;QAEzC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,qEAAqE;QACrE,6DAA6D;QAC7D,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAGlC,CAAC;QAEF,MAAM,UAAU,GACd,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,gBAAgB;YAChD,CAAC,CAAC,+BAA+B,CAAC,IAAI,CAAC,gBAAgB,CAAC;YACxD,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,aAAa;YACzB,SAAS,EAAE,WAAW;YACtB,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,GAA
G,GAAG,GAAG,IAAI,CAAC,OAAO,SAAS,CAAC;QAErC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,YAAY,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACvE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW;SAC/D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,qEAAqE;YACrE,oEAAoE;YACpE,qCAAqC;YACrC,OAAO;gBACL,eAAe,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE;gBACxC,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;CACF;AAED,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve-provider.d.ts","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"resolve-provider.d.ts","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAeA,OAAO,KAAK,EAAE,aAAa,EAAkB,MAAM,sBAAsB,CAAC;AAgD1E,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,GAAG,aAAa,EAC7B,OAAO,CAAC,EAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5B,aAAa,CAWf"}
|
package/dist/resolve-provider.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { SpeechSDKError } from "./errors.js";
|
|
2
|
+
import { parseProviderModelSpec } from "./provider-utils.js";
|
|
2
3
|
import { CartesiaSpeechProvider } from "./providers/cartesia/index.js";
|
|
3
4
|
import { DeepgramSpeechProvider } from "./providers/deepgram/index.js";
|
|
4
5
|
import { ElevenLabsSpeechProvider } from "./providers/elevenlabs/index.js";
|
|
@@ -55,21 +56,11 @@ export function resolveModel(model, options) {
|
|
|
55
56
|
if (isResolvedModel(model)) {
|
|
56
57
|
return model;
|
|
57
58
|
}
|
|
58
|
-
const
|
|
59
|
-
let providerName;
|
|
60
|
-
let modelId;
|
|
61
|
-
if (slashIndex === -1) {
|
|
62
|
-
providerName = model;
|
|
63
|
-
modelId = undefined;
|
|
64
|
-
}
|
|
65
|
-
else {
|
|
66
|
-
providerName = model.slice(0, slashIndex);
|
|
67
|
-
modelId = model.slice(slashIndex + 1);
|
|
68
|
-
}
|
|
59
|
+
const { providerName, modelId } = parseProviderModelSpec(model);
|
|
69
60
|
const provider = createBuiltinProvider(providerName, options);
|
|
70
61
|
return {
|
|
71
62
|
provider,
|
|
72
|
-
modelId: modelId
|
|
63
|
+
modelId: modelId ?? provider.defaultModel,
|
|
73
64
|
};
|
|
74
65
|
}
|
|
75
66
|
//# sourceMappingURL=resolve-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve-provider.js","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAG7D,SAAS,eAAe,CAAC,KAAc;IACrC,OAAO,CACL,KAAK,IAAI,IAAI;QACb,OAAO,KAAK,KAAK,QAAQ;QACzB,UAAU,IAAI,KAAK;QACnB,SAAS,IAAI,KAAK,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAY,EACZ,OAA6B;IAE7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,wBAAwB,CAAC,MAAM,CAAC,CAAC;QAC9C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,uBAAuB,CAAC,MAAM,CAAC,CAAC;QAC7C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,QAAQ;YACX,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,KAAK;YACR,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC;YACE,MAAM,IAAI,cAAc,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,KAA6B,EAC7B,OAA6B;IAE7B,IAAI,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK
,CAAC;IACf,CAAC;IAED,MAAM,
|
|
1
|
+
{"version":3,"file":"resolve-provider.js","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,sBAAsB,EAAE,MAAM,qBAAqB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAG7D,SAAS,eAAe,CAAC,KAAc;IACrC,OAAO,CACL,KAAK,IAAI,IAAI;QACb,OAAO,KAAK,KAAK,QAAQ;QACzB,UAAU,IAAI,KAAK;QACnB,SAAS,IAAI,KAAK,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAY,EACZ,OAA6B;IAE7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,wBAAwB,CAAC,MAAM,CAAC,CAAC;QAC9C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,uBAAuB,CAAC,MAAM,CAAC,CAAC;QAC7C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,QAAQ;YACX,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,KAAK;YACR,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC;YACE,MAAM,IAAI,cAAc,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,KAA6B,EAC7B,OAA6B;IAE7B,IAAI,
eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC;IAChE,MAAM,QAAQ,GAAG,qBAAqB,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9D,OAAO;QACL,QAAQ;QACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;KAC1C,CAAC;AACJ,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
1
2
|
export type Voice = string | {
|
|
2
3
|
url: string;
|
|
3
4
|
} | {
|
|
@@ -6,13 +7,31 @@ export type Voice = string | {
|
|
|
6
7
|
/**
|
|
7
8
|
* A capability supported by a model. Today every feature is just an id (a
|
|
8
9
|
* string), meaning "this model has feature X". The union also accepts an
|
|
9
|
-
* object form `{ id, ...params }` so
|
|
10
|
-
*
|
|
11
|
-
*
|
|
10
|
+
* object form `{ id, ...params }` so features that need parameters (e.g.
|
|
11
|
+
* `timestamps` with a `mode`) can extend the type without breaking
|
|
12
|
+
* existing string-based features.
|
|
12
13
|
*/
|
|
13
|
-
export type Feature = string | {
|
|
14
|
+
export type Feature = string | TimestampsFeature | {
|
|
14
15
|
readonly id: string;
|
|
15
16
|
};
|
|
17
|
+
/**
|
|
18
|
+
* Per-model word-timestamp capability.
|
|
19
|
+
*
|
|
20
|
+
* - `"native"`: the TTS endpoint returns word-level alignment directly in
|
|
21
|
+
* its response (e.g., ElevenLabs `/with-timestamps`, Cartesia SSE).
|
|
22
|
+
* - `"derived"`: no native alignment; `timestamps: "on"` pipes the generated
|
|
23
|
+
* audio through an STT round-trip to produce word timings. Extra cost and
|
|
24
|
+
* latency, but works with any provider that has a usable STT API.
|
|
25
|
+
*
|
|
26
|
+
* Providers without any viable path (same-vendor STT missing or word-level
|
|
27
|
+
* unavailable) declare no TIMESTAMPS feature; `timestamps: "on"` routes them
|
|
28
|
+
* through the default `timestampProvider` (OpenAI Whisper) with a clear
|
|
29
|
+
* error when no fallback key is configured.
|
|
30
|
+
*/
|
|
31
|
+
export interface TimestampsFeature {
|
|
32
|
+
readonly id: "timestamps";
|
|
33
|
+
readonly mode: "native" | "derived";
|
|
34
|
+
}
|
|
16
35
|
export interface ModelInfo {
|
|
17
36
|
readonly features: readonly Feature[];
|
|
18
37
|
readonly id: string;
|
|
@@ -25,6 +44,7 @@ export declare const FEATURES: {
|
|
|
25
44
|
readonly AUDIO_TAGS: "audio-tags";
|
|
26
45
|
readonly INLINE_VOICE_CLONING: "inline-voice-cloning";
|
|
27
46
|
readonly OPEN_SOURCE: "open-source";
|
|
47
|
+
readonly TIMESTAMPS: "timestamps";
|
|
28
48
|
};
|
|
29
49
|
export declare function hasFeature(model: ModelInfo, id: string): boolean;
|
|
30
50
|
export declare function getFeature<T extends {
|
|
@@ -44,11 +64,20 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
44
64
|
providerOptions?: Record<string, unknown>;
|
|
45
65
|
abortSignal?: AbortSignal;
|
|
46
66
|
headers?: Record<string, string>;
|
|
67
|
+
/**
|
|
68
|
+
* Hint from the orchestrator that the caller wants word timestamps. A
|
|
69
|
+
* provider that supports native alignment should switch to its timestamp
|
|
70
|
+
* endpoint (e.g., ElevenLabs `/with-timestamps`) and populate `timestamps`
|
|
71
|
+
* in the return. Providers without native support ignore this flag; the
|
|
72
|
+
* orchestrator then routes through an STT fallback.
|
|
73
|
+
*/
|
|
74
|
+
includeTimestamps?: boolean;
|
|
47
75
|
}): Promise<{
|
|
48
76
|
audio: string | Uint8Array;
|
|
49
77
|
audioDurationMs?: number;
|
|
50
78
|
mediaType: string;
|
|
51
79
|
providerMetadata?: Record<string, unknown>;
|
|
80
|
+
timestamps?: WordTimestamp[];
|
|
52
81
|
}>;
|
|
53
82
|
generateDialogue?(options: {
|
|
54
83
|
modelId: string;
|
|
@@ -59,11 +88,20 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
59
88
|
providerOptions?: Record<string, unknown>;
|
|
60
89
|
abortSignal?: AbortSignal;
|
|
61
90
|
headers?: Record<string, string>;
|
|
91
|
+
/**
|
|
92
|
+
* Hint that the caller wants word timestamps. A dialogue provider with a
|
|
93
|
+
* native timestamp endpoint (e.g., ElevenLabs text-to-dialogue with
|
|
94
|
+
* alignment) should switch to it and populate `timestamps` in the
|
|
95
|
+
* return. Providers without native support ignore the flag; the
|
|
96
|
+
* conversation orchestrator then falls back to STT on the mixed audio.
|
|
97
|
+
*/
|
|
98
|
+
includeTimestamps?: boolean;
|
|
62
99
|
}): Promise<{
|
|
63
100
|
audio: string | Uint8Array;
|
|
64
101
|
audioDurationMs?: number;
|
|
65
102
|
mediaType: string;
|
|
66
103
|
providerMetadata?: Record<string, unknown>;
|
|
104
|
+
timestamps?: WordTimestamp[];
|
|
67
105
|
}>;
|
|
68
106
|
getStitchOptions?(modelId: string): {
|
|
69
107
|
providerOptions: Record<string, unknown>;
|
|
@@ -93,4 +131,10 @@ export interface ResolvedModel<TVoice extends Voice = Voice> {
|
|
|
93
131
|
modelId: string;
|
|
94
132
|
provider: SpeechProvider<string, TVoice>;
|
|
95
133
|
}
|
|
134
|
+
/**
|
|
135
|
+
* Returns true when the resolved model declares `{ id: "timestamps", mode: "native" }`
|
|
136
|
+
* in its features (i.e., its TTS endpoint returns alignment data directly in the
|
|
137
|
+
* response, no STT round-trip needed).
|
|
138
|
+
*/
|
|
139
|
+
export declare function modelDeclaresNativeTimestamps(resolved: ResolvedModel): boolean;
|
|
96
140
|
//# sourceMappingURL=speech-provider.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.d.ts","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,GAAG,UAAU,CAAA;CAAE,CAAC;AAE9E;;;;;;GAMG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG;IAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"speech-provider.d.ts","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,GAAG,UAAU,CAAA;CAAE,CAAC;AAE9E;;;;;;GAMG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG,iBAAiB,GAAG;IAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC;AAE3E;;;;;;;;;;;;;GAaG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,EAAE,YAAY,CAAC;IAC1B,QAAQ,CAAC,IAAI,EAAE,QAAQ,GAAG,SAAS,CAAC;CACrC;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,QAAQ,EAAE,SAAS,OAAO,EAAE,CAAC;IACtC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,2EAA2E;AAC3E,eAAO,MAAM,QAAQ;;;;;;CAMX,CAAC;AAEX,wBAAgB,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAOhE;AAED,wBAAgB,UAAU,CAAC,CAAC,SAAS;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,EACjD,KAAK,EAAE,SAAS,EAChB,EAAE,EAAE,MAAM,GACT,CAAC,GAAG,SAAS,CAOf;AAED,MAAM,WAAW,cAAc,CAC7B,MAAM,SAAS,MAAM,GAAG,MAAM,EAC9B,MAAM,SAAS,KAAK,GAAG,KAAK;IAE5B,YAAY,EAAE,MAAM,CAAC;IAErB,oBAAoB,CAAC,CAAC,OAAO,EAAE,MAAM,GACjC;QACE,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,aAAa,CAAC,EAAE,MAAM,CAAC;KACxB,GACD,SAAS,CAAC;IAEd,QAAQ,CAAC,OAAO,EAAE;QAChB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC;;;;;;WAMG;QACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC,CAAC;IAEH,gBAAgB,CAAC,CAAC,OAAO,EAAE;QACzB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CA
AC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC;;;;;;WAMG;QACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC,CAAC;IAEH,gBAAgB,CAAC,CAAC,OAAO,EAAE,MAAM,GAC7B;QACE,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,GACD,SAAS,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IAE7B,gBAAgB,CAAC,CACf,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAExC,MAAM,CAAC,CAAC,OAAO,EAAE;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,aAAa,CAAC,MAAM,SAAS,KAAK,GAAG,KAAK;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC1C;AAED;;;;GAIG;AACH,wBAAgB,6BAA6B,CAC3C,QAAQ,EAAE,aAAa,GACtB,OAAO,CAWT"}
|
package/dist/speech-provider.js
CHANGED
|
@@ -4,6 +4,7 @@ export const FEATURES = {
|
|
|
4
4
|
AUDIO_TAGS: "audio-tags",
|
|
5
5
|
INLINE_VOICE_CLONING: "inline-voice-cloning",
|
|
6
6
|
OPEN_SOURCE: "open-source",
|
|
7
|
+
TIMESTAMPS: "timestamps",
|
|
7
8
|
};
|
|
8
9
|
export function hasFeature(model, id) {
|
|
9
10
|
for (const f of model.features) {
|
|
@@ -21,4 +22,19 @@ export function getFeature(model, id) {
|
|
|
21
22
|
}
|
|
22
23
|
return undefined;
|
|
23
24
|
}
|
|
25
|
+
/**
|
|
26
|
+
* Returns true when the resolved model declares `{ id: "timestamps", mode: "native" }`
|
|
27
|
+
* in its features (i.e., its TTS endpoint returns alignment data directly in the
|
|
28
|
+
* response, no STT round-trip needed).
|
|
29
|
+
*/
|
|
30
|
+
export function modelDeclaresNativeTimestamps(resolved) {
|
|
31
|
+
// `.models` is required by the SpeechProvider interface but we use optional
|
|
32
|
+
// chaining so tests/mocks that omit it don't crash here.
|
|
33
|
+
const modelInfo = resolved.provider.models?.find((m) => m.id === resolved.modelId);
|
|
34
|
+
if (!modelInfo) {
|
|
35
|
+
return false;
|
|
36
|
+
}
|
|
37
|
+
const feature = getFeature(modelInfo, "timestamps");
|
|
38
|
+
return feature?.mode === "native";
|
|
39
|
+
}
|
|
24
40
|
//# sourceMappingURL=speech-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAuCA,2EAA2E;AAC3E,MAAM,CAAC,MAAM,QAAQ,GAAG;IACtB,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,YAAY;IACxB,oBAAoB,EAAE,sBAAsB;IAC5C,WAAW,EAAE,aAAa;IAC1B,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,MAAM,UAAU,UAAU,CAAC,KAAgB,EAAE,EAAU;IACrD,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACnD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,UAAU,CACxB,KAAgB,EAChB,EAAU;IAEV,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACzC,OAAO,CAAM,CAAC;QAChB,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AA+FD;;;;GAIG;AACH,MAAM,UAAU,6BAA6B,CAC3C,QAAuB;IAEvB,4EAA4E;IAC5E,yDAAyD;IACzD,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAC9C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,OAAO,GAAG,UAAU,CAAoB,SAAS,EAAE,YAAY,CAAC,CAAC;IACvE,OAAO,OAAO,EAAE,IAAI,KAAK,QAAQ,CAAC;AACpC,CAAC"}
|
package/dist/speech-result.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SpeechMetadata } from "./metadata.js";
|
|
2
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
2
3
|
export interface GeneratedAudioFile {
|
|
3
4
|
readonly base64: string;
|
|
4
5
|
readonly mediaType: string;
|
|
@@ -8,6 +9,15 @@ export interface SpeechResult {
|
|
|
8
9
|
readonly audio: GeneratedAudioFile;
|
|
9
10
|
readonly metadata: SpeechMetadata;
|
|
10
11
|
readonly providerMetadata?: Record<string, unknown>;
|
|
12
|
+
/**
|
|
13
|
+
* Word-level alignment data. Populated when `timestamps: "on"` or when
|
|
14
|
+
* `timestamps: "auto"` (default) is combined with a TTS provider that
|
|
15
|
+
* returns alignment natively. Undefined otherwise.
|
|
16
|
+
*
|
|
17
|
+
* Timestamps are always word-granularity with start/end in seconds.
|
|
18
|
+
* Character- or phoneme-level native data is aggregated internally.
|
|
19
|
+
*/
|
|
20
|
+
readonly timestamps?: readonly WordTimestamp[];
|
|
11
21
|
readonly warnings?: string[];
|
|
12
22
|
}
|
|
13
23
|
export declare class DefaultGeneratedAudioFile implements GeneratedAudioFile {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,KAAK,EAAE,kBAAkB,CAAC;IACnC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpD;;;;;;;OAOG;IACH,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IAC/C,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,qBAAa,yBAA0B,YAAW,kBAAkB;IAClE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsB;IAC5C,OAAO,CAAC,WAAW,CAAC,CAAa;IACjC,OAAO,CAAC,OAAO,CAAC,CAAS;gBAEb,EACV,IAAI,EACJ,SAAS,GACV,EAAE;QAAE,IAAI,EAAE,MAAM,GAAG,UAAU,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAKnD,IAAI,UAAU,IAAI,UAAU,CAe3B;IAED,IAAI,MAAM,IAAI,MAAM,CAcnB;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAyBA,MAAM,OAAO,yBAAyB;IAC3B,SAAS,CAAS;IAEV,KAAK,CAAsB;IACpC,WAAW,CAAc;IACzB,OAAO,CAAU;IAEzB,YAAY,EACV,IAAI,EACJ,SAAS,GACwC;QACjD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,IAAI,UAAU;QACZ,IAAI,IAAI,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,WAAW,CAAC;QAC1B,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,YAAY,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,IAAI,MAAM;QACR,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,IAAI,YAAY,GAAG,EAAE,CAAC;YACtB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC9B,YAAY,IAAI,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC5C,CAAC;YACD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
+
/**
|
|
3
|
+
* Minimal info about an STT model. Parallels `ModelInfo` on the TTS side.
|
|
4
|
+
*/
|
|
5
|
+
export interface STTModelInfo {
|
|
6
|
+
readonly id: string;
|
|
7
|
+
readonly languages: readonly string[];
|
|
8
|
+
readonly releaseDate: string;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Transcribes generated audio and returns word-level timestamps. This is the
|
|
12
|
+
* "derived" path for `timestamps: "on"` — used when the TTS provider doesn't
|
|
13
|
+
* return alignment data natively.
|
|
14
|
+
*
|
|
15
|
+
* Providers return `WordTimestamp[]` with start/end in seconds. Normalization
|
|
16
|
+
* (ms → seconds, char/phoneme aggregation, tuple → object) happens inside the
|
|
17
|
+
* provider adapter so the public surface is uniform.
|
|
18
|
+
*/
|
|
19
|
+
export interface SpeechToTextProvider {
|
|
20
|
+
readonly defaultModel: string;
|
|
21
|
+
readonly id: string;
|
|
22
|
+
readonly models: readonly STTModelInfo[];
|
|
23
|
+
transcribe(options: {
|
|
24
|
+
modelId: string;
|
|
25
|
+
audio: Uint8Array;
|
|
26
|
+
mediaType: string;
|
|
27
|
+
language?: string;
|
|
28
|
+
abortSignal?: AbortSignal;
|
|
29
|
+
headers?: Record<string, string>;
|
|
30
|
+
}): Promise<{
|
|
31
|
+
timestamps: WordTimestamp[];
|
|
32
|
+
text?: string;
|
|
33
|
+
providerMetadata?: Record<string, unknown>;
|
|
34
|
+
}>;
|
|
35
|
+
}
|
|
36
|
+
export interface ResolvedSTTModel {
|
|
37
|
+
readonly modelId: string;
|
|
38
|
+
readonly provider: SpeechToTextProvider;
|
|
39
|
+
}
|
|
40
|
+
//# sourceMappingURL=speech-to-text-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speech-to-text-provider.d.ts","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,CAAC;IAEzC,UAAU,CAAC,OAAO,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,oBAAoB,CAAC;CACzC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speech-to-text-provider.js","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { ResolvedSTTModel, SpeechToTextProvider } from "../../speech-to-text-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
|
+
export interface OpenAISpeechToTextProviderConfig {
|
|
4
|
+
apiKey?: string;
|
|
5
|
+
baseURL?: string;
|
|
6
|
+
fetch?: typeof globalThis.fetch;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* OpenAI Whisper / gpt-4o-transcribe adapter for the SDK's derived-timestamps
|
|
10
|
+
* path. Uses `/v1/audio/transcriptions` with `timestamp_granularities: ["word"]`
|
|
11
|
+
* and `response_format: "verbose_json"`.
|
|
12
|
+
*
|
|
13
|
+
* Note: `gpt-4o-transcribe-diarize` is intentionally not listed — that
|
|
14
|
+
* variant does not support `timestamp_granularities`.
|
|
15
|
+
*/
|
|
16
|
+
export declare class OpenAISpeechToTextProvider implements SpeechToTextProvider {
|
|
17
|
+
readonly id = "openai";
|
|
18
|
+
readonly defaultModel = "whisper-1";
|
|
19
|
+
readonly models: readonly [{
|
|
20
|
+
readonly id: "whisper-1";
|
|
21
|
+
readonly releaseDate: "2023-03-01";
|
|
22
|
+
readonly languages: readonly ["af", "ar", "az", "be", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "fa", "fi", "fr", "gl", "he", "hi", "hr", "hu", "hy", "id", "is", "it", "ja", "kk", "kn", "ko", "lt", "lv", "mi", "mk", "mr", "ms", "ne", "nl", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sr", "sv", "sw", "ta", "th", "tl", "tr", "uk", "ur", "vi", "zh"];
|
|
23
|
+
}];
|
|
24
|
+
private readonly apiKey;
|
|
25
|
+
private readonly baseURL;
|
|
26
|
+
private readonly fetchFn;
|
|
27
|
+
constructor(config?: OpenAISpeechToTextProviderConfig);
|
|
28
|
+
transcribe(options: {
|
|
29
|
+
modelId: string;
|
|
30
|
+
audio: Uint8Array;
|
|
31
|
+
mediaType: string;
|
|
32
|
+
language?: string;
|
|
33
|
+
abortSignal?: AbortSignal;
|
|
34
|
+
headers?: Record<string, string>;
|
|
35
|
+
}): Promise<{
|
|
36
|
+
timestamps: WordTimestamp[];
|
|
37
|
+
text?: string;
|
|
38
|
+
providerMetadata?: Record<string, unknown>;
|
|
39
|
+
}>;
|
|
40
|
+
}
|
|
41
|
+
export declare function createOpenAISTT(config?: OpenAISpeechToTextProviderConfig): (modelId?: string) => ResolvedSTTModel;
|
|
42
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/stt-providers/openai/index.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACrB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,MAAM,WAAW,gCAAgC;IAC/C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAiED;;;;;;;GAOG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,YAAY,eAAe;IAMpC,QAAQ,CAAC,MAAM;;;;OAMJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,GAAE,gCAAqC;IAMnD,UAAU,CAAC,OAAO,EAAE;QACxB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAsDH;AAED,wBAAgB,eAAe,CAAC,MAAM,GAAE,gCAAqC,IAGjD,UAAU,MAAM,KAAG,gBAAgB,CAM9D"}
|