@speech-sdk/core 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +215 -269
- package/dist/__tests__/e2e/_save-audio.d.ts +51 -2
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +139 -11
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-utils.d.ts +2 -0
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +9 -0
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +137 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +283 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +37 -0
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +16 -0
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +0 -6
- package/dist/conversation/validate.js.map +1 -1
- package/dist/derive-timestamps.d.ts +14 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +38 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +2 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +72 -0
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +18 -1
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +73 -16
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +40 -0
- package/dist/logger.js.map +1 -0
- package/dist/provider-utils.d.ts +8 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -2
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +24 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +23 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +12 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +137 -2
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +24 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +48 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +19 -4
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +83 -13
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +0 -25
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +3 -58
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +38 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +31 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +8 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +75 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +25 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +23 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +11 -2
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +11 -2
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +22 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +17 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +8 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +10 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +12 -3
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +7 -3
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +32 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +57 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +7 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +13 -1
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +3 -12
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +48 -4
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +16 -0
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +10 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +40 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stt-providers/openai/index.d.ts +42 -0
- package/dist/stt-providers/openai/index.d.ts.map +1 -0
- package/dist/stt-providers/openai/index.js +184 -0
- package/dist/stt-providers/openai/index.js.map +1 -0
- package/dist/timestamps.d.ts +23 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/package.json +6 -2
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface MurfSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -11,7 +12,10 @@ export declare class MurfSpeechProvider implements SpeechProvider<string, string
|
|
|
11
12
|
readonly id: "GEN2";
|
|
12
13
|
readonly releaseDate: "2025-01-01";
|
|
13
14
|
readonly languages: readonly ["en", "de", "es", "fr", "zh", "ar", "hi", "bn", "ta", "pt", "it", "ja", "ko", "nl", "pl", "ru", "sv", "tr", "id", "ms", "tl", "cs", "fi", "th", "vi", "da", "no", "ro", "el", "hu", "uk", "sk", "bg"];
|
|
14
|
-
readonly features: readonly ["streaming"
|
|
15
|
+
readonly features: readonly ["streaming", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "native";
|
|
18
|
+
}];
|
|
15
19
|
}, {
|
|
16
20
|
readonly id: "FALCON";
|
|
17
21
|
readonly releaseDate: "2025-01-01";
|
|
@@ -29,10 +33,13 @@ export declare class MurfSpeechProvider implements SpeechProvider<string, string
|
|
|
29
33
|
providerOptions?: Record<string, unknown>;
|
|
30
34
|
abortSignal?: AbortSignal;
|
|
31
35
|
headers?: Record<string, string>;
|
|
36
|
+
includeTimestamps?: boolean;
|
|
32
37
|
}): Promise<{
|
|
33
38
|
audio: string | Uint8Array;
|
|
39
|
+
audioDurationMs?: number;
|
|
34
40
|
mediaType: string;
|
|
35
41
|
providerMetadata?: Record<string, unknown>;
|
|
42
|
+
timestamps?: WordTimestamp[];
|
|
36
43
|
}>;
|
|
37
44
|
stream(options: {
|
|
38
45
|
modelId: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/murf/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/murf/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAMzD,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,kBAAmB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACvE,QAAQ,CAAC,EAAE,UAAU;IACrB,QAAQ,CAAC,YAAY,UAAU;IAE/B,QAAQ,CAAC,MAAM;;;;;;;;;;;;;OA+CJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,wBAAwB;IAMtC,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;IA8DI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAkCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;CAYjC;AAED,wBAAgB,UAAU,CAAC,MAAM,GAAE,wBAA6B,IAGzC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAM9D"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
+
import { wordDurationsToWordTimestamps, } from "./alignment.js";
|
|
2
3
|
export class MurfSpeechProvider {
|
|
3
4
|
id = "murf";
|
|
4
5
|
defaultModel = "GEN2";
|
|
@@ -41,7 +42,7 @@ export class MurfSpeechProvider {
|
|
|
41
42
|
"sk",
|
|
42
43
|
"bg",
|
|
43
44
|
],
|
|
44
|
-
features: ["streaming"],
|
|
45
|
+
features: ["streaming", { id: "timestamps", mode: "native" }],
|
|
45
46
|
},
|
|
46
47
|
{
|
|
47
48
|
id: "FALCON",
|
|
@@ -95,9 +96,17 @@ export class MurfSpeechProvider {
|
|
|
95
96
|
};
|
|
96
97
|
}
|
|
97
98
|
const json = (await response.json());
|
|
99
|
+
const audioDurationMs = typeof json.audioLengthInSeconds === "number"
|
|
100
|
+
? Math.round(json.audioLengthInSeconds * 1000)
|
|
101
|
+
: undefined;
|
|
102
|
+
const timestamps = options.includeTimestamps && json.wordDurations
|
|
103
|
+
? wordDurationsToWordTimestamps(json.wordDurations)
|
|
104
|
+
: undefined;
|
|
98
105
|
return {
|
|
99
106
|
audio: json.encodedAudio,
|
|
107
|
+
audioDurationMs,
|
|
100
108
|
mediaType: "audio/wav",
|
|
109
|
+
timestamps,
|
|
101
110
|
};
|
|
102
111
|
}
|
|
103
112
|
async stream(options) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/murf/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/murf/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EAEL,6BAA6B,GAC9B,MAAM,gBAAgB,CAAC;AAQxB,MAAM,OAAO,kBAAkB;IACpB,EAAE,GAAG,MAAM,CAAC;IACZ,YAAY,GAAG,MAAM,CAAC;IAEtB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,MAAM;YACV,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,QAAQ,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;SAC9D;QACD;YACE,EAAE,EAAE,QAAQ;YACZ,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,CAAC;YACjB,QAAQ,EAAE,CAAC,WAAW,CAAC;SACxB;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAgC;QAC1C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,wBAAwB,CAAC;QAC1D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAOC,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,KAAK,QAAQ,CAAC;QAC9C,MAAM,GAAG,GAAG,QAAQ;YAClB,CAAC,CAAC,GAAG,IAAI,CAAC,OAAO,gBAAgB;YACjC,CAAC,CAAC,GAAG,IAAI,CAAC,OAAO,kBAAkB,CAAC;QAEtC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,OAAO,EAAE,OAAO,CAAC,KAAK;YACtB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC;QAC7B,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,SAAS,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;gBAC7D,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC
,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAE/D,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;YACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW,CAAC;YACtE,OAAO;gBACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;gBAClC,SAAS;aACV,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAIlC,CAAC;QACF,MAAM,eAAe,GACnB,OAAO,IAAI,CAAC,oBAAoB,KAAK,QAAQ;YAC3C,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC;YAC9C,CAAC,CAAC,SAAS,CAAC;QAChB,MAAM,UAAU,GACd,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,aAAa;YAC7C,CAAC,CAAC,6BAA6B,CAAC,IAAI,CAAC,aAAa,CAAC;YACnD,CAAC,CAAC,SAAS,CAAC;QAChB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,YAAY;YACxB,eAAe;YACf,SAAS,EAAE,WAAW;YACtB,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,gBAAgB,CAAC;QAE5C,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,OAAO,EAAE,OAAO,CAAC,KAAK;YACtB,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK,EAAE,OAAO,CAAC,OAAO;SACvB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,SAAS,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;gBAC7D,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAE/D,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,QAAQ,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACnE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW;SAC/D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,yEAAyE;YACzE,uEAAuE;YACvE,YAAY;YACZ,OAAO;gBACL,eAAe,EAAE,EAAE;gBACnB,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;CACF;AAED,MAAM,UAAU,UAAU,CAAC,SAAmC,EAAE;IAC9D,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAEhD,OAAO,SA
AS,IAAI,CAAC,OAAgB;QACnC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -12,17 +12,26 @@ export declare class OpenAISpeechProvider implements SpeechProvider<string, stri
|
|
|
12
12
|
readonly id: "gpt-4o-mini-tts";
|
|
13
13
|
readonly releaseDate: "2025-03-20";
|
|
14
14
|
readonly languages: readonly ["af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "fi", "fr", "gl", "gu", "he", "hi", "hr", "hu", "id", "is", "it", "ja", "jv", "ka", "kk", "km", "kn", "ko", "lo", "lt", "lv", "mk", "ml", "mn", "mr", "ms", "my", "ne", "nl", "no", "pa", "pl", "pt", "ro", "ru", "si", "sk", "sl", "so", "sq", "sr", "su", "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh"];
|
|
15
|
-
readonly features: readonly ["streaming", "audio-tags"
|
|
15
|
+
readonly features: readonly ["streaming", "audio-tags", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "derived";
|
|
18
|
+
}];
|
|
16
19
|
}, {
|
|
17
20
|
readonly id: "tts-1";
|
|
18
21
|
readonly releaseDate: "2023-11-06";
|
|
19
22
|
readonly languages: readonly ["af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "fi", "fr", "gl", "gu", "he", "hi", "hr", "hu", "id", "is", "it", "ja", "jv", "ka", "kk", "km", "kn", "ko", "lo", "lt", "lv", "mk", "ml", "mn", "mr", "ms", "my", "ne", "nl", "no", "pa", "pl", "pt", "ro", "ru", "si", "sk", "sl", "so", "sq", "sr", "su", "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh"];
|
|
20
|
-
readonly features: readonly ["streaming"
|
|
23
|
+
readonly features: readonly ["streaming", {
|
|
24
|
+
readonly id: "timestamps";
|
|
25
|
+
readonly mode: "derived";
|
|
26
|
+
}];
|
|
21
27
|
}, {
|
|
22
28
|
readonly id: "tts-1-hd";
|
|
23
29
|
readonly releaseDate: "2023-11-06";
|
|
24
30
|
readonly languages: readonly ["af", "ar", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "fi", "fr", "gl", "gu", "he", "hi", "hr", "hu", "id", "is", "it", "ja", "jv", "ka", "kk", "km", "kn", "ko", "lo", "lt", "lv", "mk", "ml", "mn", "mr", "ms", "my", "ne", "nl", "no", "pa", "pl", "pt", "ro", "ru", "si", "sk", "sl", "so", "sq", "sr", "su", "sv", "sw", "ta", "te", "th", "tl", "tr", "uk", "ur", "vi", "zh"];
|
|
25
|
-
readonly features: readonly ["streaming"
|
|
31
|
+
readonly features: readonly ["streaming", {
|
|
32
|
+
readonly id: "timestamps";
|
|
33
|
+
readonly mode: "derived";
|
|
34
|
+
}];
|
|
26
35
|
}];
|
|
27
36
|
private readonly apiKey;
|
|
28
37
|
private readonly baseURL;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/openai/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,cAAc,EACpB,MAAM,0BAA0B,CAAC;AAGlC,MAAM,WAAW,0BAA0B;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,oBAAqB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACzE,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,YAAY,qBAAqB;IAE1C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAoEtB;IAEX,QAAQ,CAAC,MAAM
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/openai/index.ts"],"names":[],"mappings":"AAMA,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,cAAc,EACpB,MAAM,0BAA0B,CAAC;AAGlC,MAAM,WAAW,0BAA0B;IACzC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,oBAAqB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACzE,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,YAAY,qBAAqB;IAE1C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAoEtB;IAEX,QAAQ,CAAC,MAAM;;;;;;;;;;;;;;;;;;;;;;;;OAuBJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,0BAA0B;IAM9C,OAAO,CAAC,iBAAiB;IA8BzB,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IAYjC,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IA0CI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IA2CF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;CASjC;AAED,wBAAgB,YAAY,CAAC,MAAM,GAAE,0BAA+B,IAG3C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMhE"}
|
|
@@ -79,19 +79,23 @@ export class OpenAISpeechProvider {
|
|
|
79
79
|
id: "gpt-4o-mini-tts",
|
|
80
80
|
releaseDate: "2025-03-20",
|
|
81
81
|
languages: OpenAISpeechProvider.LANGUAGES,
|
|
82
|
-
features: [
|
|
82
|
+
features: [
|
|
83
|
+
"streaming",
|
|
84
|
+
"audio-tags",
|
|
85
|
+
{ id: "timestamps", mode: "derived" },
|
|
86
|
+
],
|
|
83
87
|
},
|
|
84
88
|
{
|
|
85
89
|
id: "tts-1",
|
|
86
90
|
releaseDate: "2023-11-06",
|
|
87
91
|
languages: OpenAISpeechProvider.LANGUAGES,
|
|
88
|
-
features: ["streaming"],
|
|
92
|
+
features: ["streaming", { id: "timestamps", mode: "derived" }],
|
|
89
93
|
},
|
|
90
94
|
{
|
|
91
95
|
id: "tts-1-hd",
|
|
92
96
|
releaseDate: "2023-11-06",
|
|
93
97
|
languages: OpenAISpeechProvider.LANGUAGES,
|
|
94
|
-
features: ["streaming"],
|
|
98
|
+
features: ["streaming", { id: "timestamps", mode: "derived" }],
|
|
95
99
|
},
|
|
96
100
|
];
|
|
97
101
|
apiKey;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/openai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAGX,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,+BAA+B,EAAE,MAAM,mBAAmB,CAAC;AAQpE,MAAM,OAAO,oBAAoB;IACtB,EAAE,GAAG,QAAQ,CAAC;IACd,YAAY,GAAG,iBAAiB,CAAC;IAElC,MAAM,CAAU,SAAS,GAAG;QAClC,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;KACI,CAAC;IAEF,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,iBAAiB;YACrB,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,SAAS;YACzC,QAAQ,EAAE,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/openai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAGX,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,+BAA+B,EAAE,MAAM,mBAAmB,CAAC;AAQpE,MAAM,OAAO,oBAAoB;IACtB,EAAE,GAAG,QAAQ,CAAC;IACd,YAAY,GAAG,iBAAiB,CAAC;IAElC,MAAM,CAAU,SAAS,GAAG;QAClC,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;KACI,CAAC;IAEF,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,iBAAiB;YACrB,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,SAAS;YACzC,QAAQ,EAAE;gBACR,WAAW;gBACX,YAAY;gBACZ,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE;aACtC;SACF;QACD;YACE,EAAE,EAAE,OAAO;YACX,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,SAAS;YACzC,QAAQ,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;SAC/D;QACD;YACE,EAAE,EAAE,UAAU;YACd,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB,CAAC,SAAS;YACzC,QAAQ,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;SAC/D;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAkC;QAC5C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,2BAA2B,CAAC;QAC7D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAEO,iBAAiB,CACvB,OAAe,EACf,IAAY,EACZ,eAAoD;QAEpD,IAAI,OAAO,KAAK,iBAAiB,EAAE,CAAC;YAClC,O
AAO,EAAE,KAAK,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;QAClD,CAAC;QAED,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,EAAE,GAC5C,+BAA+B,CAAC,IAAI,CAAC,CAAC;QAExC,MAAM,gBAAgB,GAAG,eAAe,EAAE,YAAY,CAAC;QACvD,MAAM,mBAAmB,GACvB,OAAO,gBAAgB,KAAK,QAAQ,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACjE,CAAC,CAAC,gBAAgB;YAClB,CAAC,CAAC,SAAS,CAAC;QAEhB,IAAI,YAAgC,CAAC;QACrC,IAAI,mBAAmB,IAAI,OAAO,EAAE,CAAC;YACnC,YAAY,GAAG,GAAG,mBAAmB,OAAO,OAAO,EAAE,CAAC;QACxD,CAAC;aAAM,IAAI,mBAAmB,EAAE,CAAC;YAC/B,YAAY,GAAG,mBAAmB,CAAC;QACrC,CAAC;aAAM,IAAI,OAAO,EAAE,CAAC;YACnB,YAAY,GAAG,OAAO,CAAC;QACzB,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;IAC1C,CAAC;IAED,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,+DAA+D;QAC/D,+DAA+D;QAC/D,kDAAkD;QAClD,IACE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EACxE,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QACD,OAAO,cAAc,CAAC,IAAI,EAAE,UAAU,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAKC,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,iBAAiB,CACpD,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,IAAI,EACZ,OAAO,CAAC,eAAe,CACxB,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,KAAK,EAAE,OAAO,CAAC,OAAO;YACtB,KAAK;YACL,KAAK,EAAE,OAAO,CAAC,KAAK;SACrB,CAAC;QACF,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;YAC/B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACnC,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,eAAe,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,EAAE;gBACjF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjE,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY,CAAC;QAEvE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IA
CJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,iBAAiB,CACpD,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,IAAI,EACZ,OAAO,CAAC,eAAe,CACxB,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,KAAK,EAAE,OAAO,CAAC,OAAO;YACtB,KAAK;YACL,KAAK,EAAE,OAAO,CAAC,KAAK;SACrB,CAAC;QACF,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;YAC/B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACnC,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,eAAe,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,EAAE;gBACjF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,UAAU,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACrE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY;SAChE,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE,EAAE,eAAe,EAAE,KAAK,EAAE;gBAC3C,SAAS,EAAE,sBAAsB;aAClC,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;;AAGH,MAAM,UAAU,YAAY,CAAC,SAAqC,EAAE;IAClE,MAAM,QAAQ,GAAG,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAElD,OAAO,SAAS,MAAM,CAAC,OAAgB;QACrC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
|
|
3
|
+
* Shape of the `audio_timestamps` block Resemble's `/synthesize` returns
|
|
4
|
+
* alongside `audio_content`. Two parallel pairs of arrays:
|
|
5
|
+
*
|
|
6
|
+
* - `graph_chars[i]` is the i-th grapheme (Unicode character) — including
|
|
7
|
+
* spaces and punctuation as standalone entries — and `graph_times[i]` is
|
|
8
|
+
* its `[start, end]` window in **seconds**.
|
|
9
|
+
* - `phon_chars` / `phon_times` mirror that for ARPAbet phonemes (no spaces
|
|
10
|
+
* or punctuation), kept here for typing only — the SDK aggregates from
|
|
11
|
+
* graphemes, which match input characters 1:1.
|
|
12
|
+
*/
|
|
13
|
+
export interface ResembleAudioTimestamps {
|
|
14
|
+
/** Graphemes in order; spaces and punctuation appear as standalone entries. */
readonly graph_chars: readonly string[];
|
|
15
|
+
/** Parallel to `graph_chars`: `graph_times[i]` is the `[start, end]` window, in seconds, of `graph_chars[i]`. */
readonly graph_times: readonly (readonly number[])[];
|
|
16
|
+
/** ARPAbet phonemes (no spaces or punctuation); kept for typing only. */
readonly phon_chars?: readonly string[];
|
|
17
|
+
/** Timing windows mirroring `graph_times`, but for the entries of `phon_chars`. */
readonly phon_times?: readonly (readonly number[])[];
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Aggregate Resemble's grapheme-level timing into word-level timestamps.
|
|
21
|
+
*
|
|
22
|
+
* Algorithm: walk `graph_chars` in order. Whitespace flushes the current
|
|
23
|
+
* word and is dropped. Non-whitespace characters (letters AND punctuation)
|
|
24
|
+
* accumulate into a buffer — punctuation stays attached to its adjacent
|
|
25
|
+
* word ("Hello," is one word) to mirror the ElevenLabs aggregator.
|
|
26
|
+
*
|
|
27
|
+
* Each entry in `graph_times` is `[startSeconds, endSeconds]`; the word
|
|
28
|
+
* inherits the first character's start and the last character's end.
|
|
29
|
+
* Entries with malformed timing tuples are skipped to avoid NaN bleed.
|
|
30
|
+
*/
|
|
31
|
+
export declare function audioTimestampsToWordTimestamps(alignment: ResembleAudioTimestamps): WordTimestamp[];
|
|
32
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;;;;;GAUG;AACH,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,QAAQ,CAAC,WAAW,EAAE,SAAS,CAAC,SAAS,MAAM,EAAE,CAAC,EAAE,CAAC;IACrD,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,CAAC,SAAS,MAAM,EAAE,CAAC,EAAE,CAAC;CACtD;AAID;;;;;;;;;;;GAWG;AACH,wBAAgB,+BAA+B,CAC7C,SAAS,EAAE,uBAAuB,GACjC,aAAa,EAAE,CAiDjB"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
const WHITESPACE_CHAR = /^\s$/;
/**
 * Aggregate Resemble's grapheme-level timing into word-level timestamps.
 *
 * Algorithm: walk `graph_chars` in order. Whitespace flushes the current
 * word and is dropped. Non-whitespace characters (letters AND punctuation)
 * accumulate into a buffer — punctuation stays attached to its adjacent
 * word ("Hello," is one word) to mirror the ElevenLabs aggregator.
 *
 * Each entry in `graph_times` is `[startSeconds, endSeconds]`; the word
 * inherits the first character's start and the last character's end.
 * Entries with malformed timing tuples are skipped to avoid NaN bleed.
 *
 * A word is emitted only once it has accumulated text, so an empty
 * grapheme entry followed by whitespace never yields a `text: ""` word.
 *
 * @param alignment Object carrying the parallel `graph_chars` and
 *   `graph_times` arrays. A missing array is treated as empty.
 * @returns Word entries of shape `{ text, start, end }`, in input order.
 */
export function audioTimestampsToWordTimestamps(alignment) {
    const chars = alignment.graph_chars ?? [];
    const times = alignment.graph_times ?? [];
    if (chars.length === 0) {
        return [];
    }
    const words = [];
    let buf = "";
    let wordStart = 0;
    let wordEnd = 0;
    let inWord = false;
    // Single flush path shared by the whitespace boundary and end-of-input,
    // so both apply the same "only emit non-empty text" guard.
    const flushWord = () => {
        if (inWord && buf.length > 0) {
            words.push({ text: buf, start: wordStart, end: wordEnd });
        }
        buf = "";
        inWord = false;
    };
    for (let i = 0; i < chars.length; i++) {
        const c = chars[i] ?? "";
        if (WHITESPACE_CHAR.test(c)) {
            flushWord();
            continue;
        }
        // `times` may be shorter than `chars`, or hold short tuples; skip those.
        const t = times[i];
        if (!t || t.length < 2) {
            continue;
        }
        const s = t[0];
        const e = t[1];
        if (!(Number.isFinite(s) && Number.isFinite(e))) {
            continue;
        }
        if (!inWord) {
            // First timed character of a new word fixes the word's start.
            wordStart = s;
            inWord = true;
        }
        buf += c;
        wordEnd = e;
    }
    flushWord();
    return words;
}
|
|
57
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAoBA,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,+BAA+B,CAC7C,SAAkC;IAElC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAErC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,MAAM,EAAE,CAAC;gBACX,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC1D,GAAG,GAAG,EAAE,CAAC;gBACT,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,SAAS;QACX,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,SAAS,GAAG,CAAC,CAAC;YACd,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,GAAG,IAAI,CAAC,CAAC;QACT,OAAO,GAAG,CAAC,CAAC;IACd,CAAC;IAED,IAAI,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface ResembleSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -11,7 +12,10 @@ export declare class ResembleSpeechProvider implements SpeechProvider<string, st
|
|
|
11
12
|
readonly id: "default";
|
|
12
13
|
readonly releaseDate: "2025-09-04";
|
|
13
14
|
readonly languages: readonly ["en", "ar", "da", "de", "el", "es", "fi", "fr", "he", "hi", "it", "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh"];
|
|
14
|
-
readonly features: readonly ["streaming", "open-source", "inline-voice-cloning"
|
|
15
|
+
readonly features: readonly ["streaming", "open-source", "inline-voice-cloning", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "native";
|
|
18
|
+
}];
|
|
15
19
|
}];
|
|
16
20
|
private readonly apiKey;
|
|
17
21
|
private readonly baseURL;
|
|
@@ -24,10 +28,12 @@ export declare class ResembleSpeechProvider implements SpeechProvider<string, st
|
|
|
24
28
|
providerOptions?: Record<string, unknown>;
|
|
25
29
|
abortSignal?: AbortSignal;
|
|
26
30
|
headers?: Record<string, string>;
|
|
31
|
+
includeTimestamps?: boolean;
|
|
27
32
|
}): Promise<{
|
|
28
33
|
audio: string;
|
|
29
34
|
mediaType: string;
|
|
30
35
|
providerMetadata?: Record<string, unknown>;
|
|
36
|
+
timestamps?: WordTimestamp[];
|
|
31
37
|
}>;
|
|
32
38
|
stream(options: {
|
|
33
39
|
modelId: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAMzD,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM;;;;;;;;OAoCJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAM1C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;IA8CI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAqCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;CAYjC;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
+
import { audioTimestampsToWordTimestamps, } from "./alignment.js";
|
|
2
3
|
export class ResembleSpeechProvider {
|
|
3
4
|
id = "resemble";
|
|
4
5
|
defaultModel = "default";
|
|
@@ -31,7 +32,12 @@ export class ResembleSpeechProvider {
|
|
|
31
32
|
"tr",
|
|
32
33
|
"zh",
|
|
33
34
|
],
|
|
34
|
-
features: [
|
|
35
|
+
features: [
|
|
36
|
+
"streaming",
|
|
37
|
+
"open-source",
|
|
38
|
+
"inline-voice-cloning",
|
|
39
|
+
{ id: "timestamps", mode: "native" },
|
|
40
|
+
],
|
|
35
41
|
},
|
|
36
42
|
];
|
|
37
43
|
apiKey;
|
|
@@ -61,10 +67,16 @@ export class ResembleSpeechProvider {
|
|
|
61
67
|
signal: options.abortSignal,
|
|
62
68
|
});
|
|
63
69
|
await handleErrorResponse(response, `resemble/${options.modelId}`);
|
|
70
|
+
// Resemble always returns `audio_timestamps`; gate the projection on
|
|
71
|
+
// the caller's opt-in rather than the presence of the field.
|
|
64
72
|
const json = (await response.json());
|
|
73
|
+
const timestamps = options.includeTimestamps && json.audio_timestamps
|
|
74
|
+
? audioTimestampsToWordTimestamps(json.audio_timestamps)
|
|
75
|
+
: undefined;
|
|
65
76
|
return {
|
|
66
77
|
audio: json.audio_content,
|
|
67
78
|
mediaType: "audio/wav",
|
|
79
|
+
timestamps,
|
|
68
80
|
};
|
|
69
81
|
}
|
|
70
82
|
async stream(options) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,+BAA+B,GAEhC,MAAM,gBAAgB,CAAC;AAQxB,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,UAAU,CAAC;IAChB,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,SAAS;YACb,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,QAAQ,EAAE;gBACR,WAAW;gBACX,aAAa;gBACb,sBAAsB;gBACtB,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE;aACrC;SACF;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,+BAA+B,CAAC;QACjE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAMC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,aAAa,CAAC;QAEzC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,qEAAqE;QACrE,6DAA6D;QAC7D,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAGlC,CAAC;QAEF,MAAM,UAAU,GACd,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,gBAAgB;YAChD,CAAC,CAAC,+BAA+B,CAAC,IAAI,CAAC,gBAAgB,CAAC;YACxD,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,aAAa;YACzB,SAAS,EAAE,WAAW;YACtB,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,GAA
G,GAAG,GAAG,IAAI,CAAC,OAAO,SAAS,CAAC;QAErC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,YAAY,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACvE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW;SAC/D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,qEAAqE;YACrE,oEAAoE;YACpE,qCAAqC;YACrC,OAAO;gBACL,eAAe,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE;gBACxC,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;CACF;AAED,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve-provider.d.ts","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"resolve-provider.d.ts","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAeA,OAAO,KAAK,EAAE,aAAa,EAAkB,MAAM,sBAAsB,CAAC;AAgD1E,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,GAAG,aAAa,EAC7B,OAAO,CAAC,EAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,GAC5B,aAAa,CAWf"}
|
package/dist/resolve-provider.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { SpeechSDKError } from "./errors.js";
|
|
2
|
+
import { parseProviderModelSpec } from "./provider-utils.js";
|
|
2
3
|
import { CartesiaSpeechProvider } from "./providers/cartesia/index.js";
|
|
3
4
|
import { DeepgramSpeechProvider } from "./providers/deepgram/index.js";
|
|
4
5
|
import { ElevenLabsSpeechProvider } from "./providers/elevenlabs/index.js";
|
|
@@ -55,21 +56,11 @@ export function resolveModel(model, options) {
|
|
|
55
56
|
if (isResolvedModel(model)) {
|
|
56
57
|
return model;
|
|
57
58
|
}
|
|
58
|
-
const
|
|
59
|
-
let providerName;
|
|
60
|
-
let modelId;
|
|
61
|
-
if (slashIndex === -1) {
|
|
62
|
-
providerName = model;
|
|
63
|
-
modelId = undefined;
|
|
64
|
-
}
|
|
65
|
-
else {
|
|
66
|
-
providerName = model.slice(0, slashIndex);
|
|
67
|
-
modelId = model.slice(slashIndex + 1);
|
|
68
|
-
}
|
|
59
|
+
const { providerName, modelId } = parseProviderModelSpec(model);
|
|
69
60
|
const provider = createBuiltinProvider(providerName, options);
|
|
70
61
|
return {
|
|
71
62
|
provider,
|
|
72
|
-
modelId: modelId
|
|
63
|
+
modelId: modelId ?? provider.defaultModel,
|
|
73
64
|
};
|
|
74
65
|
}
|
|
75
66
|
//# sourceMappingURL=resolve-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolve-provider.js","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAG7D,SAAS,eAAe,CAAC,KAAc;IACrC,OAAO,CACL,KAAK,IAAI,IAAI;QACb,OAAO,KAAK,KAAK,QAAQ;QACzB,UAAU,IAAI,KAAK;QACnB,SAAS,IAAI,KAAK,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAY,EACZ,OAA6B;IAE7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,wBAAwB,CAAC,MAAM,CAAC,CAAC;QAC9C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,uBAAuB,CAAC,MAAM,CAAC,CAAC;QAC7C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,QAAQ;YACX,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,KAAK;YACR,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC;YACE,MAAM,IAAI,cAAc,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,KAA6B,EAC7B,OAA6B;IAE7B,IAAI,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK
,CAAC;IACf,CAAC;IAED,MAAM,
|
|
1
|
+
{"version":3,"file":"resolve-provider.js","sourceRoot":"","sources":["../src/resolve-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,sBAAsB,EAAE,MAAM,qBAAqB,CAAC;AAC7D,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAG7D,SAAS,eAAe,CAAC,KAAc;IACrC,OAAO,CACL,KAAK,IAAI,IAAI;QACb,OAAO,KAAK,KAAK,QAAQ;QACzB,UAAU,IAAI,KAAK;QACnB,SAAS,IAAI,KAAK,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAY,EACZ,OAA6B;IAE7B,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACjE,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,wBAAwB,CAAC,MAAM,CAAC,CAAC;QAC9C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,QAAQ;YACX,OAAO,IAAI,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAC1C,KAAK,YAAY;YACf,OAAO,IAAI,uBAAuB,CAAC,MAAM,CAAC,CAAC;QAC7C,KAAK,MAAM;YACT,OAAO,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;QACxC,KAAK,UAAU;YACb,OAAO,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;QAC5C,KAAK,QAAQ;YACX,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC,KAAK,SAAS;YACZ,OAAO,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,KAAK;YACR,OAAO,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;QACvC;YACE,MAAM,IAAI,cAAc,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,KAA6B,EAC7B,OAA6B;IAE7B,IAAI,
eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,EAAE,YAAY,EAAE,OAAO,EAAE,GAAG,sBAAsB,CAAC,KAAK,CAAC,CAAC;IAChE,MAAM,QAAQ,GAAG,qBAAqB,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9D,OAAO;QACL,QAAQ;QACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;KAC1C,CAAC;AACJ,CAAC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
1
2
|
export type Voice = string | {
|
|
2
3
|
url: string;
|
|
3
4
|
} | {
|
|
@@ -6,13 +7,31 @@ export type Voice = string | {
|
|
|
6
7
|
/**
|
|
7
8
|
* A capability supported by a model. Today every feature is just an id (a
|
|
8
9
|
* string), meaning "this model has feature X". The union also accepts an
|
|
9
|
-
* object form `{ id, ...params }` so
|
|
10
|
-
*
|
|
11
|
-
*
|
|
10
|
+
* object form `{ id, ...params }` so features that need parameters (e.g.
|
|
11
|
+
* `timestamps` with a `mode`) can extend the type without breaking
|
|
12
|
+
* existing string-based features.
|
|
12
13
|
*/
|
|
13
|
-
export type Feature = string | {
|
|
14
|
+
export type Feature = string | TimestampsFeature | {
|
|
14
15
|
readonly id: string;
|
|
15
16
|
};
|
|
17
|
+
/**
 * Feature entry describing how a model supplies word-level timestamps.
 *
 * `mode` takes one of two values:
 * - `"native"` — word-level alignment comes back directly in the TTS
 *   endpoint's response (e.g., ElevenLabs `/with-timestamps`, Cartesia SSE).
 * - `"derived"` — there is no native alignment; with `timestamps: "on"` the
 *   generated audio is sent through an STT round-trip to obtain word
 *   timings. This adds cost and latency, but works with any provider that
 *   has a usable STT API.
 *
 * A provider with no viable path (no same-vendor STT, or no word-level
 * output) declares no TIMESTAMPS feature at all. For those,
 * `timestamps: "on"` goes through the default `timestampProvider`
 * (OpenAI Whisper), and a clear error is raised when no fallback key is
 * configured.
 */
export interface TimestampsFeature {
    readonly id: "timestamps";
    readonly mode: "native" | "derived";
}
|
|
16
35
|
export interface ModelInfo {
|
|
17
36
|
readonly features: readonly Feature[];
|
|
18
37
|
readonly id: string;
|
|
@@ -25,6 +44,7 @@ export declare const FEATURES: {
|
|
|
25
44
|
readonly AUDIO_TAGS: "audio-tags";
|
|
26
45
|
readonly INLINE_VOICE_CLONING: "inline-voice-cloning";
|
|
27
46
|
readonly OPEN_SOURCE: "open-source";
|
|
47
|
+
readonly TIMESTAMPS: "timestamps";
|
|
28
48
|
};
|
|
29
49
|
export declare function hasFeature(model: ModelInfo, id: string): boolean;
|
|
30
50
|
export declare function getFeature<T extends {
|
|
@@ -44,11 +64,20 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
44
64
|
providerOptions?: Record<string, unknown>;
|
|
45
65
|
abortSignal?: AbortSignal;
|
|
46
66
|
headers?: Record<string, string>;
|
|
67
|
+
/**
|
|
68
|
+
* Hint from the orchestrator that the caller wants word timestamps. A
|
|
69
|
+
* provider that supports native alignment should switch to its timestamp
|
|
70
|
+
* endpoint (e.g., ElevenLabs `/with-timestamps`) and populate `timestamps`
|
|
71
|
+
* in the return. Providers without native support ignore this flag; the
|
|
72
|
+
* orchestrator then routes through an STT fallback.
|
|
73
|
+
*/
|
|
74
|
+
includeTimestamps?: boolean;
|
|
47
75
|
}): Promise<{
|
|
48
76
|
audio: string | Uint8Array;
|
|
49
77
|
audioDurationMs?: number;
|
|
50
78
|
mediaType: string;
|
|
51
79
|
providerMetadata?: Record<string, unknown>;
|
|
80
|
+
timestamps?: WordTimestamp[];
|
|
52
81
|
}>;
|
|
53
82
|
generateDialogue?(options: {
|
|
54
83
|
modelId: string;
|
|
@@ -59,11 +88,20 @@ export interface SpeechProvider<TModel extends string = string, TVoice extends V
|
|
|
59
88
|
providerOptions?: Record<string, unknown>;
|
|
60
89
|
abortSignal?: AbortSignal;
|
|
61
90
|
headers?: Record<string, string>;
|
|
91
|
+
/**
|
|
92
|
+
* Hint that the caller wants word timestamps. A dialogue provider with a
|
|
93
|
+
* native timestamp endpoint (e.g., ElevenLabs text-to-dialogue with
|
|
94
|
+
* alignment) should switch to it and populate `timestamps` in the
|
|
95
|
+
* return. Providers without native support ignore the flag; the
|
|
96
|
+
* conversation orchestrator then falls back to STT on the mixed audio.
|
|
97
|
+
*/
|
|
98
|
+
includeTimestamps?: boolean;
|
|
62
99
|
}): Promise<{
|
|
63
100
|
audio: string | Uint8Array;
|
|
64
101
|
audioDurationMs?: number;
|
|
65
102
|
mediaType: string;
|
|
66
103
|
providerMetadata?: Record<string, unknown>;
|
|
104
|
+
timestamps?: WordTimestamp[];
|
|
67
105
|
}>;
|
|
68
106
|
getStitchOptions?(modelId: string): {
|
|
69
107
|
providerOptions: Record<string, unknown>;
|
|
@@ -93,4 +131,10 @@ export interface ResolvedModel<TVoice extends Voice = Voice> {
|
|
|
93
131
|
modelId: string;
|
|
94
132
|
provider: SpeechProvider<string, TVoice>;
|
|
95
133
|
}
|
|
134
|
+
/**
 * Tells whether a resolved model's feature list contains
 * `{ id: "timestamps", mode: "native" }`, meaning its TTS endpoint returns
 * alignment data in the response itself and no STT round-trip is needed.
 *
 * @param resolved The provider/model pair to inspect.
 * @returns `true` only for a declared native timestamps feature.
 */
export declare function modelDeclaresNativeTimestamps(resolved: ResolvedModel): boolean;
|
|
96
140
|
//# sourceMappingURL=speech-provider.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.d.ts","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,GAAG,UAAU,CAAA;CAAE,CAAC;AAE9E;;;;;;GAMG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG;IAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"speech-provider.d.ts","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,GAAG,UAAU,CAAA;CAAE,CAAC;AAE9E;;;;;;GAMG;AACH,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG,iBAAiB,GAAG;IAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC;AAE3E;;;;;;;;;;;;;GAaG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,EAAE,YAAY,CAAC;IAC1B,QAAQ,CAAC,IAAI,EAAE,QAAQ,GAAG,SAAS,CAAC;CACrC;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,QAAQ,EAAE,SAAS,OAAO,EAAE,CAAC;IACtC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,2EAA2E;AAC3E,eAAO,MAAM,QAAQ;;;;;;CAMX,CAAC;AAEX,wBAAgB,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAOhE;AAED,wBAAgB,UAAU,CAAC,CAAC,SAAS;IAAE,EAAE,EAAE,MAAM,CAAA;CAAE,EACjD,KAAK,EAAE,SAAS,EAChB,EAAE,EAAE,MAAM,GACT,CAAC,GAAG,SAAS,CAOf;AAED,MAAM,WAAW,cAAc,CAC7B,MAAM,SAAS,MAAM,GAAG,MAAM,EAC9B,MAAM,SAAS,KAAK,GAAG,KAAK;IAE5B,YAAY,EAAE,MAAM,CAAC;IAErB,oBAAoB,CAAC,CAAC,OAAO,EAAE,MAAM,GACjC;QACE,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,aAAa,CAAC,EAAE,MAAM,CAAC;KACxB,GACD,SAAS,CAAC;IAEd,QAAQ,CAAC,OAAO,EAAE;QAChB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC;;;;;;WAMG;QACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC,CAAC;IAEH,gBAAgB,CAAC,CAAC,OAAO,EAAE;QACzB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CA
AC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC;;;;;;WAMG;QACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC,CAAC;IAEH,gBAAgB,CAAC,CAAC,OAAO,EAAE,MAAM,GAC7B;QACE,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,GACD,SAAS,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IAE7B,gBAAgB,CAAC,CACf,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAExC,MAAM,CAAC,CAAC,OAAO,EAAE;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,aAAa,CAAC,MAAM,SAAS,KAAK,GAAG,KAAK;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC1C;AAED;;;;GAIG;AACH,wBAAgB,6BAA6B,CAC3C,QAAQ,EAAE,aAAa,GACtB,OAAO,CAWT"}
|
package/dist/speech-provider.js
CHANGED
|
@@ -4,6 +4,7 @@ export const FEATURES = {
|
|
|
4
4
|
AUDIO_TAGS: "audio-tags",
|
|
5
5
|
INLINE_VOICE_CLONING: "inline-voice-cloning",
|
|
6
6
|
OPEN_SOURCE: "open-source",
|
|
7
|
+
TIMESTAMPS: "timestamps",
|
|
7
8
|
};
|
|
8
9
|
export function hasFeature(model, id) {
|
|
9
10
|
for (const f of model.features) {
|
|
@@ -21,4 +22,19 @@ export function getFeature(model, id) {
|
|
|
21
22
|
}
|
|
22
23
|
return undefined;
|
|
23
24
|
}
|
|
25
|
+
/**
 * Tells whether a resolved model's feature list contains
 * `{ id: "timestamps", mode: "native" }`, meaning its TTS endpoint returns
 * alignment data in the response itself and no STT round-trip is needed.
 *
 * @param resolved The provider/model pair to inspect.
 * @returns `true` only when the matching model entry carries a
 *   `timestamps` feature whose `mode` is `"native"`; `false` when the
 *   provider exposes no `models`, the model id is not listed, or the
 *   feature is absent or has another mode.
 */
export function modelDeclaresNativeTimestamps(resolved) {
    const { provider, modelId } = resolved;
    // The SpeechProvider interface requires `models`, but tests and mocks may
    // omit it, so a missing list is treated as "no native timestamps".
    const catalogue = provider.models;
    if (catalogue === undefined || catalogue === null) {
        return false;
    }
    const entry = catalogue.find((candidate) => candidate.id === modelId);
    if (!entry) {
        return false;
    }
    const timestampsFeature = getFeature(entry, "timestamps");
    if (timestampsFeature === undefined || timestampsFeature === null) {
        return false;
    }
    return timestampsFeature.mode === "native";
}
|
|
24
40
|
//# sourceMappingURL=speech-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAuCA,2EAA2E;AAC3E,MAAM,CAAC,MAAM,QAAQ,GAAG;IACtB,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,YAAY;IACxB,oBAAoB,EAAE,sBAAsB;IAC5C,WAAW,EAAE,aAAa;IAC1B,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,MAAM,UAAU,UAAU,CAAC,KAAgB,EAAE,EAAU;IACrD,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACnD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,UAAU,CACxB,KAAgB,EAChB,EAAU;IAEV,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACzC,OAAO,CAAM,CAAC;QAChB,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AA+FD;;;;GAIG;AACH,MAAM,UAAU,6BAA6B,CAC3C,QAAuB;IAEvB,4EAA4E;IAC5E,yDAAyD;IACzD,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAC9C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,OAAO,GAAG,UAAU,CAAoB,SAAS,EAAE,YAAY,CAAC,CAAC;IACvE,OAAO,OAAO,EAAE,IAAI,KAAK,QAAQ,CAAC;AACpC,CAAC"}
|
package/dist/speech-result.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SpeechMetadata } from "./metadata.js";
|
|
2
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
2
3
|
export interface GeneratedAudioFile {
|
|
3
4
|
readonly base64: string;
|
|
4
5
|
readonly mediaType: string;
|
|
@@ -8,6 +9,15 @@ export interface SpeechResult {
|
|
|
8
9
|
readonly audio: GeneratedAudioFile;
|
|
9
10
|
readonly metadata: SpeechMetadata;
|
|
10
11
|
readonly providerMetadata?: Record<string, unknown>;
|
|
12
|
+
/**
|
|
13
|
+
* Word-level alignment data. Populated when `timestamps: "on"` or when
|
|
14
|
+
* `timestamps: "auto"` (default) is combined with a TTS provider that
|
|
15
|
+
* returns alignment natively. Undefined otherwise.
|
|
16
|
+
*
|
|
17
|
+
* Timestamps are always word-granularity with start/end in seconds.
|
|
18
|
+
* Character- or phoneme-level native data is aggregated internally.
|
|
19
|
+
*/
|
|
20
|
+
readonly timestamps?: readonly WordTimestamp[];
|
|
11
21
|
readonly warnings?: string[];
|
|
12
22
|
}
|
|
13
23
|
export declare class DefaultGeneratedAudioFile implements GeneratedAudioFile {
|