@speech-sdk/core 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +215 -269
- package/dist/__tests__/e2e/_save-audio.d.ts +51 -2
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +139 -11
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-utils.d.ts +2 -0
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +9 -0
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +137 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +283 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +37 -0
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +16 -0
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +0 -6
- package/dist/conversation/validate.js.map +1 -1
- package/dist/derive-timestamps.d.ts +14 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +38 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +2 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +72 -0
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +18 -1
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +73 -16
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +40 -0
- package/dist/logger.js.map +1 -0
- package/dist/provider-utils.d.ts +8 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -2
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +24 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +23 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +12 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +137 -2
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +24 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +48 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +19 -4
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +83 -13
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +0 -25
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +3 -58
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +38 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +31 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +8 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +75 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +25 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +23 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +11 -2
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +11 -2
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +22 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +17 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +8 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +10 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +12 -3
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +7 -3
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +32 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +57 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +7 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +13 -1
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +3 -12
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +48 -4
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +16 -0
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +10 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +40 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stt-providers/openai/index.d.ts +42 -0
- package/dist/stt-providers/openai/index.d.ts.map +1 -0
- package/dist/stt-providers/openai/index.js +184 -0
- package/dist/stt-providers/openai/index.js.map +1 -0
- package/dist/timestamps.d.ts +23 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/package.json +6 -2
|
@@ -16,12 +16,6 @@ export class FalSpeechProvider {
|
|
|
16
16
|
languages: ["en", "fr", "ko", "ja", "zh"],
|
|
17
17
|
features: ["open-source"],
|
|
18
18
|
},
|
|
19
|
-
{
|
|
20
|
-
id: "dia-tts",
|
|
21
|
-
releaseDate: "2025-04-21",
|
|
22
|
-
languages: ["en"],
|
|
23
|
-
features: ["open-source", "inline-voice-cloning"],
|
|
24
|
-
},
|
|
25
19
|
{
|
|
26
20
|
id: "orpheus-tts",
|
|
27
21
|
releaseDate: "2025-03-18",
|
|
@@ -93,9 +87,9 @@ export class FalSpeechProvider {
|
|
|
93
87
|
return Promise.reject(new StreamingNotSupportedError(`fal-ai/${options.modelId}`));
|
|
94
88
|
}
|
|
95
89
|
getStitchOptions(modelId) {
|
|
96
|
-
// All currently-listed fal models (
|
|
97
|
-
//
|
|
98
|
-
//
|
|
90
|
+
// All currently-listed fal models (orpheus-tts, f5-tts, kokoro) return
|
|
91
|
+
// WAV (16-bit mono PCM in a RIFF container) at fal's CDN URL. Pass
|
|
92
|
+
// through providerOptions empty — fal exposes no format selector.
|
|
99
93
|
if (this.models.some((m) => m.id === modelId)) {
|
|
100
94
|
return {
|
|
101
95
|
providerOptions: {},
|
|
@@ -104,55 +98,6 @@ export class FalSpeechProvider {
|
|
|
104
98
|
}
|
|
105
99
|
return undefined;
|
|
106
100
|
}
|
|
107
|
-
dialogueCapabilities(modelId) {
|
|
108
|
-
if (modelId === "dia-tts") {
|
|
109
|
-
return { minVoices: 1, maxVoices: 2 };
|
|
110
|
-
}
|
|
111
|
-
return undefined;
|
|
112
|
-
}
|
|
113
|
-
async generateDialogue(options) {
|
|
114
|
-
if (options.modelId !== "dia-tts") {
|
|
115
|
-
throw new Error(`fal-ai/${options.modelId} does not support native dialogue; use dia-tts.`);
|
|
116
|
-
}
|
|
117
|
-
const voiceKeyOf = (v) => typeof v === "string" ? `s:${v}` : `u:${v.url}`;
|
|
118
|
-
const voiceToTag = new Map();
|
|
119
|
-
const tagged = [];
|
|
120
|
-
for (const t of options.turns) {
|
|
121
|
-
const k = voiceKeyOf(t.voice);
|
|
122
|
-
let tag = voiceToTag.get(k);
|
|
123
|
-
if (!tag) {
|
|
124
|
-
tag = `[S${voiceToTag.size + 1}]`;
|
|
125
|
-
voiceToTag.set(k, tag);
|
|
126
|
-
}
|
|
127
|
-
tagged.push(`${tag} ${t.text}`);
|
|
128
|
-
}
|
|
129
|
-
const text = tagged.join(" ");
|
|
130
|
-
const body = {
|
|
131
|
-
...options.providerOptions,
|
|
132
|
-
text,
|
|
133
|
-
};
|
|
134
|
-
// If any turn carries a URL voice reference, forward the first one as
|
|
135
|
-
// audio_url (Dia supports one reference clip per request).
|
|
136
|
-
const firstUrlVoice = options.turns.find((t) => typeof t.voice !== "string" && "url" in t.voice)?.voice;
|
|
137
|
-
if (firstUrlVoice) {
|
|
138
|
-
body.audio_url = firstUrlVoice.url;
|
|
139
|
-
}
|
|
140
|
-
const url = `${this.baseURL}/fal-ai/${options.modelId}`;
|
|
141
|
-
const response = await this.fetchFn(url, {
|
|
142
|
-
method: "POST",
|
|
143
|
-
headers: {
|
|
144
|
-
"Content-Type": "application/json",
|
|
145
|
-
Authorization: `Key ${resolveApiKey(this.apiKey, "FAL_API_KEY", "fal")}`,
|
|
146
|
-
"X-User-Agent": SDK_USER_AGENT,
|
|
147
|
-
...options.headers,
|
|
148
|
-
},
|
|
149
|
-
body: JSON.stringify(body),
|
|
150
|
-
signal: options.abortSignal,
|
|
151
|
-
});
|
|
152
|
-
await handleErrorResponse(response, `fal-ai/${options.modelId}`);
|
|
153
|
-
const json = (await response.json());
|
|
154
|
-
return await this.fetchAudio(json, options);
|
|
155
|
-
}
|
|
156
101
|
}
|
|
157
102
|
export function createFal(config = {}) {
|
|
158
103
|
const provider = new FalSpeechProvider(config);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/fal/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AACvE,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AASjC,MAAM,OAAO,iBAAiB;IAGnB,EAAE,GAAG,QAAQ,CAAC;IACd,YAAY,GAAG,EAAE,CAAC;IAElB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,QAAQ;YACZ,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACjE,QAAQ,EAAE,CAAC,aAAa,EAAE,sBAAsB,CAAC;SAClD;QACD;YACE,EAAE,EAAE,QAAQ;YACZ,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACzC,QAAQ,EAAE,CAAC,aAAa,CAAC;SAC1B;QACD;YACE,EAAE,EAAE,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/fal/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AACvE,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AASjC,MAAM,OAAO,iBAAiB;IAGnB,EAAE,GAAG,QAAQ,CAAC;IACd,YAAY,GAAG,EAAE,CAAC;IAElB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,QAAQ;YACZ,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACjE,QAAQ,EAAE,CAAC,aAAa,EAAE,sBAAsB,CAAC;SAClD;QACD;YACE,EAAE,EAAE,QAAQ;YACZ,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACzC,QAAQ,EAAE,CAAC,aAAa,CAAC;SAC1B;QACD;YACE,EAAE,EAAE,aAAa;YACjB,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;YACrD,QAAQ,EAAE,CAAC,aAAa,CAAC;SAC1B;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAA+B;QACzC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,iBAAiB,CAAC;QACnD,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAKC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACrB,MAAM,IAAI,KAAK,CACb,yFAAyF,CAC1F,CAAC;QACJ,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,OAAO,CAAC,OAAO,EAAE,CAAC;QAExD,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,IAAI,OAAO,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;YAC1B,IAAI,OAAO,OAAO,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBACtC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;YAC7B,CAAC;iBAAM,IAAI,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;gBAClC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC;YACrC,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,OAAO,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,aAAa,EAAE,KAAK,CAAC,EAAE;gBACxE,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YA
C1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAElC,CAAC;QACF,OAAO,MAAM,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;IAEO,KAAK,CAAC,UAAU,CACtB,IAAuD,EACvD,OAAuD;QAEvD,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;YACvD,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,IAAI,CAAC,aAAa,CAAC,EAAE,EAAE,CAAC;YACtB,MAAM,IAAI,QAAQ,CAAC,cAAc,aAAa,CAAC,MAAM,EAAE,EAAE;gBACvD,UAAU,EAAE,aAAa,CAAC,MAAM;gBAChC,KAAK,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE;gBAClC,YAAY,EAAE,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC;aAChE,CAAC,CAAC;QACL,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,WAAW,EAAE,CAAC;QACtD,oEAAoE;QACpE,sEAAsE;QACtE,uCAAuC;QACvC,MAAM,SAAS,GACb,IAAI,CAAC,KAAK,CAAC,YAAY;YACvB,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;YACzC,WAAW,CAAC;QAEd,OAAO,EAAE,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,SAAS,EAAE,CAAC;IAC3D,CAAC;IAED,MAAM,CAAC,OAA4B;QACjC,OAAO,OAAO,CAAC,MAAM,CACnB,IAAI,0BAA0B,CAAC,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAC5D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,uEAAuE;QACvE,mEAAmE;QACnE,kEAAkE;QAClE,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE,EAAE;gBACnB,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;CACF;AAED,MAAM,UAAU,SAAS,CAAC,SAAkC,EAAE;IAC5D,MAAM,QAAQ,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAE/C,OAAO,SAAS,GAAG,CACjB,OAAgB;QAEhB,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
|
|
3
|
+
* Shape of one timestamp entry inside a Hume Octave-2 `snippets[][].timestamps`
|
|
4
|
+
* array. `time.begin` and `time.end` are integer milliseconds from the start
|
|
5
|
+
* of that snippet's audio. Hume emits both `"word"` and `"phoneme"` entries
|
|
6
|
+
* when both are requested via `include_timestamp_types`.
|
|
7
|
+
*/
|
|
8
|
+
export interface HumeTimestamp {
|
|
9
|
+
readonly text: string;
|
|
10
|
+
readonly time: {
|
|
11
|
+
readonly begin: number;
|
|
12
|
+
readonly end: number;
|
|
13
|
+
};
|
|
14
|
+
readonly type: "word" | "phoneme";
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Shape of a single Hume Octave snippet (one segment of one utterance). When
|
|
18
|
+
* the SDK asks for timestamps it sets `split_utterances: false`, so each
|
|
19
|
+
* utterance produces exactly one snippet whose audio matches the top-level
|
|
20
|
+
* `generations[0].audio` byte-for-byte — meaning the timestamps inside are
|
|
21
|
+
* already relative to the full returned audio.
|
|
22
|
+
*/
|
|
23
|
+
export interface HumeSnippet {
|
|
24
|
+
readonly audio?: string;
|
|
25
|
+
readonly id?: string;
|
|
26
|
+
readonly text?: string;
|
|
27
|
+
readonly timestamps?: readonly HumeTimestamp[];
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Flatten the nested `snippets[utterance][segment].timestamps` arrays from a
|
|
31
|
+
* Hume `/v0/tts` response into a single word-level alignment array, filtering
|
|
32
|
+
* to `type: "word"` entries and converting milliseconds to seconds.
|
|
33
|
+
*
|
|
34
|
+
* Assumes the caller set `split_utterances: false` (and a single utterance),
|
|
35
|
+
* so segment-relative offsets don't need to be re-based against the full audio.
|
|
36
|
+
*/
|
|
37
|
+
export declare function snippetsToWordTimestamps(snippets: readonly (readonly HumeSnippet[])[]): WordTimestamp[];
|
|
38
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE;QAAE,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC;IAChE,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC;CACnC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;CAChD;AAED;;;;;;;GAOG;AACH,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,SAAS,CAAC,SAAS,WAAW,EAAE,CAAC,EAAE,GAC5C,aAAa,EAAE,CAqBjB"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Flatten the nested `snippets[utterance][segment].timestamps` arrays from a
|
|
3
|
+
* Hume `/v0/tts` response into a single word-level alignment array, filtering
|
|
4
|
+
* to `type: "word"` entries and converting milliseconds to seconds.
|
|
5
|
+
*
|
|
6
|
+
* Assumes the caller set `split_utterances: false` (and a single utterance),
|
|
7
|
+
* so segment-relative offsets don't need to be re-based against the full audio.
|
|
8
|
+
*/
|
|
9
|
+
export function snippetsToWordTimestamps(snippets) {
|
|
10
|
+
const out = [];
|
|
11
|
+
for (const utterance of snippets) {
|
|
12
|
+
for (const segment of utterance) {
|
|
13
|
+
const ts = segment.timestamps;
|
|
14
|
+
if (!ts) {
|
|
15
|
+
continue;
|
|
16
|
+
}
|
|
17
|
+
for (const entry of ts) {
|
|
18
|
+
if (entry.type !== "word") {
|
|
19
|
+
continue;
|
|
20
|
+
}
|
|
21
|
+
out.push({
|
|
22
|
+
text: entry.text,
|
|
23
|
+
start: entry.time.begin / 1000,
|
|
24
|
+
end: entry.time.end / 1000,
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return out;
|
|
30
|
+
}
|
|
31
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/hume/alignment.ts"],"names":[],"mappings":"AA4BA;;;;;;;GAOG;AACH,MAAM,UAAU,wBAAwB,CACtC,QAA6C;IAE7C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,SAAS,IAAI,QAAQ,EAAE,CAAC;QACjC,KAAK,MAAM,OAAO,IAAI,SAAS,EAAE,CAAC;YAChC,MAAM,EAAE,GAAG,OAAO,CAAC,UAAU,CAAC;YAC9B,IAAI,CAAC,EAAE,EAAE,CAAC;gBACR,SAAS;YACX,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,EAAE,EAAE,CAAC;gBACvB,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBAC1B,SAAS;gBACX,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC;oBACP,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI;oBAC9B,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI;iBAC3B,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface HumeSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -11,7 +12,10 @@ export declare class HumeSpeechProvider implements SpeechProvider<string, string
|
|
|
11
12
|
readonly id: "octave-2";
|
|
12
13
|
readonly releaseDate: "2025-10-01";
|
|
13
14
|
readonly languages: readonly ["en", "fr", "de", "es", "pt", "ja", "ko", "hi", "it", "ar", "ru"];
|
|
14
|
-
readonly features: readonly ["streaming", "inline-voice-cloning"
|
|
15
|
+
readonly features: readonly ["streaming", "inline-voice-cloning", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "native";
|
|
18
|
+
}];
|
|
15
19
|
}, {
|
|
16
20
|
readonly id: "octave-1";
|
|
17
21
|
readonly releaseDate: "2025-03-01";
|
|
@@ -30,11 +34,14 @@ export declare class HumeSpeechProvider implements SpeechProvider<string, string
|
|
|
30
34
|
providerOptions?: Record<string, unknown>;
|
|
31
35
|
abortSignal?: AbortSignal;
|
|
32
36
|
headers?: Record<string, string>;
|
|
37
|
+
includeTimestamps?: boolean;
|
|
33
38
|
}): Promise<{
|
|
34
39
|
audio: Uint8Array;
|
|
35
40
|
mediaType: string;
|
|
36
41
|
providerMetadata?: Record<string, unknown>;
|
|
42
|
+
timestamps?: WordTimestamp[];
|
|
37
43
|
}>;
|
|
44
|
+
private generateWithTimestamps;
|
|
38
45
|
stream(options: {
|
|
39
46
|
modelId: string;
|
|
40
47
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGzD,MAAM,WAAW,wBAAwB;IACvC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,kBAAmB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACvE,QAAQ,CAAC,EAAE,UAAU;IACrB,QAAQ,CAAC,YAAY,cAAc;IAEnC,QAAQ,CAAC,MAAM;;;;;;;;;;;;;OA6BJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,wBAAwB;IAM5C,OAAO,CAAC,cAAc;IAUhB,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;YAkDY,sBAAsB;IAoE9B,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IA0CF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;IAgBhC,oBAAoB,CAAC,OAAO,EAAE,MAAM;;;;IAS9B,gBAAgB,CAAC,OAAO,EAAE;QAC9B,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MA
AM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAoCH;AAED,wBAAgB,UAAU,CAAC,MAAM,GAAE,wBAA6B,IAGzC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAM9D"}
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import { base64ToUint8Array } from "../../audio-utils.js";
|
|
2
|
+
import { SpeechSDKError } from "../../errors.js";
|
|
1
3
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
4
|
+
import { snippetsToWordTimestamps } from "./alignment.js";
|
|
2
5
|
export class HumeSpeechProvider {
|
|
3
6
|
id = "hume";
|
|
4
7
|
defaultModel = "octave-2";
|
|
@@ -19,7 +22,11 @@ export class HumeSpeechProvider {
|
|
|
19
22
|
"ar",
|
|
20
23
|
"ru",
|
|
21
24
|
],
|
|
22
|
-
features: [
|
|
25
|
+
features: [
|
|
26
|
+
"streaming",
|
|
27
|
+
"inline-voice-cloning",
|
|
28
|
+
{ id: "timestamps", mode: "native" },
|
|
29
|
+
],
|
|
23
30
|
},
|
|
24
31
|
{
|
|
25
32
|
id: "octave-1",
|
|
@@ -58,6 +65,13 @@ export class HumeSpeechProvider {
|
|
|
58
65
|
if (version != null) {
|
|
59
66
|
body.version = version;
|
|
60
67
|
}
|
|
68
|
+
// Native timestamps are only documented for Octave-2 (`version: "2"`).
|
|
69
|
+
// Hume returns alignment from the JSON `/v0/tts` endpoint — `/v0/tts/file`
|
|
70
|
+
// is bytes-only — so we route through it whenever the caller asks for
|
|
71
|
+
// word timing on a model that supports it.
|
|
72
|
+
if (options.includeTimestamps && version === "2") {
|
|
73
|
+
return this.generateWithTimestamps(options, body);
|
|
74
|
+
}
|
|
61
75
|
const url = `${this.baseURL}/tts/file`;
|
|
62
76
|
const response = await this.fetchFn(url, {
|
|
63
77
|
method: "POST",
|
|
@@ -78,6 +92,48 @@ export class HumeSpeechProvider {
|
|
|
78
92
|
mediaType,
|
|
79
93
|
};
|
|
80
94
|
}
|
|
95
|
+
async generateWithTimestamps(options, baseBody) {
|
|
96
|
+
// `split_utterances: false` keeps the response to a single snippet per
|
|
97
|
+
// utterance — its audio matches the top-level `generations[0].audio`
|
|
98
|
+
// byte-for-byte, so segment-relative timestamps line up with the audio
|
|
99
|
+
// we return. `include_timestamp_types: ["word"]` opts into word-level
|
|
100
|
+
// alignment (Hume defaults to none).
|
|
101
|
+
const body = {
|
|
102
|
+
...baseBody,
|
|
103
|
+
include_timestamp_types: ["word"],
|
|
104
|
+
split_utterances: false,
|
|
105
|
+
};
|
|
106
|
+
const url = `${this.baseURL}/tts`;
|
|
107
|
+
const response = await this.fetchFn(url, {
|
|
108
|
+
method: "POST",
|
|
109
|
+
headers: {
|
|
110
|
+
"Content-Type": "application/json",
|
|
111
|
+
"X-Hume-Api-Key": resolveApiKey(this.apiKey, "HUME_API_KEY", "Hume"),
|
|
112
|
+
"X-User-Agent": SDK_USER_AGENT,
|
|
113
|
+
...options.headers,
|
|
114
|
+
},
|
|
115
|
+
body: JSON.stringify(body),
|
|
116
|
+
signal: options.abortSignal,
|
|
117
|
+
});
|
|
118
|
+
await handleErrorResponse(response, `hume/${options.modelId}`);
|
|
119
|
+
const payload = (await response.json());
|
|
120
|
+
const gen = payload.generations?.[0];
|
|
121
|
+
if (!gen?.audio) {
|
|
122
|
+
throw new SpeechSDKError(`hume/${options.modelId}: /v0/tts response missing generations[0].audio`);
|
|
123
|
+
}
|
|
124
|
+
const audio = base64ToUint8Array(gen.audio);
|
|
125
|
+
const timestamps = gen.snippets
|
|
126
|
+
? snippetsToWordTimestamps(gen.snippets)
|
|
127
|
+
: undefined;
|
|
128
|
+
// /v0/tts delivers audio as base64 in a JSON body, so there's no
|
|
129
|
+
// Content-Type for the bytes — derive it from the requested format.
|
|
130
|
+
const format = (baseBody.format ?? {});
|
|
131
|
+
return {
|
|
132
|
+
audio,
|
|
133
|
+
mediaType: humeFormatToMediaType(format.type),
|
|
134
|
+
timestamps,
|
|
135
|
+
};
|
|
136
|
+
}
|
|
81
137
|
async stream(options) {
|
|
82
138
|
const utterance = { text: options.text };
|
|
83
139
|
if (options.voice) {
|
|
@@ -177,4 +233,22 @@ export function createHume(config = {}) {
|
|
|
177
233
|
};
|
|
178
234
|
};
|
|
179
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Map a Hume `format.type` value to a standard media type. Used when decoding
|
|
238
|
+
* base64 audio from `/v0/tts`, which delivers bytes inside a JSON body with
|
|
239
|
+
* no Content-Type hint for the audio itself. PCM is always 48 kHz mono s16
|
|
240
|
+
* (Hume's only documented PCM mode).
|
|
241
|
+
*/
|
|
242
|
+
function humeFormatToMediaType(formatType) {
|
|
243
|
+
if (!formatType) {
|
|
244
|
+
return "audio/mpeg";
|
|
245
|
+
}
|
|
246
|
+
if (formatType === "wav") {
|
|
247
|
+
return "audio/wav";
|
|
248
|
+
}
|
|
249
|
+
if (formatType === "pcm") {
|
|
250
|
+
return "audio/pcm;rate=48000";
|
|
251
|
+
}
|
|
252
|
+
return "audio/mpeg";
|
|
253
|
+
}
|
|
180
254
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/hume/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EAAoB,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAQ5E,MAAM,OAAO,kBAAkB;IACpB,EAAE,GAAG,MAAM,CAAC;IACZ,YAAY,GAAG,UAAU,CAAC;IAE1B,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,UAAU;YACd,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACI;YACV,QAAQ,EAAE;gBACR,WAAW;gBACX,sBAAsB;gBACtB,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE;aACrC;SACF;QACD;YACE,EAAE,EAAE,UAAU;YACd,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,CAAU;YAC1B,QAAQ,EAAE,CAAC,WAAW,CAAC;SACxB;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAgC;QAC1C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,wBAAwB,CAAC;QAC1D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAEO,cAAc,CAAC,OAAe;QACpC,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;YAC3B,OAAO,GAAG,CAAC;QACb,CAAC;QACD,IAAI,OAAO,KAAK,UAAU,EAAE,CAAC;YAC3B,OAAO,GAAG,CAAC;QACb,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAMC,MAAM,SAAS,GAA4B,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;QAClE,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,SAAS,CAAC,KAAK,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;QACjE,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAErD,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,CAAC,SAAS,CAAC;SACxB,CAAC;QAEF,IAAI,OAAO,IAAI,IAAI,EAAE,CAAC;YACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACzB,CAAC;QAED,uEAAuE;QACvE,2EAA2E;QAC3E,sEAAsE;QACtE,2CAA2C;QAC3C,IAAI,OAAO,CAAC,iBAAiB,IAAI,OAAO,KAAK,GAAG,EAAE,CAAC;YACjD,OAAO,IAAI,CAAC,sBAAsB,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACpD,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,CAAC;QAEvC,MAAM,QAAQ,GAAG,MAAM,IAAI,C
AAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,gBAAgB,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;gBACpE,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAE/D,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY,CAAC;QAEvE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,sBAAsB,CAClC,OAKC,EACD,QAAiC;QAOjC,uEAAuE;QACvE,qEAAqE;QACrE,uEAAuE;QACvE,sEAAsE;QACtE,qCAAqC;QACrC,MAAM,IAAI,GAA4B;YACpC,GAAG,QAAQ;YACX,uBAAuB,EAAE,CAAC,MAAM,CAAC;YACjC,gBAAgB,EAAE,KAAK;SACxB,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,MAAM,CAAC;QAClC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,gBAAgB,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;gBACpE,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAE/D,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAKrC,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACrC,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC;YAChB,MAAM,IAAI,cAAc,CACtB,QAAQ,OAAO,CAAC,OAAO,iDAAiD,CACzE,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,UAAU,GAAG,GAAG,CAAC,QAAQ;YAC7B,CAAC,CAAC,wBAAwB,CAAC,GAAG,CAAC,QAAQ,CAAC;YACxC,CAAC,CAAC,SAAS,CAAC;QAEd,iEAAiE;QACjE,oEAAoE;QACpE,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,MAAM,IAAI,EAAE,CAAsB,CAAC;QAC5D,OAAO;YACL,KAAK;YACL,SAAS,EAAE,qBAAqB,CAAC,MAAM,CAAC,IAAI,CAAC;YAC7C,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,SAAS,GAA4B,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC;QAClE,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,SAAS,CAAC,KAAK,GAAG,EAAE,IAAI,EAAE,OAAO,
CAAC,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;QACjE,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAErD,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,CAAC,SAAS,CAAC;SACxB,CAAC;QACF,IAAI,OAAO,IAAI,IAAI,EAAE,CAAC;YACpB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACzB,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,kBAAkB,CAAC;QAE9C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,gBAAgB,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;gBACpE,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAE/D,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,QAAQ,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACnE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY;SAChE,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,mEAAmE;YACnE,iEAAiE;YACjE,mEAAmE;YACnE,8DAA8D;YAC9D,kEAAkE;YAClE,sBAAsB;YACtB,OAAO;gBACL,eAAe,EAAE,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE;gBAC5C,SAAS,EAAE,sBAAsB;aAClC,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,oBAAoB,CAAC,OAAe;QAClC,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,oEAAoE;YACpE,0CAA0C;YAC1C,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QACxC,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,OAMtB;QAKC,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3C,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE;SAC9C,CAAC,CAAC,CAAC;QAEJ,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU;SACX,CAAC;QACF,IAAI,OAAO,IAAI,IAAI,EAAE,CAAC;YACpB
,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACzB,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,WAAW,CAAC;QACvC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,gBAAgB,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,MAAM,CAAC;gBACpE,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAE/D,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,YAAY;SAChE,CAAC;IACJ,CAAC;CACF;AAED,MAAM,UAAU,UAAU,CAAC,SAAmC,EAAE;IAC9D,MAAM,QAAQ,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAEhD,OAAO,SAAS,IAAI,CAAC,OAAgB;QACnC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,SAAS,qBAAqB,CAAC,UAA8B;IAC3D,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,IAAI,UAAU,KAAK,KAAK,EAAE,CAAC;QACzB,OAAO,WAAW,CAAC;IACrB,CAAC;IACD,IAAI,UAAU,KAAK,KAAK,EAAE,CAAC;QACzB,OAAO,sBAAsB,CAAC;IAChC,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
 * Word-level alignment payload found at `timestampInfo.wordAlignment` in an
 * Inworld synthesis response when the request set `timestamp_type: "WORD"`.
 *
 * `words`, `wordStartTimeSeconds` and `wordEndTimeSeconds` are parallel
 * arrays: position N in each one describes the Nth word (its text, start
 * and end respectively).
 *
 * All times are expressed in **seconds** (not milliseconds), so projecting
 * them into the SDK's `WordTimestamp[]` requires no unit conversion.
 */
export interface InworldWordAlignment {
    /** Text of each word; index N pairs with index N of both time arrays. */
    readonly words: ReadonlyArray<string>;
    /** Start time of each word, in seconds. */
    readonly wordStartTimeSeconds: ReadonlyArray<number>;
    /** End time of each word, in seconds. */
    readonly wordEndTimeSeconds: ReadonlyArray<number>;
    /**
     * Only emitted by the 1.5 model family. The SDK does not read it today;
     * it is declared purely so the response shape is fully typed.
     */
    readonly phoneticDetails?: ReadonlyArray<unknown>;
}
|
|
19
|
+
/**
|
|
20
|
+
* Project Inworld's parallel word alignment arrays into the SDK's
|
|
21
|
+
* `WordTimestamp[]`. Skips entries past the shortest array length so a
|
|
22
|
+
* malformed response can't produce undefined start/end values.
|
|
23
|
+
*/
|
|
24
|
+
export declare function wordAlignmentToWordTimestamps(alignment: InworldWordAlignment): WordTimestamp[];
|
|
25
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/inworld/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;;;;;GAUG;AACH,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,eAAe,CAAC,EAAE,SAAS,OAAO,EAAE,CAAC;IAC9C,QAAQ,CAAC,kBAAkB,EAAE,SAAS,MAAM,EAAE,CAAC;IAC/C,QAAQ,CAAC,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;IACjD,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;CACnC;AAED;;;;GAIG;AACH,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,oBAAoB,GAC9B,aAAa,EAAE,CAoBjB"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Project Inworld's parallel word alignment arrays into the SDK's
 * `WordTimestamp[]` (`{ text, start, end }`, times in seconds).
 *
 * The function is defensive against malformed responses:
 * - a missing or non-array `words` / `wordStartTimeSeconds` /
 *   `wordEndTimeSeconds` field is treated as an empty array instead of
 *   throwing on `.length`;
 * - only indices present in all three arrays are read, so a length mismatch
 *   can't produce undefined start/end values;
 * - entries whose text, start or end is null/undefined are skipped.
 *
 * @param alignment The `timestampInfo.wordAlignment` block of a response.
 * @returns One entry per non-empty word, in the order the arrays list them.
 */
export function wordAlignmentToWordTimestamps(alignment) {
    // Anything that is not an array carries no usable entries.
    const asArray = (value) => (Array.isArray(value) ? value : []);
    const words = asArray(alignment?.words);
    const starts = asArray(alignment?.wordStartTimeSeconds);
    const ends = asArray(alignment?.wordEndTimeSeconds);
    // Stop at the shortest array so every index read below exists in all three.
    const len = Math.min(words.length, starts.length, ends.length);
    const out = [];
    for (let i = 0; i < len; i++) {
        const text = words[i];
        const start = starts[i];
        const end = ends[i];
        // Inworld occasionally emits empty-string entries for interstitial
        // silence or punctuation — drop them so the SDK's word list only
        // contains real words.
        if (text == null || start == null || end == null || text.length === 0) {
            continue;
        }
        out.push({ text, start, end });
    }
    return out;
}
|
|
23
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/inworld/alignment.ts"],"names":[],"mappings":"AAoBA;;;;GAIG;AACH,MAAM,UAAU,6BAA6B,CAC3C,SAA+B;IAE/B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAClB,SAAS,CAAC,KAAK,CAAC,MAAM,EACtB,SAAS,CAAC,oBAAoB,CAAC,MAAM,EACrC,SAAS,CAAC,kBAAkB,CAAC,MAAM,CACpC,CAAC;IACF,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,KAAK,GAAG,SAAS,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC;QAChD,MAAM,GAAG,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC;QAC5C,mEAAmE;QACnE,iEAAiE;QACjE,uBAAuB;QACvB,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,GAAG,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtE,SAAS;QACX,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;IACjC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface InworldSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -12,12 +13,18 @@ export declare class InworldSpeechProvider implements SpeechProvider<string, str
|
|
|
12
13
|
readonly id: "inworld-tts-1.5-max";
|
|
13
14
|
readonly releaseDate: "2025-08-15";
|
|
14
15
|
readonly languages: readonly ["en", "es", "fr", "de", "it", "pt", "ja", "ko", "nl", "pl", "zh"];
|
|
15
|
-
readonly features: readonly ["streaming"
|
|
16
|
+
readonly features: readonly ["streaming", {
|
|
17
|
+
readonly id: "timestamps";
|
|
18
|
+
readonly mode: "native";
|
|
19
|
+
}];
|
|
16
20
|
}, {
|
|
17
21
|
readonly id: "inworld-tts-1.5-mini";
|
|
18
22
|
readonly releaseDate: "2025-08-15";
|
|
19
23
|
readonly languages: readonly ["en", "es", "fr", "de", "it", "pt", "ja", "ko", "nl", "pl", "zh"];
|
|
20
|
-
readonly features: readonly ["streaming"
|
|
24
|
+
readonly features: readonly ["streaming", {
|
|
25
|
+
readonly id: "timestamps";
|
|
26
|
+
readonly mode: "native";
|
|
27
|
+
}];
|
|
21
28
|
}];
|
|
22
29
|
private readonly apiKey;
|
|
23
30
|
private readonly baseURL;
|
|
@@ -31,10 +38,12 @@ export declare class InworldSpeechProvider implements SpeechProvider<string, str
|
|
|
31
38
|
providerOptions?: Record<string, unknown>;
|
|
32
39
|
abortSignal?: AbortSignal;
|
|
33
40
|
headers?: Record<string, string>;
|
|
41
|
+
includeTimestamps?: boolean;
|
|
34
42
|
}): Promise<{
|
|
35
43
|
audio: string;
|
|
36
44
|
mediaType: string;
|
|
37
45
|
providerMetadata?: Record<string, unknown>;
|
|
46
|
+
timestamps?: WordTimestamp[];
|
|
38
47
|
}>;
|
|
39
48
|
stream(options: {
|
|
40
49
|
modelId: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/inworld/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/inworld/index.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAMzD,MAAM,WAAW,2BAA2B;IAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AA0BD,qBAAa,qBAAsB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC1E,QAAQ,CAAC,EAAE,aAAa;IACxB,QAAQ,CAAC,YAAY,yBAAyB;IAI9C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAYtB;IAEX,QAAQ,CAAC,MAAM;;;;;;;;;;;;;;;;OAaJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,2BAA2B;IAM/C,OAAO,CAAC,SAAS;IA4BX,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;IAmDI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAqCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;;CAcjC;AA8GD,wBAAgB,aAAa,CAAC,MAAM,GAAE,2BAAgC,IAG5C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMjE"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
+
import { wordAlignmentToWordTimestamps, } from "./alignment.js";
|
|
2
3
|
const DEFAULT_AUDIO_ENCODING = "MP3";
|
|
3
4
|
const DEFAULT_SAMPLE_RATE_HERTZ = 48_000;
|
|
4
5
|
function mediaTypeForEncoding(encoding) {
|
|
@@ -38,13 +39,13 @@ export class InworldSpeechProvider {
|
|
|
38
39
|
id: "inworld-tts-1.5-max",
|
|
39
40
|
releaseDate: "2025-08-15",
|
|
40
41
|
languages: InworldSpeechProvider.LANGUAGES,
|
|
41
|
-
features: ["streaming"],
|
|
42
|
+
features: ["streaming", { id: "timestamps", mode: "native" }],
|
|
42
43
|
},
|
|
43
44
|
{
|
|
44
45
|
id: "inworld-tts-1.5-mini",
|
|
45
46
|
releaseDate: "2025-08-15",
|
|
46
47
|
languages: InworldSpeechProvider.LANGUAGES,
|
|
47
|
-
features: ["streaming"],
|
|
48
|
+
features: ["streaming", { id: "timestamps", mode: "native" }],
|
|
48
49
|
},
|
|
49
50
|
];
|
|
50
51
|
apiKey;
|
|
@@ -76,6 +77,9 @@ export class InworldSpeechProvider {
|
|
|
76
77
|
}
|
|
77
78
|
async generate(options) {
|
|
78
79
|
const { body, audioConfig } = this.buildBody(options.text, options.modelId, options.voice, options.providerOptions);
|
|
80
|
+
if (options.includeTimestamps) {
|
|
81
|
+
body.timestamp_type = "WORD";
|
|
82
|
+
}
|
|
79
83
|
const url = `${this.baseURL}/tts/v1/voice`;
|
|
80
84
|
const response = await this.fetchFn(url, {
|
|
81
85
|
method: "POST",
|
|
@@ -93,9 +97,14 @@ export class InworldSpeechProvider {
|
|
|
93
97
|
if (!json.audioContent) {
|
|
94
98
|
throw new Error(`inworld/${options.modelId}: response missing audioContent`);
|
|
95
99
|
}
|
|
100
|
+
const wordAlignment = json.timestampInfo?.wordAlignment;
|
|
101
|
+
const timestamps = options.includeTimestamps && wordAlignment
|
|
102
|
+
? wordAlignmentToWordTimestamps(wordAlignment)
|
|
103
|
+
: undefined;
|
|
96
104
|
return {
|
|
97
105
|
audio: json.audioContent,
|
|
98
106
|
mediaType: mediaTypeForEncoding(audioConfig.audio_encoding),
|
|
107
|
+
timestamps,
|
|
99
108
|
};
|
|
100
109
|
}
|
|
101
110
|
async stream(options) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/inworld/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/inworld/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EAEL,6BAA6B,GAC9B,MAAM,gBAAgB,CAAC;AAcxB,MAAM,sBAAsB,GAAG,KAAK,CAAC;AACrC,MAAM,yBAAyB,GAAG,MAAM,CAAC;AAEzC,SAAS,oBAAoB,CAAC,QAA4B;IACxD,QAAQ,CAAC,QAAQ,IAAI,sBAAsB,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC;QAC3D,KAAK,UAAU;YACb,OAAO,WAAW,CAAC;QACrB,KAAK,UAAU;YACb,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,aAAa,CAAC;QACvB,KAAK,MAAM;YACT,OAAO,oBAAoB,CAAC;QAC9B;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC;AAED,MAAM,OAAO,qBAAqB;IACvB,EAAE,GAAG,SAAS,CAAC;IACf,YAAY,GAAG,qBAAqB,CAAC;IAE9C,oDAAoD;IACpD,2DAA2D;IACnD,MAAM,CAAU,SAAS,GAAG;QAClC,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;KACI,CAAC;IAEF,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,qBAAqB;YACzB,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,qBAAqB,CAAC,SAAS;YAC1C,QAAQ,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;SAC9D;QACD;YACE,EAAE,EAAE,sBAAsB;YAC1B,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,qBAAqB,CAAC,SAAS;YAC1C,QAAQ,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;SAC9D;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAmC;QAC7C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,wBAAwB,CAAC;QAC1D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAEO,SAAS,CACf,IAAY,EACZ,OAAe,EACf,KAAyB,EACzB,eAAoD;QAEpD,MAAM,IAAI,GAAG,CAAC,eAAe,IAAI,EAAE,CAA4B,CAAC;QAChE,MAAM,EAAE,YAAY,EAAE,cAAc,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC;QAEvD,MAAM,WAAW,GAAuB;YACtC,cAAc,EAAE,sBAAsB;YACtC,iBAAiB,EAAE,yBAAyB;YAC5C,GAAG,CAAE,cAAiD,IAAI,EAAE,CAAC;SAC9D,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,GAAG,IAAI;YACP,IAAI;YACJ,QAAQ,EAAE,OAAO;YACjB,YAAY,EAAE,WAAW;SAC1B,CAAC;QACF,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;QACxB,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,C
AAC,QAAQ,CAAC,OAQd;QAMC,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC,SAAS,CAC1C,OAAO,CAAC,IAAI,EACZ,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,eAAe,CACxB,CAAC;QAEF,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;YAC9B,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC;QAC/B,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,eAAe,CAAC;QAE3C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,SAAS,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAAE,SAAS,CAAC,EAAE;gBAClF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,WAAW,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAElE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAGlC,CAAC;QACF,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CACb,WAAW,OAAO,CAAC,OAAO,iCAAiC,CAC5D,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,EAAE,aAAa,CAAC;QACxD,MAAM,UAAU,GACd,OAAO,CAAC,iBAAiB,IAAI,aAAa;YACxC,CAAC,CAAC,6BAA6B,CAAC,aAAa,CAAC;YAC9C,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,YAAY;YACxB,SAAS,EAAE,oBAAoB,CAAC,WAAW,CAAC,cAAc,CAAC;YAC3D,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC,SAAS,CAC1C,OAAO,CAAC,IAAI,EACZ,OAAO,CAAC,OAAO,EACf,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,eAAe,CACxB,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,sBAAsB,CAAC;QAElD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,SAAS,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAAE,SAAS,CAAC,EAAE;gBAClF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,WAAW,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAElE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,WAAW,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACtE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,wBAAwB,CAC9B,QAAQ,CAAC,IAAI,EACb,WAAW,OAAO,CAAC,OAAO,EAAE
,CAC7B;YACD,SAAS,EAAE,oBAAoB,CAAC,WAAW,CAAC,cAAc,CAAC;SAC5D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE;oBACf,YAAY,EAAE;wBACZ,cAAc,EAAE,UAAU;wBAC1B,iBAAiB,EAAE,MAAM;qBAC1B;iBACF;gBACD,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;;AAGH,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAQD,SAAS,YAAY,CAAC,IAAY;IAChC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAuB,CAAC;IACzD,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;QACzB,MAAM,OAAO,GACX,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ;YAC9B,CAAC,CAAC,MAAM,CAAC,KAAK;YACd,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,IAAI,cAAc,CAAC,CAAC;QAC/C,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;IAC3B,CAAC;IACD,MAAM,GAAG,GAAG,MAAM,CAAC,YAAY,IAAI,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC;IAC/D,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,aAAa,CAAC,GAAG,CAAC,CAAC;AAC5B,CAAC;AAED,SAAS,QAAQ,CACf,IAAY,EACZ,UAAuD;IAEvD,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACjC,IAAI,KAAK,EAAE,CAAC;QACV,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAClB,KAAyB,EACzB,UAAuD;IAEvD,IAAI,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9C,OAAO,YAAY,KAAK,CAAC,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;QACjD,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;QACpD,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QAC3B,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED,KAAK,UAAU,UAAU,CACvB,MAA+C,EAC/C,UAAuD;IAEvD,MAAM,OAAO,GAAG,IAAI,WAAW,E
AAE,CAAC;IAClC,MAAM,KAAK,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IAE7B,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,IAAI,EAAE,CAAC;YACT,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;YAC/B,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC;YAClB,QAAQ,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;YAChC,OAAO;QACT,CAAC;QACD,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,WAAW,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IACjC,CAAC;AACH,CAAC;AAED,SAAS,wBAAwB,CAC/B,MAAkC,EAClC,KAAa;IAEb,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;IAClC,OAAO,IAAI,cAAc,CAAa;QACpC,KAAK,CAAC,KAAK,CAAC,UAAU;YACpB,IAAI,CAAC;gBACH,MAAM,UAAU,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;gBACrC,UAAU,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,KAAK,GACT,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,GAAG,KAAK,KAAK,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrE,qEAAqE;gBACrE,qEAAqE;gBACrE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;oBAC9B,UAAU;gBACZ,CAAC,CAAC,CAAC;gBACH,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAM;YACX,yEAAyE;YACzE,+CAA+C;YAC/C,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAC/B,CAAC;KACF,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,SAAsC,EAAE;IACpE,MAAM,QAAQ,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;IAEnD,OAAO,SAAS,OAAO,CAAC,OAAgB;QACtC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
 * One element of the `wordDurations` array that Murf returns from
 * `/v1/speech/generate`.
 *
 * `startMs` and `endMs` are integer millisecond offsets measured from the
 * start of the audio. `word` holds the spoken token, or the original input
 * token when the request set `wordDurationsAsOriginalText: true`.
 */
export interface MurfWordDuration {
    /** Spoken token (or original input token, see above). */
    readonly word: string;
    /** Offset of the word's start from the beginning of the audio, in ms. */
    readonly startMs: number;
    /** Offset of the word's end from the beginning of the audio, in ms. */
    readonly endMs: number;
    readonly sourceWordIndex?: number;
    readonly pitchScaleMinimum?: number;
    readonly pitchScaleMaximum?: number;
}
|
|
16
|
+
/**
|
|
17
|
+
* Converts Murf's word-level durations (milliseconds) into the SDK's
|
|
18
|
+
* `WordTimestamp[]` (seconds). Murf already groups output by word, so no
|
|
19
|
+
* aggregation is needed — this is just a unit conversion.
|
|
20
|
+
*/
|
|
21
|
+
export declare function wordDurationsToWordTimestamps(durations: readonly MurfWordDuration[]): WordTimestamp[];
|
|
22
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/murf/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC;IAClC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED;;;;GAIG;AACH,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,SAAS,gBAAgB,EAAE,GACrC,aAAa,EAAE,CAUjB"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Converts Murf's word-level durations (milliseconds) into the SDK's
 * `WordTimestamp[]` (seconds). Murf already groups output by word, so no
 * aggregation is needed — each entry is a unit conversion.
 *
 * Incomplete entries are skipped rather than converted: dividing a missing
 * `startMs` / `endMs` by 1000 would yield `NaN`, and a missing `word` would
 * yield an entry without text. A null/undefined `durations` produces an
 * empty list.
 *
 * @param durations The `wordDurations` array from a Murf response.
 * @returns One `{ text, start, end }` entry per complete input entry, in
 *   input order, with times in seconds.
 */
export function wordDurationsToWordTimestamps(durations) {
    const out = [];
    // No payload means no timing data; avoid throwing in `for…of`.
    if (durations == null) {
        return out;
    }
    for (const d of durations) {
        // Skip malformed entries so callers never see NaN or undefined fields.
        if (d == null || d.word == null || d.startMs == null || d.endMs == null) {
            continue;
        }
        out.push({
            text: d.word,
            start: d.startMs / 1000,
            end: d.endMs / 1000,
        });
    }
    return out;
}
|
|
17
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/murf/alignment.ts"],"names":[],"mappings":"AAiBA;;;;GAIG;AACH,MAAM,UAAU,6BAA6B,CAC3C,SAAsC;IAEtC,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,GAAG,CAAC,IAAI,CAAC;YACP,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,OAAO,GAAG,IAAI;YACvB,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;SACpB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|