@speech-sdk/core 0.7.0 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +0 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +2 -14
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +40 -36
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -35
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/errors.d.ts +5 -12
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +12 -14
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +4 -3
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +161 -67
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +1 -26
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +85 -64
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +4 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +34 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +7 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +68 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +7 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +161 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -18
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -31
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +7 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +55 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +7 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +47 -39
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +7 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +65 -57
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +7 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +54 -48
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +13 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +4 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +11 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.d.ts","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,GAAG,UAAU,CAAA;CAAE,CAAC;AAE9E
|
|
1
|
+
{"version":3,"file":"speech-provider.d.ts","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,qBAAqB,EAC3B,MAAM,8BAA8B,CAAC;AACtC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,MAAM,KAAK,GAAG,MAAM,GAAG;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG;IAAE,KAAK,EAAE,MAAM,GAAG,UAAU,CAAA;CAAE,CAAC;AAE9E,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC1C;AAED,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG;IAAE,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAAC;AAEvD,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,QAAQ,EAAE,SAAS,OAAO,EAAE,CAAC;IACtC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,eAAO,MAAM,QAAQ;;;;;;CAMX,CAAC;AAEX,wBAAgB,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,MAAM,GAAG,OAAO,CAIhE;AAED,MAAM,WAAW,cAAc,CAC7B,MAAM,SAAS,MAAM,GAAG,MAAM,EAC9B,MAAM,SAAS,KAAK,GAAG,KAAK;IAE5B,YAAY,EAAE,MAAM,CAAC;IAErB,oBAAoB,CAAC,CAAC,OAAO,EAAE,MAAM,GACjC;QACE,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,aAAa,CAAC,EAAE,MAAM,CAAC;KACxB,GACD,SAAS,CAAC;IAEd,QAAQ,CAAC,OAAO,EAAE;QAChB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;QAC7B,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC,CAAC;IAEH,gBAAgB,CAAC,CAAC,OAAO,EAAE;QACzB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,SAAS;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,EAAE,CAAC;QAClD,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;QAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC,CAAC;IAEH,gBAAgB,CAAC,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,GAAG,SAAS,CAAC;IAClE,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,SAAS,SAAS,EAAE,CAAC;IAE7B,gBAAgB,CAAC,CACf,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAExC,MAAM,CAAC,CAAC,OAAO,EAAE;QACf,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,aAAa,CAAC,MAAM,SAAS,KAAK,GAAG,KAAK;IACzD,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC1C;AAED,wBAAgB,oBAAoB,CAAC,CAAC,SAAS,KAAK,EAClD,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC,GACtB,KAAK,IAAI,aAAa,CAAC,CAAC,CAAC,GAAG;IAAE,QAAQ,EAAE,qBAAqB,CAAA;CAAE,CAEjE;AAED,wBAAgB,6BAA6B,CAC3C,QAAQ,EAAE,aAAa,GACtB,OAAO,CAKT"}
|
package/dist/speech-provider.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import { SPEECH_GATEWAY_PROVIDER_ID, } from "./providers/gateway/index.js";
|
|
2
2
|
export const FEATURES = {
|
|
3
3
|
STREAMING: "streaming",
|
|
4
4
|
AUDIO_TAGS: "audio-tags",
|
|
@@ -7,34 +7,13 @@ export const FEATURES = {
|
|
|
7
7
|
TIMESTAMPS: "timestamps",
|
|
8
8
|
};
|
|
9
9
|
export function hasFeature(model, id) {
|
|
10
|
-
|
|
11
|
-
if (typeof f === "string" ? f === id : f.id === id) {
|
|
12
|
-
return true;
|
|
13
|
-
}
|
|
14
|
-
}
|
|
15
|
-
return false;
|
|
10
|
+
return model.features.some((f) => typeof f === "string" ? f === id : f.id === id);
|
|
16
11
|
}
|
|
17
|
-
export function
|
|
18
|
-
|
|
19
|
-
if (typeof f !== "string" && f.id === id) {
|
|
20
|
-
return f;
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
|
-
return undefined;
|
|
12
|
+
export function isSpeechGatewayModel(model) {
|
|
13
|
+
return model.provider.id === SPEECH_GATEWAY_PROVIDER_ID;
|
|
24
14
|
}
|
|
25
|
-
/**
|
|
26
|
-
* Returns true when the resolved model declares `{ id: "timestamps", mode: "native" }`
|
|
27
|
-
* in its features (i.e., its TTS endpoint returns alignment data directly in the
|
|
28
|
-
* response, no STT round-trip needed).
|
|
29
|
-
*/
|
|
30
15
|
export function modelDeclaresNativeTimestamps(resolved) {
|
|
31
|
-
// `.models` is required by the SpeechProvider interface but we use optional
|
|
32
|
-
// chaining so tests/mocks that omit it don't crash here.
|
|
33
16
|
const modelInfo = resolved.provider.models?.find((m) => m.id === resolved.modelId);
|
|
34
|
-
|
|
35
|
-
return false;
|
|
36
|
-
}
|
|
37
|
-
const feature = getFeature(modelInfo, "timestamps");
|
|
38
|
-
return feature?.mode === "native";
|
|
17
|
+
return modelInfo != null && hasFeature(modelInfo, FEATURES.TIMESTAMPS);
|
|
39
18
|
}
|
|
40
19
|
//# sourceMappingURL=speech-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,0BAA0B,GAE3B,MAAM,8BAA8B,CAAC;AAoBtC,MAAM,CAAC,MAAM,QAAQ,GAAG;IACtB,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,YAAY;IACxB,oBAAoB,EAAE,sBAAsB;IAC5C,WAAW,EAAE,aAAa;IAC1B,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,MAAM,UAAU,UAAU,CAAC,KAAgB,EAAE,EAAU;IACrD,OAAO,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAC/B,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAC/C,CAAC;AACJ,CAAC;AA8ED,MAAM,UAAU,oBAAoB,CAClC,KAAuB;IAEvB,OAAO,KAAK,CAAC,QAAQ,CAAC,EAAE,KAAK,0BAA0B,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,6BAA6B,CAC3C,QAAuB;IAEvB,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAC9C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,OAAO,SAAS,IAAI,IAAI,IAAI,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC;AACzE,CAAC"}
|
package/dist/speech-result.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { SpeechMetadata } from "./metadata.js";
|
|
2
|
-
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
+
import type { ConversationWordTimestamp, WordTimestamp } from "./timestamps.js";
|
|
3
3
|
export interface GeneratedAudioFile {
|
|
4
4
|
readonly base64: string;
|
|
5
5
|
readonly mediaType: string;
|
|
@@ -9,17 +9,12 @@ export interface SpeechResult {
|
|
|
9
9
|
readonly audio: GeneratedAudioFile;
|
|
10
10
|
readonly metadata: SpeechMetadata;
|
|
11
11
|
readonly providerMetadata?: Record<string, unknown>;
|
|
12
|
-
/**
|
|
13
|
-
* Word-level alignment data. Populated when `timestamps: "on"` or when
|
|
14
|
-
* `timestamps: "auto"` (default) is combined with a TTS provider that
|
|
15
|
-
* returns alignment natively. Undefined otherwise.
|
|
16
|
-
*
|
|
17
|
-
* Timestamps are always word-granularity with start/end in seconds.
|
|
18
|
-
* Character- or phoneme-level native data is aggregated internally.
|
|
19
|
-
*/
|
|
20
12
|
readonly timestamps?: readonly WordTimestamp[];
|
|
21
13
|
readonly warnings?: string[];
|
|
22
14
|
}
|
|
15
|
+
export interface ConversationResult extends Omit<SpeechResult, "timestamps"> {
|
|
16
|
+
readonly timestamps?: readonly ConversationWordTimestamp[];
|
|
17
|
+
}
|
|
23
18
|
export declare class DefaultGeneratedAudioFile implements GeneratedAudioFile {
|
|
24
19
|
readonly mediaType: string;
|
|
25
20
|
private readonly _data;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,KAAK,EAAE,yBAAyB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhF,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,KAAK,EAAE,kBAAkB,CAAC;IACnC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpD,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IAC/C,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAmB,SAAQ,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC;IAC1E,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,yBAAyB,EAAE,CAAC;CAC5D;AAED,qBAAa,yBAA0B,YAAW,kBAAkB;IAClE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsB;IAC5C,OAAO,CAAC,WAAW,CAAC,CAAa;IACjC,OAAO,CAAC,OAAO,CAAC,CAAS;gBAEb,EACV,IAAI,EACJ,SAAS,GACV,EAAE;QAAE,IAAI,EAAE,MAAM,GAAG,UAAU,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAKnD,IAAI,UAAU,IAAI,UAAU,CAe3B;IAED,IAAI,MAAM,IAAI,MAAM,CAcnB;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAqBA,MAAM,OAAO,yBAAyB;IAC3B,SAAS,CAAS;IAEV,KAAK,CAAsB;IACpC,WAAW,CAAc;IACzB,OAAO,CAAU;IAEzB,YAAY,EACV,IAAI,EACJ,SAAS,GACwC;QACjD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,IAAI,UAAU;QACZ,IAAI,IAAI,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,WAAW,CAAC;QAC1B,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,YAAY,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,IAAI,MAAM;QACR,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,IAAI,YAAY,GAAG,EAAE,CAAC;YACtB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC9B,YAAY,IAAI,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC5C,CAAC;YACD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -1,21 +1,9 @@
|
|
|
1
1
|
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
-
/**
|
|
3
|
-
* Minimal info about an STT model. Parallels `ModelInfo` on the TTS side.
|
|
4
|
-
*/
|
|
5
2
|
export interface STTModelInfo {
|
|
6
3
|
readonly id: string;
|
|
7
4
|
readonly languages: readonly string[];
|
|
8
5
|
readonly releaseDate: string;
|
|
9
6
|
}
|
|
10
|
-
/**
|
|
11
|
-
* Transcribes generated audio and returns word-level timestamps. This is the
|
|
12
|
-
* "derived" path for `timestamps: "on"` — used when the TTS provider doesn't
|
|
13
|
-
* return alignment data natively.
|
|
14
|
-
*
|
|
15
|
-
* Providers return `WordTimestamp[]` with start/end in seconds. Normalization
|
|
16
|
-
* (ms → seconds, char/phoneme aggregation, tuple → object) happens inside the
|
|
17
|
-
* provider adapter so the public surface is uniform.
|
|
18
|
-
*/
|
|
19
7
|
export interface SpeechToTextProvider {
|
|
20
8
|
readonly defaultModel: string;
|
|
21
9
|
readonly id: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-to-text-provider.d.ts","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD
|
|
1
|
+
{"version":3,"file":"speech-to-text-provider.d.ts","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,CAAC;IAEzC,UAAU,CAAC,OAAO,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,oBAAoB,CAAC;CACzC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-speech.d.ts","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"stream-speech.d.ts","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAUA,OAAO,EAGL,KAAK,aAAa,EAClB,KAAK,KAAK,EACX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,wBAAsB,YAAY,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACnE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CA0F9B"}
|
package/dist/stream-speech.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import pRetry from "p-retry";
|
|
2
2
|
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
3
|
import { ApiError, NoSpeechGeneratedError, StreamingNotSupportedError, } from "./errors.js";
|
|
4
|
+
import { isRetriableApiError } from "./provider-utils.js";
|
|
4
5
|
import { resolveModel } from "./resolve-provider.js";
|
|
5
6
|
import { FEATURES, hasFeature, } from "./speech-provider.js";
|
|
6
7
|
export async function streamSpeech(options) {
|
|
@@ -48,7 +49,7 @@ export async function streamSpeech(options) {
|
|
|
48
49
|
retries: maxRetries,
|
|
49
50
|
signal: abortSignal,
|
|
50
51
|
shouldRetry: ({ error }) => {
|
|
51
|
-
if (error instanceof ApiError && error
|
|
52
|
+
if (error instanceof ApiError && !isRetriableApiError(error)) {
|
|
52
53
|
return false;
|
|
53
54
|
}
|
|
54
55
|
return true;
|
|
@@ -59,8 +60,6 @@ export async function streamSpeech(options) {
|
|
|
59
60
|
latencyMs: ttfbMs,
|
|
60
61
|
ttfbMs,
|
|
61
62
|
inputChars: processedText.length,
|
|
62
|
-
provider: resolved.provider.id,
|
|
63
|
-
model: resolved.modelId,
|
|
64
63
|
...(result.audioDurationMs != null && {
|
|
65
64
|
audioDurationMs: result.audioDurationMs,
|
|
66
65
|
}),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,UAAU,GAGX,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAA0B,OAS3D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC;QACP,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,
|
|
1
|
+
{"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,UAAU,GAGX,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAA0B,OAS3D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC;QACP,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7D,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAEzD,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,MAAM;QACjB,MAAM;QACN,UAAU,EAAE,aAAa,CAAC,MAAM;QAChC,GAAG,CAAC,MAAM,CAAC,eAAe,IAAI,IAAI,IAAI;YACpC,eAAe,EAAE,MAAM,CAAC,eAAe;SACxC,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
package/dist/timestamps.d.ts
CHANGED
|
@@ -1,23 +1,9 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Word-granularity alignment data. Timestamps are always in seconds from
|
|
3
|
-
* the start of the generated audio. Providers that natively return character
|
|
4
|
-
* or phoneme granularity are aggregated to words internally.
|
|
5
|
-
*/
|
|
6
1
|
export interface WordTimestamp {
|
|
7
2
|
readonly end: number;
|
|
8
3
|
readonly start: number;
|
|
9
4
|
readonly text: string;
|
|
10
5
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
* - `"auto"` (default): return timestamps only if the TTS provider supplies
|
|
15
|
-
* them natively. Free, no extra API calls.
|
|
16
|
-
* - `"on"`: always return timestamps. Uses native data when available;
|
|
17
|
-
* otherwise falls back to a speech-to-text round-trip of the synthesized
|
|
18
|
-
* audio (cost + latency implications).
|
|
19
|
-
* - `"off"`: never return timestamps, even when the provider would give them
|
|
20
|
-
* away for free.
|
|
21
|
-
*/
|
|
22
|
-
export type TimestampMode = "on" | "auto" | "off";
|
|
6
|
+
export interface ConversationWordTimestamp extends WordTimestamp {
|
|
7
|
+
readonly turnIndex: number;
|
|
8
|
+
}
|
|
23
9
|
//# sourceMappingURL=timestamps.d.ts.map
|
package/dist/timestamps.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"timestamps.d.ts","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"timestamps.d.ts","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,yBAA0B,SAAQ,aAAa;IAC9D,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
package/dist/turns.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ConversationWordTimestamp } from "./timestamps.js";
|
|
2
|
+
export interface TurnTimestamp {
|
|
3
|
+
readonly end: number;
|
|
4
|
+
readonly start: number;
|
|
5
|
+
readonly text: string;
|
|
6
|
+
readonly turnIndex: number;
|
|
7
|
+
}
|
|
8
|
+
export declare function timestampsToTurns(timestamps: readonly ConversationWordTimestamp[]): readonly TurnTimestamp[];
|
|
9
|
+
//# sourceMappingURL=turns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turns.d.ts","sourceRoot":"","sources":["../src/turns.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,iBAAiB,CAAC;AAEjE,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAKD,wBAAgB,iBAAiB,CAC/B,UAAU,EAAE,SAAS,yBAAyB,EAAE,GAC/C,SAAS,aAAa,EAAE,CAiB1B"}
|
package/dist/turns.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Assumes turnIndex runs are monotonic; non-adjacent runs of the same turnIndex would produce duplicate entries.
|
|
2
|
+
export function timestampsToTurns(timestamps) {
|
|
3
|
+
const turns = [];
|
|
4
|
+
for (const word of timestamps) {
|
|
5
|
+
const last = turns.at(-1);
|
|
6
|
+
if (last && last.turnIndex === word.turnIndex) {
|
|
7
|
+
last.end = word.end;
|
|
8
|
+
last.text = `${last.text} ${word.text}`;
|
|
9
|
+
}
|
|
10
|
+
else {
|
|
11
|
+
turns.push({
|
|
12
|
+
turnIndex: word.turnIndex,
|
|
13
|
+
start: word.start,
|
|
14
|
+
end: word.end,
|
|
15
|
+
text: word.text,
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
return turns;
|
|
20
|
+
}
|
|
21
|
+
//# sourceMappingURL=turns.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turns.js","sourceRoot":"","sources":["../src/turns.ts"],"names":[],"mappings":"AAWA,iHAAiH;AACjH,MAAM,UAAU,iBAAiB,CAC/B,UAAgD;IAEhD,MAAM,KAAK,GAA6B,EAAE,CAAC;IAC3C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YAC9C,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC;YACpB,IAAI,CAAC,IAAI,GAAG,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC;gBACT,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,IAAI,EAAE,IAAI,CAAC,IAAI;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,27 @@
|
|
|
1
1
|
import type { ResolvedModel, Voice } from "./speech-provider.js";
|
|
2
|
+
export type { CaptionFormat, CaptionsOptions } from "./captions.js";
|
|
3
|
+
export type { ConversationTurn, GenerateConversationOptions, } from "./conversation/types.js";
|
|
4
|
+
export type { SpeechMetadata } from "./metadata.js";
|
|
5
|
+
export type { CartesiaSpeechProviderConfig } from "./providers/cartesia/index.js";
|
|
6
|
+
export type { DeepgramSpeechProviderConfig } from "./providers/deepgram/index.js";
|
|
7
|
+
export type { ElevenLabsSpeechProviderConfig } from "./providers/elevenlabs/index.js";
|
|
8
|
+
export type { FalSpeechProviderConfig } from "./providers/fal/index.js";
|
|
9
|
+
export type { FishAudioSpeechProviderConfig } from "./providers/fish-audio/index.js";
|
|
10
|
+
export type { SpeechGatewayProviderConfig } from "./providers/gateway/index.js";
|
|
11
|
+
export type { GoogleSpeechProviderConfig } from "./providers/google/index.js";
|
|
12
|
+
export type { HumeSpeechProviderConfig } from "./providers/hume/index.js";
|
|
13
|
+
export type { InworldSpeechProviderConfig } from "./providers/inworld/index.js";
|
|
14
|
+
export type { MistralSpeechProviderConfig } from "./providers/mistral/index.js";
|
|
15
|
+
export type { MurfSpeechProviderConfig } from "./providers/murf/index.js";
|
|
16
|
+
export type { OpenAISpeechProviderConfig } from "./providers/openai/index.js";
|
|
17
|
+
export type { ResembleSpeechProviderConfig } from "./providers/resemble/index.js";
|
|
18
|
+
export type { XaiSpeechProviderConfig } from "./providers/xai/index.js";
|
|
19
|
+
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
20
|
+
export type { ConversationResult, GeneratedAudioFile, SpeechResult, } from "./speech-result.js";
|
|
21
|
+
export type { ResolvedSTTModel, SpeechToTextProvider, STTModelInfo, } from "./speech-to-text-provider.js";
|
|
22
|
+
export type { StreamSpeechResult } from "./stream-speech-result.js";
|
|
23
|
+
export type { ConversationWordTimestamp, WordTimestamp, } from "./timestamps.js";
|
|
24
|
+
export type { TurnTimestamp } from "./turns.js";
|
|
2
25
|
export interface GenerateSpeechOptions<V extends Voice = Voice> {
|
|
3
26
|
abortSignal?: AbortSignal;
|
|
4
27
|
apiKey?: string;
|
|
@@ -7,6 +30,8 @@ export interface GenerateSpeechOptions<V extends Voice = Voice> {
|
|
|
7
30
|
model: string | ResolvedModel<V>;
|
|
8
31
|
providerOptions?: Record<string, unknown>;
|
|
9
32
|
text: string;
|
|
33
|
+
timestamps?: boolean;
|
|
10
34
|
voice: V;
|
|
35
|
+
volumeDbfs?: number;
|
|
11
36
|
}
|
|
12
37
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,8BAA8B,EAAE,MAAM,iCAAiC,CAAC;AACtF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AACrF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,kBAAkB,EAClB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EACV,yBAAyB,EACzB,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,KAAK,EAAE,CAAC,CAAC;IACT,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
|
package/dist/volume-adjust.d.ts
CHANGED
|
@@ -3,12 +3,6 @@ interface AdjustVolumeInput {
|
|
|
3
3
|
readonly mediaType: string;
|
|
4
4
|
readonly volumeDbfs: number;
|
|
5
5
|
}
|
|
6
|
-
/**
|
|
7
|
-
* Decode the provider's PCM/WAV output, RMS-normalize to the target dBFS,
|
|
8
|
-
* and re-encode as 16-bit mono WAV. Lazy-loaded by generateSpeech only when
|
|
9
|
-
* `volumeDbfs` is set so callers that never use volume adjustment don't pay
|
|
10
|
-
* for the WAV mux dependency chain at import time.
|
|
11
|
-
*/
|
|
12
6
|
export declare function adjustVolume(input: AdjustVolumeInput): Promise<Uint8Array>;
|
|
13
7
|
export {};
|
|
14
8
|
//# sourceMappingURL=volume-adjust.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAOA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED
|
|
1
|
+
{"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAOA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAsB,YAAY,CAChC,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,UAAU,CAAC,CAgBrB"}
|
package/dist/volume-adjust.js
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
1
|
import { concatPcmToWav, dbfsToInt16Rms, decodeToPcm16, normalizeRms, } from "./conversation/pcm-concat.js";
|
|
2
|
-
/**
|
|
3
|
-
* Decode the provider's PCM/WAV output, RMS-normalize to the target dBFS,
|
|
4
|
-
* and re-encode as 16-bit mono WAV. Lazy-loaded by generateSpeech only when
|
|
5
|
-
* `volumeDbfs` is set so callers that never use volume adjustment don't pay
|
|
6
|
-
* for the WAV mux dependency chain at import time.
|
|
7
|
-
*/
|
|
8
2
|
export async function adjustVolume(input) {
|
|
9
3
|
const bytes = input.audio instanceof Uint8Array
|
|
10
4
|
? input.audio
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC
|
|
1
|
+
{"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAwB;IAExB,MAAM,KAAK,GACT,KAAK,CAAC,KAAK,YAAY,UAAU;QAC/B,CAAC,CAAC,KAAK,CAAC,KAAK;QACb,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,OAAO,GAAG,aAAa,CAAC,KAAK,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACtD,MAAM,CAAC,UAAU,CAAC,GAAG,YAAY,CAC/B,CAAC,OAAO,CAAC,EACT,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACjC,CAAC;IAEF,OAAO,MAAM,cAAc,CAAC,CAAC,UAAU,CAAC,EAAE;QACxC,KAAK,EAAE,CAAC;QACR,gBAAgB,EAAE,UAAU,CAAC,UAAU;KACxC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,GAAG,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@speech-sdk/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.0-alpha",
|
|
4
4
|
"description": "Universal, cross-platform text-to-speech SDK with multi-provider support.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -10,69 +10,13 @@
|
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"default": "./dist/index.js"
|
|
12
12
|
},
|
|
13
|
-
"./
|
|
14
|
-
"types": "./dist/
|
|
15
|
-
"default": "./dist/
|
|
13
|
+
"./providers": {
|
|
14
|
+
"types": "./dist/providers.d.ts",
|
|
15
|
+
"default": "./dist/providers.js"
|
|
16
16
|
},
|
|
17
|
-
"./
|
|
18
|
-
"types": "./dist/
|
|
19
|
-
"default": "./dist/
|
|
20
|
-
},
|
|
21
|
-
"./openai": {
|
|
22
|
-
"types": "./dist/providers/openai/index.d.ts",
|
|
23
|
-
"default": "./dist/providers/openai/index.js"
|
|
24
|
-
},
|
|
25
|
-
"./elevenlabs": {
|
|
26
|
-
"types": "./dist/providers/elevenlabs/index.d.ts",
|
|
27
|
-
"default": "./dist/providers/elevenlabs/index.js"
|
|
28
|
-
},
|
|
29
|
-
"./deepgram": {
|
|
30
|
-
"types": "./dist/providers/deepgram/index.d.ts",
|
|
31
|
-
"default": "./dist/providers/deepgram/index.js"
|
|
32
|
-
},
|
|
33
|
-
"./cartesia": {
|
|
34
|
-
"types": "./dist/providers/cartesia/index.d.ts",
|
|
35
|
-
"default": "./dist/providers/cartesia/index.js"
|
|
36
|
-
},
|
|
37
|
-
"./hume": {
|
|
38
|
-
"types": "./dist/providers/hume/index.d.ts",
|
|
39
|
-
"default": "./dist/providers/hume/index.js"
|
|
40
|
-
},
|
|
41
|
-
"./inworld": {
|
|
42
|
-
"types": "./dist/providers/inworld/index.d.ts",
|
|
43
|
-
"default": "./dist/providers/inworld/index.js"
|
|
44
|
-
},
|
|
45
|
-
"./google": {
|
|
46
|
-
"types": "./dist/providers/google/index.d.ts",
|
|
47
|
-
"default": "./dist/providers/google/index.js"
|
|
48
|
-
},
|
|
49
|
-
"./fish-audio": {
|
|
50
|
-
"types": "./dist/providers/fish-audio/index.d.ts",
|
|
51
|
-
"default": "./dist/providers/fish-audio/index.js"
|
|
52
|
-
},
|
|
53
|
-
"./murf": {
|
|
54
|
-
"types": "./dist/providers/murf/index.d.ts",
|
|
55
|
-
"default": "./dist/providers/murf/index.js"
|
|
56
|
-
},
|
|
57
|
-
"./resemble": {
|
|
58
|
-
"types": "./dist/providers/resemble/index.d.ts",
|
|
59
|
-
"default": "./dist/providers/resemble/index.js"
|
|
60
|
-
},
|
|
61
|
-
"./fal-ai": {
|
|
62
|
-
"types": "./dist/providers/fal/index.d.ts",
|
|
63
|
-
"default": "./dist/providers/fal/index.js"
|
|
64
|
-
},
|
|
65
|
-
"./mistral": {
|
|
66
|
-
"types": "./dist/providers/mistral/index.d.ts",
|
|
67
|
-
"default": "./dist/providers/mistral/index.js"
|
|
68
|
-
},
|
|
69
|
-
"./xai": {
|
|
70
|
-
"types": "./dist/providers/xai/index.d.ts",
|
|
71
|
-
"default": "./dist/providers/xai/index.js"
|
|
72
|
-
},
|
|
73
|
-
"./stt/openai": {
|
|
74
|
-
"types": "./dist/stt-providers/openai/index.d.ts",
|
|
75
|
-
"default": "./dist/stt-providers/openai/index.js"
|
|
17
|
+
"./types": {
|
|
18
|
+
"types": "./dist/types.d.ts",
|
|
19
|
+
"default": "./dist/types.js"
|
|
76
20
|
}
|
|
77
21
|
},
|
|
78
22
|
"files": [
|
|
@@ -95,14 +39,15 @@
|
|
|
95
39
|
},
|
|
96
40
|
"dependencies": {
|
|
97
41
|
"mediabunny": "^1.40.1",
|
|
98
|
-
"p-retry": "^8.0.0"
|
|
42
|
+
"p-retry": "^8.0.0",
|
|
43
|
+
"zod": "^4.3.6"
|
|
99
44
|
},
|
|
100
45
|
"devDependencies": {
|
|
101
|
-
"@biomejs/biome": "2.4.
|
|
46
|
+
"@biomejs/biome": "2.4.13",
|
|
102
47
|
"@types/node": "^25.5.0",
|
|
103
48
|
"dotenv": "^17.3.1",
|
|
104
49
|
"typescript": "^5.8.0",
|
|
105
|
-
"ultracite": "7.
|
|
50
|
+
"ultracite": "7.6.2",
|
|
106
51
|
"vite": "^7.3.2",
|
|
107
52
|
"vitest": "^4.1.3"
|
|
108
53
|
},
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import type { ResolvedSTTModel, SpeechToTextProvider } from "../../speech-to-text-provider.js";
|
|
2
|
-
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
|
-
export interface OpenAISpeechToTextProviderConfig {
|
|
4
|
-
apiKey?: string;
|
|
5
|
-
baseURL?: string;
|
|
6
|
-
fetch?: typeof globalThis.fetch;
|
|
7
|
-
}
|
|
8
|
-
/**
|
|
9
|
-
* OpenAI Whisper / gpt-4o-transcribe adapter for the SDK's derived-timestamps
|
|
10
|
-
* path. Uses `/v1/audio/transcriptions` with `timestamp_granularities: ["word"]`
|
|
11
|
-
* and `response_format: "verbose_json"`.
|
|
12
|
-
*
|
|
13
|
-
* Note: `gpt-4o-transcribe-diarize` is intentionally not listed — that
|
|
14
|
-
* variant does not support `timestamp_granularities`.
|
|
15
|
-
*/
|
|
16
|
-
export declare class OpenAISpeechToTextProvider implements SpeechToTextProvider {
|
|
17
|
-
readonly id = "openai";
|
|
18
|
-
readonly defaultModel = "whisper-1";
|
|
19
|
-
readonly models: readonly [{
|
|
20
|
-
readonly id: "whisper-1";
|
|
21
|
-
readonly releaseDate: "2023-03-01";
|
|
22
|
-
readonly languages: readonly ["af", "ar", "az", "be", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "fa", "fi", "fr", "gl", "he", "hi", "hr", "hu", "hy", "id", "is", "it", "ja", "kk", "kn", "ko", "lt", "lv", "mi", "mk", "mr", "ms", "ne", "nl", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sr", "sv", "sw", "ta", "th", "tl", "tr", "uk", "ur", "vi", "zh"];
|
|
23
|
-
}];
|
|
24
|
-
private readonly apiKey;
|
|
25
|
-
private readonly baseURL;
|
|
26
|
-
private readonly fetchFn;
|
|
27
|
-
constructor(config?: OpenAISpeechToTextProviderConfig);
|
|
28
|
-
transcribe(options: {
|
|
29
|
-
modelId: string;
|
|
30
|
-
audio: Uint8Array;
|
|
31
|
-
mediaType: string;
|
|
32
|
-
language?: string;
|
|
33
|
-
abortSignal?: AbortSignal;
|
|
34
|
-
headers?: Record<string, string>;
|
|
35
|
-
}): Promise<{
|
|
36
|
-
timestamps: WordTimestamp[];
|
|
37
|
-
text?: string;
|
|
38
|
-
providerMetadata?: Record<string, unknown>;
|
|
39
|
-
}>;
|
|
40
|
-
}
|
|
41
|
-
export declare function createOpenAISTT(config?: OpenAISpeechToTextProviderConfig): (modelId?: string) => ResolvedSTTModel;
|
|
42
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/stt-providers/openai/index.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACrB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,MAAM,WAAW,gCAAgC;IAC/C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAiED;;;;;;;GAOG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,YAAY,eAAe;IAMpC,QAAQ,CAAC,MAAM;;;;OAMJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,GAAE,gCAAqC;IAMnD,UAAU,CAAC,OAAO,EAAE;QACxB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAsDH;AAED,wBAAgB,eAAe,CAAC,MAAM,GAAE,gCAAqC,IAGjD,UAAU,MAAM,KAAG,gBAAgB,CAM9D"}
|