@speech-sdk/core 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +215 -269
- package/dist/__tests__/e2e/_save-audio.d.ts +25 -2
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +46 -10
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-utils.d.ts +2 -0
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +9 -0
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +137 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +283 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -0
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +37 -0
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +16 -0
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/derive-timestamps.d.ts +14 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +38 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +25 -0
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -0
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +1 -1
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +59 -0
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +18 -1
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +73 -16
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +6 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +40 -0
- package/dist/logger.js.map +1 -0
- package/dist/provider-utils.d.ts +8 -0
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +16 -2
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +24 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +23 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +12 -2
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +137 -2
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +24 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +48 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +19 -4
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +83 -13
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +0 -25
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +3 -58
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +38 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +31 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +8 -1
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +75 -1
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +25 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +23 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +11 -2
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +11 -2
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +22 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +17 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +8 -1
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +10 -1
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +12 -3
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +7 -3
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +32 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +57 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +7 -1
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +13 -1
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +3 -12
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +48 -4
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +16 -0
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +10 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +40 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stt-providers/openai/index.d.ts +42 -0
- package/dist/stt-providers/openai/index.d.ts.map +1 -0
- package/dist/stt-providers/openai/index.js +184 -0
- package/dist/stt-providers/openai/index.js.map +1 -0
- package/dist/timestamps.d.ts +23 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/package.json +6 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gCAAgC,GACjC,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"generate-speech.js","sourceRoot":"","sources":["../src/generate-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gCAAgC,GACjC,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEpC,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,6BAA6B,GAG9B,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAI/D,MAAM,CAAC,KAAK,UAAU,cAAc,CAA0B,OAkC7D;IACC,MAAM,EACJ,KAAK,EACL,KAAK,EACL,WAAW,EACX,OAAO,EACP,UAAU,EACV,UAAU,EAAE,aAAa,GAAG,MAAM,EAClC,iBAAiB,GAClB,GAAG,OAAO,CAAC;IACZ,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,IAAI,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;IAE9C,IAAI,UAAU,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,MAAM,IAAI,gCAAgC,CAAC,eAAe,CAAC,CAAC;QAC9D,CAAC;QACD,sEAAsE;QACtE,wEAAwE;QACxE,mEAAmE;QACnE,eAAe,GAAG;YAChB,GAAG,OAAO,CAAC,eAAe;YAC1B,GAAG,UAAU,CAAC,eAAe;SAC9B,CAAC;IACJ,CAAC;IAED,MAAM,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACtD,QAAQ,EACR,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC;IAEF,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,mBAAmB,GAAG,6BAA6B,CAAC,QAAQ,CAAC,CAAC;IAEpE,uEAAuE;IACvE,yCAAyC;IACzC,MAAM,mBAAmB,GACvB,CAAC,aAAa,KAAK,IAAI,IAAI,aAAa,KAAK,MAAM,CAAC,IAAI,mBAAmB,CAAC;IAE9E,oBAAoB,CAAC;QACnB,eAAe;QACf,IAAI,EAAE,aAAa;QACnB,SAAS,EAAE,mBAAmB;QAC9B,iBAAiB,EAAE,mBAAmB;QACtC,iBAAiB;KAClB,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC,QAAQ,CAAC,QAAQ,C
AAC;QACzB,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;QACP,iBAAiB,EAAE,mBAAmB;KACvC,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,KAAK,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;gBACxD,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAE5D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,sBAAsB,EAAE,CAAC;IACrC,CAAC;IAED,IAAI,WAAW,GAAwB,SAAS,CAAC;IACjD,IAAI,eAAe,GAAG,MAAM,CAAC,SAAS,CAAC;IAEvC,IAAI,UAAU,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAC5D,WAAW,GAAG,MAAM,YAAY,CAAC;YAC/B,KAAK,EAAE,SAAS;YAChB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,UAAU;SACX,CAAC,CAAC;QACH,eAAe,GAAG,WAAW,CAAC;IAChC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,yBAAyB,CAAC;QAC1C,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,eAAe;KAC3B,CAAC,CAAC;IAEH,MAAM,eAAe,GACnB,CAAC,MAAM,oBAAoB,CAAC,KAAK,CAAC,UAAU,EAAE,eAAe,CAAC,CAAC;QAC/D,MAAM,CAAC,eAAe,CAAC;IAEzB,IAAI,UAAgD,CAAC;IACrD,IAAI,aAAa,KAAK,KAAK,EAAE,CAAC;QAC5B,IAAI,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtD,KAAK,CACH,GAAG,eAAe,cAAc,MAAM,CAAC,UAAU,CAAC,MAAM,0BAA0B,CACnF,CAAC;YACF,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC;QACjC,CAAC;aAAM,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAClC,UAAU,GAAG,MAAM,sBAAsB,CAAC;gBACxC,QAAQ,EAAE,eAAe;gBACzB,KAAK,EAAE,KAAK,CAAC,UAAU;gBACvB,SAAS,EAAE,eAAe;gBAC1B,iBAAiB;gBACjB,WAAW;aACZ,CAAC,CAAC;YACH,KAAK,CACH,GAAG,eAAe,aAAa,UAAU,CAAC,MAAM,oCAAoC,CACrF,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAmB;QAC/B,SAAS;QACT,UAAU,EAAE,aAAa,CAAC,MAAM;QAChC,QAAQ,EAAE,QAAQ,CAAC,QAAQ,CAAC,EAAE;QAC9B,KAAK,EAAE,QAAQ,CAAC,OAAO;QACvB,GAAG,CAAC,eAAe,IAAI,IAAI,IAAI,EAAE,eAAe,EAAE,CAAC;KACpD,CAAC;IAEF,OAAO;QACL,KAAK;QACL,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;QACpD,UAAU;KACX,CAAC;AACJ,CAAC;AAED,SAA
S,cAAc,CACrB,QAAuB,EACvB,OAAe,EACf,eAAuB;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,OAAO,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,CAAC;IACvE,CAAC;IACD,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,OAAO,cAAc,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;AACzC,CAAC;AAED;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,IAM7B;IACC,MAAM,EAAE,eAAe,EAAE,IAAI,EAAE,iBAAiB,EAAE,GAAG,IAAI,CAAC;IAC1D,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,GAAG,eAAe,2CAA2C,CAAC,CAAC;QACrE,OAAO;IACT,CAAC;IACD,IAAI,iBAAiB,EAAE,CAAC;QACtB,KAAK,CACH,GAAG,eAAe,kBAAkB,IAAI,oDAAoD,CAC7F,CAAC;QACF,OAAO;IACT,CAAC;IACD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,KAAK,CACH,GAAG,eAAe,kLAAkL,CACrM,CAAC;QACF,OAAO;IACT,CAAC;IACD,8DAA8D;IAC9D,KAAK,CACH,GAAG,eAAe,8FAA8F,iBAAiB,CAAC,IAAI,CAAC,iBAAiB,CAAC,2CAA2C,CACrM,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAsC;IAC/D,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IACvD,CAAC;IACD,OAAO,4BAA4B,CAAC;AACtC,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
2
|
-
export {
|
|
2
|
+
export type { CaptionFormat, CaptionsOptions } from "./captions.js";
|
|
3
|
+
export { timestampsToCaptions } from "./captions.js";
|
|
4
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError, StreamingNotSupportedError, TimestampKeyMissingError, } from "./errors.js";
|
|
3
5
|
export { generateSpeech } from "./generate-speech.js";
|
|
4
6
|
export type { SpeechMetadata } from "./metadata.js";
|
|
5
|
-
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
7
|
+
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, TimestampsFeature, Voice, } from "./speech-provider.js";
|
|
6
8
|
export { FEATURES, getFeature, hasFeature, } from "./speech-provider.js";
|
|
7
9
|
export type { GeneratedAudioFile, SpeechResult } from "./speech-result.js";
|
|
10
|
+
export type { ResolvedSTTModel, SpeechToTextProvider, STTModelInfo, } from "./speech-to-text-provider.js";
|
|
8
11
|
export { streamSpeech } from "./stream-speech.js";
|
|
9
12
|
export type { StreamSpeechResult } from "./stream-speech-result.js";
|
|
13
|
+
export type { TimestampMode, WordTimestamp } from "./timestamps.js";
|
|
10
14
|
export type { GenerateSpeechOptions } from "./types.js";
|
|
11
15
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,cAAc,EACd,0BAA0B,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,cAAc,EACd,0BAA0B,EAC1B,wBAAwB,GACzB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,iBAAiB,EACjB,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACL,QAAQ,EACR,UAAU,EACV,UAAU,GACX,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAC3E,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACpE,YAAY,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// biome-ignore lint/performance/noBarrelFile: intentional public API barrel
|
|
2
2
|
export { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
-
export {
|
|
3
|
+
export { timestampsToCaptions } from "./captions.js";
|
|
4
|
+
export { ApiError, NoSpeechGeneratedError, SpeechSDKError, StreamingNotSupportedError, TimestampKeyMissingError, } from "./errors.js";
|
|
4
5
|
export { generateSpeech } from "./generate-speech.js";
|
|
5
6
|
export { FEATURES, getFeature, hasFeature, } from "./speech-provider.js";
|
|
6
7
|
export { streamSpeech } from "./stream-speech.js";
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAC5E,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAElE,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,cAAc,EACd,0BAA0B,EAC1B,wBAAwB,GACzB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAUtD,OAAO,EACL,QAAQ,EACR,UAAU,EACV,UAAU,GACX,MAAM,sBAAsB,CAAC;AAO9B,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC"}
|
package/dist/logger.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.d.ts","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAsCA,wBAAgB,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAK3C"}
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal debug-level logger. Emits a namespaced message only when the
|
|
3
|
+
* `DEBUG` env var opts in (convention borrowed from the `debug` npm
|
|
4
|
+
* package, without the dependency). Matches any of:
|
|
5
|
+
* DEBUG=* enables everything
|
|
6
|
+
* DEBUG=speech-sdk enables the SDK
|
|
7
|
+
* DEBUG=speech-sdk:* same (wildcard namespace)
|
|
8
|
+
* DEBUG=foo,speech-sdk comma list
|
|
9
|
+
*/
|
|
10
|
+
const NAMESPACE = "speech-sdk";

/**
 * Decide whether debug logging is switched on through the `DEBUG` env var.
 *
 * `DEBUG` is read as a comma-separated list. Each entry is trimmed, and
 * logging is enabled as soon as one entry is:
 *   - `*`               enables everything
 *   - `speech-sdk`      the SDK namespace itself
 *   - `speech-sdk:...`  any sub-namespace, which includes `speech-sdk:*`
 *
 * @returns {boolean} `true` when debug output should be emitted; `false`
 *   when `process` is not defined or `DEBUG` is unset or empty.
 */
function debugEnabled() {
  if (typeof process === "undefined") {
    return false;
  }
  const raw = process.env?.DEBUG;
  if (!raw) {
    return false;
  }
  const subNamespacePrefix = `${NAMESPACE}:`;
  // The wildcard is checked per entry (after trimming) rather than against
  // the whole value, so lists such as `DEBUG=foo,*` or a padded ` * ` also
  // enable logging. `speech-sdk:*` needs no dedicated check: it already
  // starts with the sub-namespace prefix.
  return raw.split(",").some((entry) => {
    const trimmed = entry.trim();
    return (
      trimmed === "*" ||
      trimmed === NAMESPACE ||
      trimmed.startsWith(subNamespacePrefix)
    );
  });
}
|
|
29
|
+
// Resolved a single time when this module is first loaded, so hot paths never
// touch the environment. Changing DEBUG while the process is running has no
// effect until the module is imported again; that is an accepted trade-off,
// since debug logging is an operator concern decided at startup rather than
// a runtime setting.
const ENABLED = debugEnabled();

/**
 * Write a namespaced debug line via `console.debug` when debug logging is
 * enabled; otherwise do nothing.
 *
 * @param message - Text to log after the `[<namespace>] ` prefix.
 */
export function debug(message) {
  if (ENABLED) {
    console.debug(`[${NAMESPACE}] ${message}`);
  }
}
|
|
40
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,SAAS,GAAG,YAAY,CAAC;AAE/B,SAAS,YAAY;IACnB,IAAI,OAAO,OAAO,KAAK,WAAW,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC;QAC1D,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC;IAC9B,IAAI,GAAG,KAAK,GAAG,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,IACE,OAAO,KAAK,SAAS;YACrB,OAAO,KAAK,GAAG,SAAS,IAAI;YAC5B,OAAO,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,CAAC,EACnC,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,yEAAyE;AACzE,yEAAyE;AACzE,yEAAyE;AACzE,kCAAkC;AAClC,MAAM,OAAO,GAAG,YAAY,EAAE,CAAC;AAE/B,MAAM,UAAU,KAAK,CAAC,OAAe;IACnC,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO;IACT,CAAC;IACD,OAAO,CAAC,KAAK,CAAC,IAAI,SAAS,KAAK,OAAO,EAAE,CAAC,CAAC;AAC7C,CAAC"}
|
package/dist/provider-utils.d.ts
CHANGED
|
@@ -1,4 +1,12 @@
|
|
|
1
1
|
export declare const SDK_USER_AGENT = "jellypod-speech-sdk";
|
|
2
|
+
/**
|
|
3
|
+
* Split a `"provider/model"` spec into its parts. Spec with no slash is
|
|
4
|
+
* treated as a bare provider name (caller falls back to `defaultModel`).
|
|
5
|
+
*/
|
|
6
|
+
export declare function parseProviderModelSpec(spec: string): {
|
|
7
|
+
providerName: string;
|
|
8
|
+
modelId: string | undefined;
|
|
9
|
+
};
|
|
2
10
|
export declare function resolveApiKey(stored: string | undefined, envVar: string, providerName: string): string;
|
|
3
11
|
export declare function handleErrorResponse(response: Response, model: string): Promise<void>;
|
|
4
12
|
//# sourceMappingURL=provider-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,cAAc,wBAAwB,CAAC;AAEpD,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,
|
|
1
|
+
{"version":3,"file":"provider-utils.d.ts","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAMA,eAAO,MAAM,cAAc,wBAAwB,CAAC;AAEpD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG;IACpD,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;CAC7B,CASA;AAED,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,GAAG,SAAS,EAC1B,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,GACnB,MAAM,CAQR;AA+BD,wBAAsB,mBAAmB,CACvC,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC,CAcf"}
|
package/dist/provider-utils.js
CHANGED
|
@@ -1,14 +1,28 @@
|
|
|
1
|
-
import { ApiError } from "./errors.js";
|
|
1
|
+
import { ApiError, MissingApiKeyError } from "./errors.js";
|
|
2
2
|
// Identifies traffic originating from this SDK so providers can bucket
|
|
3
3
|
// usage by integration. Sent as `X-User-Agent` because `User-Agent` is
|
|
4
4
|
// a forbidden header name in browser fetch. Callers may override via
|
|
5
5
|
// options.headers.
|
|
6
6
|
export const SDK_USER_AGENT = "jellypod-speech-sdk";
|
|
7
|
+
/**
 * Break a `"provider/model"` spec apart at its first slash.
 *
 * Everything before the first `/` is the provider name and everything after
 * it is the model id, so a model id may itself contain slashes. When there
 * is no slash, or nothing follows it, `modelId` is `undefined` (the caller
 * falls back to `defaultModel`).
 *
 * @param spec - Provider spec such as `"provider"` or `"provider/model"`.
 * @returns An object with `providerName` and `modelId` (possibly `undefined`).
 */
export function parseProviderModelSpec(spec) {
  const separatorAt = spec.indexOf("/");
  const hasSeparator = separatorAt !== -1;
  const providerName = hasSeparator ? spec.slice(0, separatorAt) : spec;
  const remainder = hasSeparator ? spec.slice(separatorAt + 1) : "";
  // An empty remainder (for example "provider/") counts as "no model given".
  return { providerName, modelId: remainder || undefined };
}
|
|
7
21
|
/**
 * Resolve the API key for a provider.
 *
 * Uses `stored` when it is a non-empty string; otherwise falls back to the
 * environment variable named `envVar` (only when `process` is defined).
 *
 * @param stored - Explicitly configured key, if any.
 * @param envVar - Name of the environment variable to fall back to.
 * @param providerName - Provider name, passed through to the error.
 * @returns The resolved, non-empty API key.
 * @throws {MissingApiKeyError} When neither source yields a non-empty key.
 */
export function resolveApiKey(stored, envVar, providerName) {
  const fromEnv =
    typeof process === "undefined" ? undefined : process.env?.[envVar];
  // `||` rather than `??`: an empty-string `stored` is not a usable key, so
  // it must not block the environment fallback. With `??` an empty string
  // was kept and then rejected below even though the env var was set.
  const key = stored || fromEnv;
  if (!key) {
    throw new MissingApiKeyError({ providerName, envVar });
  }
  return key;
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"provider-utils.js","sourceRoot":"","sources":["../src/provider-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAE3D,uEAAuE;AACvE,uEAAuE;AACvE,qEAAqE;AACrE,mBAAmB;AACnB,MAAM,CAAC,MAAM,cAAc,GAAG,qBAAqB,CAAC;AAEpD;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IAIjD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;IACpD,CAAC;IACD,OAAO;QACL,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC;QACvC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,IAAI,SAAS;KACjD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,MAA0B,EAC1B,MAAc,EACd,YAAoB;IAEpB,MAAM,GAAG,GACP,MAAM;QACN,CAAC,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC;IACvE,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,MAAM,IAAI,kBAAkB,CAAC,EAAE,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC;IACzD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAwB;IACnD,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,+BAA+B;QAC/B,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,KAAK,CAAC;QACpB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,EAAE,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC5C,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACpC,OAAO,IAAI,CAAC,MAAM,CAAC;QACrB,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,qCAAqC;QACrC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;QAClC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;AAC7D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAkB,EAClB,KAAa;IAEb,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,G
AAG,EAAE,CAAC,SAAS,CAAC,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,OAAO,GAAG,MAAM;YACpB,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,KAAK,MAAM,EAAE;YACpD,CAAC,CAAC,GAAG,KAAK,cAAc,QAAQ,CAAC,MAAM,EAAE,CAAC;QAE5C,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE;YAC1B,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,KAAK;YACL,YAAY;SACb,CAAC,CAAC;IACL,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
|
|
3
|
+
* Shape of the `word_timestamps` block inside a Cartesia SSE/WebSocket
|
|
4
|
+
* `type: "timestamps"` message. Three parallel arrays — index N is the Nth
|
|
5
|
+
* word's text (`words[N]`), start time (`start[N]`, seconds), and end time
|
|
6
|
+
* (`end[N]`, seconds).
|
|
7
|
+
*
|
|
8
|
+
* Cartesia emits these messages incrementally — each message covers a span
|
|
9
|
+
* of words synthesized so far in the current `context_id`. The SDK
|
|
10
|
+
* accumulates them in arrival order and flattens at end-of-stream.
|
|
11
|
+
*/
|
|
12
|
+
export interface CartesiaWordTimestamps {
|
|
13
|
+
readonly end: readonly number[];
|
|
14
|
+
readonly start: readonly number[];
|
|
15
|
+
readonly words: readonly string[];
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Flatten a sequence of `word_timestamps` messages — collected as the SSE
|
|
19
|
+
* stream emitted them — into a single `WordTimestamp[]`. Skips entries past
|
|
20
|
+
* the shortest array length so a malformed message can't produce undefined
|
|
21
|
+
* start/end values.
|
|
22
|
+
*/
|
|
23
|
+
export declare function mergeWordTimestampMessages(messages: readonly CartesiaWordTimestamps[]): WordTimestamp[];
|
|
24
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;;;;GASG;AACH,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,GAAG,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;IAClC,QAAQ,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,CAAC;CACnC;AAED;;;;;GAKG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,SAAS,sBAAsB,EAAE,GAC1C,aAAa,EAAE,CAejB"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Flatten a sequence of `word_timestamps` messages, in the order given, into
 * a single list of `{ text, start, end }` entries.
 *
 * Each message carries three parallel arrays (`words`, `start`, `end`).
 * Malformed input is tolerated rather than thrown on:
 *   - entries past the shortest of the three arrays are skipped;
 *   - entries whose text, start or end is `null`/`undefined` are skipped;
 *   - a message that is nullish, or lacks one of the arrays, contributes
 *     nothing.
 *
 * @param messages - Iterable of messages shaped `{ words, start, end }`.
 * @returns A new array of `{ text, start, end }` objects.
 */
export function mergeWordTimestampMessages(messages) {
  const out = [];
  for (const msg of messages) {
    // A missing or non-array field is treated as empty so that `len` becomes
    // 0 and the message is skipped instead of raising a TypeError.
    const words = Array.isArray(msg?.words) ? msg.words : [];
    const starts = Array.isArray(msg?.start) ? msg.start : [];
    const ends = Array.isArray(msg?.end) ? msg.end : [];
    const len = Math.min(words.length, starts.length, ends.length);
    for (let i = 0; i < len; i++) {
      const text = words[i];
      const start = starts[i];
      const end = ends[i];
      // Arrays can still hold explicit nulls; never emit a partial entry.
      if (text == null || start == null || end == null) {
        continue;
      }
      out.push({ text, start, end });
    }
  }
  return out;
}
|
|
23
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/cartesia/alignment.ts"],"names":[],"mappings":"AAkBA;;;;;GAKG;AACH,MAAM,UAAU,0BAA0B,CACxC,QAA2C;IAE3C,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QACzE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvB,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;gBACjD,SAAS;YACX,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { type ResolvedModel, type SpeechProvider } from "../../speech-provider.js";
|
|
2
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
3
|
export interface CartesiaSpeechProviderConfig {
|
|
3
4
|
apiKey?: string;
|
|
4
5
|
baseURL?: string;
|
|
@@ -11,12 +12,18 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
11
12
|
readonly id: "sonic-3";
|
|
12
13
|
readonly releaseDate: "2025-10-27";
|
|
13
14
|
readonly languages: readonly ["en", "fr", "de", "es", "pt", "zh", "ja", "hi", "it", "ko", "nl", "pl", "ru", "sv", "tr", "tl", "bg", "ro", "ar", "cs", "el", "fi", "hr", "ms", "sk", "da", "ta", "uk", "hu", "no", "vi", "bn", "th", "he", "ka", "id", "te", "gu", "kn", "ml", "mr", "pa"];
|
|
14
|
-
readonly features: readonly ["streaming", "audio-tags", "inline-voice-cloning"
|
|
15
|
+
readonly features: readonly ["streaming", "audio-tags", "inline-voice-cloning", {
|
|
16
|
+
readonly id: "timestamps";
|
|
17
|
+
readonly mode: "native";
|
|
18
|
+
}];
|
|
15
19
|
}, {
|
|
16
20
|
readonly id: "sonic-2";
|
|
17
21
|
readonly releaseDate: "2025-03-13";
|
|
18
22
|
readonly languages: readonly ["en"];
|
|
19
|
-
readonly features: readonly ["streaming"
|
|
23
|
+
readonly features: readonly ["streaming", {
|
|
24
|
+
readonly id: "timestamps";
|
|
25
|
+
readonly mode: "native";
|
|
26
|
+
}];
|
|
20
27
|
}];
|
|
21
28
|
private static readonly PASSTHROUGH_TAGS;
|
|
22
29
|
private static readonly EMOTIONS;
|
|
@@ -35,11 +42,14 @@ export declare class CartesiaSpeechProvider implements SpeechProvider<string, st
|
|
|
35
42
|
providerOptions?: Record<string, unknown>;
|
|
36
43
|
abortSignal?: AbortSignal;
|
|
37
44
|
headers?: Record<string, string>;
|
|
45
|
+
includeTimestamps?: boolean;
|
|
38
46
|
}): Promise<{
|
|
39
47
|
audio: Uint8Array;
|
|
40
48
|
mediaType: string;
|
|
41
49
|
providerMetadata?: Record<string, unknown>;
|
|
50
|
+
timestamps?: WordTimestamp[];
|
|
42
51
|
}>;
|
|
52
|
+
private generateWithTimestamps;
|
|
43
53
|
stream(options: {
|
|
44
54
|
modelId: string;
|
|
45
55
|
text: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAQA,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,cAAc,EACpB,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAMzD,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,cAAc;IACzB,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM;;;;;;;;;;;;;;;;OA6DJ;IAEX,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,gBAAgB,CAAyB;IAEjE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CA2DrB;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAMhD,gBAAgB,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IA2CjC,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;YA4CY,sBAAsB;IAoE9B,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAwCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;;;CAejC;AAED,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAG7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMlE"}
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { detectAudioTags, stripAudioTags } from "../../audio-tags.js";
|
|
2
|
+
import { base64ToUint8Array, wrapPcm16Mono } from "../../audio-utils.js";
|
|
3
|
+
import { SpeechSDKError } from "../../errors.js";
|
|
2
4
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
3
5
|
import { hasFeature, } from "../../speech-provider.js";
|
|
6
|
+
import { mergeWordTimestampMessages, } from "./alignment.js";
|
|
4
7
|
export class CartesiaSpeechProvider {
|
|
5
8
|
id = "cartesia";
|
|
6
9
|
defaultModel = "sonic-3";
|
|
@@ -52,13 +55,18 @@ export class CartesiaSpeechProvider {
|
|
|
52
55
|
"mr",
|
|
53
56
|
"pa",
|
|
54
57
|
],
|
|
55
|
-
features: [
|
|
58
|
+
features: [
|
|
59
|
+
"streaming",
|
|
60
|
+
"audio-tags",
|
|
61
|
+
"inline-voice-cloning",
|
|
62
|
+
{ id: "timestamps", mode: "native" },
|
|
63
|
+
],
|
|
56
64
|
},
|
|
57
65
|
{
|
|
58
66
|
id: "sonic-2",
|
|
59
67
|
releaseDate: "2025-03-13",
|
|
60
68
|
languages: ["en"],
|
|
61
|
-
features: ["streaming"],
|
|
69
|
+
features: ["streaming", { id: "timestamps", mode: "native" }],
|
|
62
70
|
},
|
|
63
71
|
];
|
|
64
72
|
static PASSTHROUGH_TAGS = ["laughter"];
|
|
@@ -156,6 +164,10 @@ export class CartesiaSpeechProvider {
|
|
|
156
164
|
return { text: processed, warnings };
|
|
157
165
|
}
|
|
158
166
|
async generate(options) {
|
|
167
|
+
// /tts/bytes is audio-only; word timing requires the SSE endpoint.
|
|
168
|
+
if (options.includeTimestamps) {
|
|
169
|
+
return this.generateWithTimestamps(options);
|
|
170
|
+
}
|
|
159
171
|
const url = `${this.baseURL}/tts/bytes`;
|
|
160
172
|
const body = {
|
|
161
173
|
output_format: {
|
|
@@ -188,6 +200,51 @@ export class CartesiaSpeechProvider {
|
|
|
188
200
|
mediaType,
|
|
189
201
|
};
|
|
190
202
|
}
|
|
203
|
+
async generateWithTimestamps(options) {
|
|
204
|
+
// Force raw pcm_s16le @ 24kHz: the SDK concatenates chunks directly and
|
|
205
|
+
// wraps them in a WAV header at end-of-stream. Allowing a caller to
|
|
206
|
+
// override the container would corrupt the merged audio since Cartesia
|
|
207
|
+
// SSE's wav/mp3 chunks are not safely concat-able.
|
|
208
|
+
const sampleRate = 24_000;
|
|
209
|
+
const body = {
|
|
210
|
+
...options.providerOptions,
|
|
211
|
+
model_id: options.modelId,
|
|
212
|
+
transcript: options.text,
|
|
213
|
+
voice: { mode: "id", id: options.voice },
|
|
214
|
+
output_format: {
|
|
215
|
+
container: "raw",
|
|
216
|
+
encoding: "pcm_s16le",
|
|
217
|
+
sample_rate: sampleRate,
|
|
218
|
+
},
|
|
219
|
+
add_timestamps: true,
|
|
220
|
+
};
|
|
221
|
+
const url = `${this.baseURL}/tts/sse`;
|
|
222
|
+
const response = await this.fetchFn(url, {
|
|
223
|
+
method: "POST",
|
|
224
|
+
headers: {
|
|
225
|
+
"Content-Type": "application/json",
|
|
226
|
+
"X-API-Key": resolveApiKey(this.apiKey, "CARTESIA_API_KEY", "Cartesia"),
|
|
227
|
+
"Cartesia-Version": "2025-04-16",
|
|
228
|
+
"X-User-Agent": SDK_USER_AGENT,
|
|
229
|
+
...options.headers,
|
|
230
|
+
},
|
|
231
|
+
body: JSON.stringify(body),
|
|
232
|
+
signal: options.abortSignal,
|
|
233
|
+
});
|
|
234
|
+
await handleErrorResponse(response, `cartesia/${options.modelId}`);
|
|
235
|
+
if (!response.body) {
|
|
236
|
+
throw new SpeechSDKError(`cartesia/${options.modelId}: /tts/sse response has no body`);
|
|
237
|
+
}
|
|
238
|
+
const { audio: pcmAudio, timestamps } = await collectCartesiaSse(response.body, `cartesia/${options.modelId}`);
|
|
239
|
+
// Concatenated PCM → standard WAV file, so callers don't need to know
|
|
240
|
+
// sample rate / encoding out-of-band to decode.
|
|
241
|
+
const audio = await wrapPcm16Mono(pcmAudio, sampleRate);
|
|
242
|
+
return {
|
|
243
|
+
audio,
|
|
244
|
+
mediaType: "audio/wav",
|
|
245
|
+
timestamps,
|
|
246
|
+
};
|
|
247
|
+
}
|
|
191
248
|
async stream(options) {
|
|
192
249
|
const url = `${this.baseURL}/tts/bytes`;
|
|
193
250
|
const body = {
|
|
@@ -247,4 +304,82 @@ export function createCartesia(config = {}) {
|
|
|
247
304
|
};
|
|
248
305
|
};
|
|
249
306
|
}
|
|
307
|
+
const SSE_LEADING_SPACE = /^ /;
// A blank line terminates an SSE event. Line endings may be LF or CRLF, so
// each of the two newlines may be preceded by an optional CR.
const SSE_EVENT_SEPARATOR = /\r?\n\r?\n/;
/**
 * Drain a Cartesia `/tts/sse` response body into a single audio buffer plus
 * the merged word timestamps.
 *
 * Audio and timestamp payloads are collected separately because the server
 * may interleave `chunk` and `timestamps` events in any order. Audio chunks
 * are concatenated in arrival order; timestamp messages are handed to
 * `mergeWordTimestampMessages` once the body has been read to its end.
 *
 * Event handling:
 * - `type: "chunk"` with a string `data` — base64-decoded and appended.
 * - `type: "timestamps"` with `word_timestamps` — queued for merging.
 * - `type: "error"` — raised as a `SpeechSDKError`.
 * - anything else (including events without `data:` lines) — ignored.
 *
 * Events are split on blank lines using either LF or CRLF line endings, and
 * a separator that is split across two network reads is handled.
 *
 * @param body Readable byte stream of the SSE response.
 * @param modelLabel Prefix used in error messages.
 * @returns `{ audio, timestamps }` where `audio` is a `Uint8Array` of all
 *   decoded chunks back to back.
 * @throws SpeechSDKError on a non-JSON event payload or an `error` event.
 */
async function collectCartesiaSse(body, modelLabel) {
    const audioParts = [];
    const timestampMessages = [];
    const reader = body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    // Parse one raw event (the text between two blank lines) and record it.
    const flushEvent = (raw) => {
        const dataLines = [];
        for (const rawLine of raw.split("\n")) {
            const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
            if (line.startsWith("data:")) {
                // Only a single leading space after the colon is stripped.
                dataLines.push(line.slice(5).replace(SSE_LEADING_SPACE, ""));
            }
        }
        if (dataLines.length === 0) {
            return;
        }
        const json = dataLines.join("\n");
        let parsed;
        try {
            parsed = JSON.parse(json);
        }
        catch {
            throw new SpeechSDKError(`${modelLabel}: malformed SSE event payload (not JSON)`);
        }
        if (parsed.type === "chunk" && typeof parsed.data === "string") {
            audioParts.push(base64ToUint8Array(parsed.data));
        }
        else if (parsed.type === "timestamps" && parsed.word_timestamps) {
            timestampMessages.push(parsed.word_timestamps);
        }
        else if (parsed.type === "error") {
            throw new SpeechSDKError(`${modelLabel}: SSE error: ${JSON.stringify(parsed)}`);
        }
    };

    // Dispatch every complete event currently in `buffer`, leaving any
    // trailing partial event in place for the next read.
    const drainCompleteEvents = () => {
        let separator = SSE_EVENT_SEPARATOR.exec(buffer);
        while (separator !== null) {
            const raw = buffer.slice(0, separator.index);
            buffer = buffer.slice(separator.index + separator[0].length);
            flushEvent(raw);
            separator = SSE_EVENT_SEPARATOR.exec(buffer);
        }
    };

    try {
        while (true) {
            const { value, done } = await reader.read();
            if (done) {
                // Flush any bytes the decoder was still holding, then treat
                // whatever is left as a final event without a blank line.
                buffer += decoder.decode();
                if (buffer.length > 0) {
                    flushEvent(buffer);
                }
                break;
            }
            buffer += decoder.decode(value, { stream: true });
            drainCompleteEvents();
        }
    }
    finally {
        reader.releaseLock();
    }

    const totalLen = audioParts.reduce((n, p) => n + p.byteLength, 0);
    const audio = new Uint8Array(totalLen);
    let offset = 0;
    for (const part of audioParts) {
        audio.set(part, offset);
        offset += part.byteLength;
    }
    return {
        audio,
        timestamps: mergeWordTimestampMessages(timestampMessages),
    };
}
|
|
250
385
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACtE,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAGX,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/cartesia/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACtE,OAAO,EAAE,kBAAkB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACzE,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AACjC,OAAO,EACL,UAAU,GAGX,MAAM,0BAA0B,CAAC;AAElC,OAAO,EAEL,0BAA0B,GAC3B,MAAM,gBAAgB,CAAC;AAQxB,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,UAAU,CAAC;IAChB,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,SAAS;YACb,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE;gBACT,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,IAAI;aACL;YACD,QAAQ,EAAE;gBACR,WAAW;gBACX,YAAY;gBACZ,sBAAsB;gBACtB,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE;aACrC;SACF;QACD;YACE,EAAE,EAAE,SAAS;YACb,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,CAAC;YACjB,QAAQ,EAAE,CAAC,WAAW,EAAE,EAAE,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;SAC9D;KACO,CAAC;IAEH,MAAM,CAAU,gBAAgB,GAAG,CAAC,UAAU,CAAU,CAAC;IAEzD,MAAM,CAAU,QAAQ,GAAG;QACjC,SAAS;QACT,OAAO;QACP,SAAS;QACT,SAAS;QACT,KAAK;QACL,QAAQ;QACR,OAAO;QACP,UAAU;QACV,SAAS;QACT,UAAU;QACV,MAAM;QACN,WAAW;QACX,SAAS;QACT,YAAY;QACZ,WAAW;QACX,aAAa;QACb,WAAW;QACX,WAAW;QACX,eAAe;QACf,YAAY;QACZ,OAAO;QACP,SAAS;QACT,WAAW;QACX,YAAY;QACZ,cAAc;QACd,UAAU;QACV,cAAc;QACd,aAAa;QACb,WAAW;QACX,SAAS;QACT,YAAY;QACZ,UAAU;QACV,UAAU;QACV,UAAU;QACV,UAAU;QACV,SAAS;QACT,OAAO;QACP,OAAO;QACP,UAAU;QACV,MAAM;QACN,cAAc;QACd,UAAU;QACV,QAAQ;QACR,SAAS;QACT,UAAU;QACV,YAAY;QACZ,UAAU;QACV,UAAU;QACV,KAAK;QACL,YAAY;QACZ,QAAQ;QACR,aAAa;QACb,gBAAgB;QAChB,QAAQ;QACR,UAAU;QACV,cAAc;QACd,QAAQ;QACR,OAAO;KACC,CA
AC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,yBAAyB,CAAC;QAC3D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,gBAAgB,CACd,IAAY,EACZ,OAAe;QAEf,IACE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,IAAI,UAAU,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,EACzE,CAAC;YACD,OAAO,cAAc,CAAC,IAAI,EAAE,YAAY,OAAO,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;QAChC,CAAC;QAED,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,IAAI,SAAS,GAAG,IAAI,CAAC;QAErB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YAE7C,IACG,sBAAsB,CAAC,gBAAsC,CAAC,QAAQ,CACrE,KAAK,CACN,EACD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IACG,sBAAsB,CAAC,QAA8B,CAAC,QAAQ,CAAC,KAAK,CAAC,EACtE,CAAC;gBACD,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,mBAAmB,KAAK,KAAK,CAAC,CAAC;gBAClE,SAAS;YACX,CAAC;YAED,QAAQ,CAAC,IAAI,CACX,aAAa,GAAG,iCAAiC,OAAO,mBAAmB,CAC5E,CAAC;YACF,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAMC,mEAAmE;QACnE,IAAI,OAAO,CAAC,iBAAiB,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,sBAAsB,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,YAAY,CAAC;QAExC,MAAM,IAAI,GAA4B;YACpC,aAAa,EAAE;gBACb,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,WAAW;gBACrB,WAAW,EAAE,MAAM;aACpB;YACD,GAAG,OAAO,CAAC,eAAe;YAC1B,QAAQ,EAAE,OAAO,CAAC,OAAO;YACzB,UAAU,EAAE,OAAO,CAAC,IAAI;YACxB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,KAAK,EAAE;SACzC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,aAAa,CAAC,IAAI,CAAC,MAA
M,EAAE,kBAAkB,EAAE,UAAU,CAAC;gBACvE,kBAAkB,EAAE,YAAY;gBAChC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW,CAAC;QAEtE,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,sBAAsB,CAAC,OAOpC;QAMC,wEAAwE;QACxE,oEAAoE;QACpE,uEAAuE;QACvE,mDAAmD;QACnD,MAAM,UAAU,GAAG,MAAM,CAAC;QAC1B,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,QAAQ,EAAE,OAAO,CAAC,OAAO;YACzB,UAAU,EAAE,OAAO,CAAC,IAAI;YACxB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,KAAK,EAAE;YACxC,aAAa,EAAE;gBACb,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,WAAW;gBACrB,WAAW,EAAE,UAAU;aACxB;YACD,cAAc,EAAE,IAAI;SACrB,CAAC;QAEF,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,UAAU,CAAC;QACtC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,EAAE,UAAU,CAAC;gBACvE,kBAAkB,EAAE,YAAY;gBAChC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,cAAc,CACtB,YAAY,OAAO,CAAC,OAAO,iCAAiC,CAC7D,CAAC;QACJ,CAAC;QAED,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,MAAM,kBAAkB,CAC9D,QAAQ,CAAC,IAAI,EACb,YAAY,OAAO,CAAC,OAAO,EAAE,CAC9B,CAAC;QAEF,sEAAsE;QACtE,gDAAgD;QAChD,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QACxD,OAAO;YACL,KAAK;YACL,SAAS,EAAE,WAAW;YACtB,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,YAAY,CAAC;QAExC,MAAM,IAAI,GAA4B;YACpC,aAAa,EAAE;gBACb,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,WAAW;gBACrB,WAAW,EAAE,MAAM;aACpB;YACD,GAAG,OAAO,CAAC,eAAe;YAC1B,QAAQ,EAAE,OAAO,CAAC,
OAAO;YACzB,UAAU,EAAE,OAAO,CAAC,IAAI;YACxB,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,CAAC,KAAK,EAAE;SACzC,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,EAAE,UAAU,CAAC;gBACvE,kBAAkB,EAAE,YAAY;gBAChC,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,EAAE,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEnE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,YAAY,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACvE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW;SAC/D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE;oBACf,aAAa,EAAE;wBACb,SAAS,EAAE,KAAK;wBAChB,QAAQ,EAAE,WAAW;wBACrB,WAAW,EAAE,MAAM;qBACpB;iBACF;gBACD,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;;AAGH,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AASD,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAE/B;;;;;;GAMG;AACH,KAAK,UAAU,kBAAkB,CAC/B,IAAgC,EAChC,UAAkB;IAKlB,MAAM,UAAU,GAAiB,EAAE,CAAC;IACpC,MAAM,iBAAiB,GAA6B,EAAE,CAAC;IAEvD,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;IAChC,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;IAClC,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,MAAM,UAAU,GAAG,CAAC,GAAW,EAAQ,EAAE;QACvC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,KAAK,MAAM,OAAO,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACrE,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC7B,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,iBA
AiB,EAAE,EAAE,CAAC,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QACD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO;QACT,CAAC;QACD,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,MAAwB,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAqB,CAAC;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,cAAc,CACtB,GAAG,UAAU,0CAA0C,CACxD,CAAC;QACJ,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC/D,UAAU,CAAC,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;QACnD,CAAC;aAAM,IAAI,MAAM,CAAC,IAAI,KAAK,YAAY,IAAI,MAAM,CAAC,eAAe,EAAE,CAAC;YAClE,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QACjD,CAAC;aAAM,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,IAAI,cAAc,CACtB,GAAG,UAAU,gBAAgB,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CACtD,CAAC;QACJ,CAAC;IACH,CAAC,CAAC;IAEF,IAAI,CAAC;QACH,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBAC3B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,UAAU,CAAC,MAAM,CAAC,CAAC;gBACrB,CAAC;gBACD,MAAM;YACR,CAAC;YACD,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;YAClD,IAAI,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACtC,OAAO,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;gBACvB,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACtC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;gBACpC,UAAU,CAAC,GAAG,CAAC,CAAC;gBAChB,QAAQ,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YACpC,CAAC;QACH,CAAC;IACH,CAAC;YAAS,CAAC;QACT,MAAM,CAAC,WAAW,EAAE,CAAC;IACvB,CAAC;IAED,MAAM,QAAQ,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IAClE,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;IACvC,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACxB,MAAM,IAAI,IAAI,CAAC,UAAU,CAAC;IAC5B,CAAC;IACD,OAAO;QACL,KAAK;QACL,UAAU,EAAE,0BAA0B,CAAC,iBAAiB,CAAC;KAC1D,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { WordTimestamp } from "../../timestamps.js";
|
|
2
|
+
/**
|
|
3
|
+
* Shape of the `alignment` / `normalized_alignment` object ElevenLabs returns
|
|
4
|
+
* from `/v1/text-to-speech/{voice_id}/with-timestamps`.
|
|
5
|
+
*
|
|
6
|
+
* Three parallel arrays — `characters[i]` was spoken from
|
|
7
|
+
* `character_start_times_seconds[i]` to `character_end_times_seconds[i]`.
|
|
8
|
+
*/
|
|
9
|
+
export interface ElevenLabsAlignment {
|
|
10
|
+
// End time of `characters[i]`, index-aligned with `characters` (seconds, per the field name).
readonly character_end_times_seconds: readonly number[];
|
|
11
|
+
// Start time of `characters[i]`, index-aligned with `characters` (seconds, per the field name).
readonly character_start_times_seconds: readonly number[];
|
|
12
|
+
// The spoken characters, in order; whitespace entries act as word boundaries when aggregated.
readonly characters: readonly string[];
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Aggregates ElevenLabs character-level alignment into word-level timestamps
|
|
16
|
+
* by splitting on whitespace. Non-whitespace runs become words; the word's
|
|
17
|
+
* start is the first char's start time, the end is the last char's end time.
|
|
18
|
+
*
|
|
19
|
+
* Prefer `normalized_alignment` when the input contains numbers or
|
|
20
|
+
* abbreviations — ElevenLabs expands those during synthesis ("$5" → "five
|
|
21
|
+
* dollars"), and normalized alignment matches what was actually spoken.
|
|
22
|
+
*/
|
|
23
|
+
export declare function alignmentToWordTimestamps(alignment: ElevenLabsAlignment): WordTimestamp[];
|
|
24
|
+
//# sourceMappingURL=alignment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.d.ts","sourceRoot":"","sources":["../../../src/providers/elevenlabs/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD;;;;;;GAMG;AACH,MAAM,WAAW,mBAAmB;IAClC,QAAQ,CAAC,2BAA2B,EAAE,SAAS,MAAM,EAAE,CAAC;IACxD,QAAQ,CAAC,6BAA6B,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1D,QAAQ,CAAC,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;CACxC;AAID;;;;;;;;GAQG;AACH,wBAAgB,yBAAyB,CACvC,SAAS,EAAE,mBAAmB,GAC7B,aAAa,EAAE,CA2CjB"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
const WHITESPACE_CHAR = /^\s$/;
/**
 * Aggregates ElevenLabs character-level alignment into word-level timestamps
 * by splitting on whitespace. Non-whitespace runs become words; the word's
 * start is the first char's start time, the end is the last char's end time.
 *
 * Missing entries are tolerated: a missing character counts as an empty
 * string, a missing start time as 0, and a missing end time as that
 * character's start time. A run that contributes no text (for example only
 * missing/empty character entries) never produces a word.
 *
 * Prefer `normalized_alignment` when the input contains numbers or
 * abbreviations — ElevenLabs expands those during synthesis ("$5" → "five
 * dollars"), and normalized alignment matches what was actually spoken.
 *
 * @param alignment Object with the parallel arrays `characters`,
 *   `character_start_times_seconds` and `character_end_times_seconds`.
 * @returns Array of `{ text, start, end }` in spoken order; empty when there
 *   are no characters.
 */
export function alignmentToWordTimestamps(alignment) {
    const chars = alignment.characters;
    const starts = alignment.character_start_times_seconds;
    const ends = alignment.character_end_times_seconds;
    const words = [];
    let buf = "";
    let wordStart = 0;
    let wordEnd = 0;
    let inWord = false;

    // Single flush path for both "hit whitespace" and "end of input", so the
    // empty-text guard applies to every emitted word.
    const closeWord = () => {
        if (inWord && buf.length > 0) {
            words.push({ text: buf, start: wordStart, end: wordEnd });
        }
        buf = "";
        inWord = false;
    };

    for (let i = 0; i < chars.length; i++) {
        const c = chars[i] ?? "";
        if (WHITESPACE_CHAR.test(c)) {
            closeWord();
            continue;
        }
        const s = starts[i] ?? 0;
        const e = ends[i] ?? s;
        if (!inWord) {
            wordStart = s;
            inWord = true;
        }
        buf += c;
        wordEnd = e;
    }
    closeWord();
    return words;
}
|
|
48
|
+
//# sourceMappingURL=alignment.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/elevenlabs/alignment.ts"],"names":[],"mappings":"AAeA,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B;;;;;;;;GAQG;AACH,MAAM,UAAU,yBAAyB,CACvC,SAA8B;IAE9B,MAAM,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC;IACnC,MAAM,MAAM,GAAG,SAAS,CAAC,6BAA6B,CAAC;IACvD,MAAM,IAAI,GAAG,SAAS,CAAC,2BAA2B,CAAC;IAEnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAErC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,MAAM,EAAE,CAAC;gBACX,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC1D,GAAG,GAAG,EAAE,CAAC;gBACT,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACvB,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,SAAS,GAAG,CAAC,CAAC;YACd,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,GAAG,IAAI,CAAC,CAAC;QACT,OAAO,GAAG,CAAC,CAAC;IACd,CAAC;IAED,IAAI,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|