@speech-sdk/core 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +227 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-decode.d.ts +7 -0
- package/dist/audio-decode.d.ts.map +1 -0
- package/dist/audio-decode.js +109 -0
- package/dist/audio-decode.js.map +1 -0
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +5 -21
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-output.d.ts +39 -0
- package/dist/audio-output.d.ts.map +1 -0
- package/dist/audio-output.js +111 -0
- package/dist/audio-output.js.map +1 -0
- package/dist/audio-utils.d.ts +2 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +57 -15
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -24
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +8 -183
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +9 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +72 -51
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +7 -37
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/encoders/mp3.d.ts +6 -0
- package/dist/encoders/mp3.d.ts.map +1 -0
- package/dist/encoders/mp3.js +54 -0
- package/dist/encoders/mp3.js.map +1 -0
- package/dist/errors.d.ts +20 -13
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +49 -15
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +5 -4
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +250 -93
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +7 -28
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +185 -94
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +7 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/pronunciations/errors.d.ts +5 -0
- package/dist/pronunciations/errors.d.ts.map +1 -0
- package/dist/pronunciations/errors.js +8 -0
- package/dist/pronunciations/errors.js.map +1 -0
- package/dist/pronunciations/inverse-align.d.ts +4 -0
- package/dist/pronunciations/inverse-align.d.ts.map +1 -0
- package/dist/pronunciations/inverse-align.js +54 -0
- package/dist/pronunciations/inverse-align.js.map +1 -0
- package/dist/pronunciations/merge.d.ts +4 -0
- package/dist/pronunciations/merge.d.ts.map +1 -0
- package/dist/pronunciations/merge.js +13 -0
- package/dist/pronunciations/merge.js.map +1 -0
- package/dist/pronunciations/substitute.d.ts +6 -0
- package/dist/pronunciations/substitute.d.ts.map +1 -0
- package/dist/pronunciations/substitute.js +67 -0
- package/dist/pronunciations/substitute.js.map +1 -0
- package/dist/pronunciations/types.d.ts +18 -0
- package/dist/pronunciations/types.d.ts.map +1 -0
- package/dist/pronunciations/types.js +2 -0
- package/dist/pronunciations/types.js.map +1 -0
- package/dist/pronunciations/validate.d.ts +3 -0
- package/dist/pronunciations/validate.d.ts.map +1 -0
- package/dist/pronunciations/validate.js +26 -0
- package/dist/pronunciations/validate.js.map +1 -0
- package/dist/provider-utils.d.ts +4 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +60 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +29 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +116 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +23 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +51 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +14 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +186 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +11 -20
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +49 -37
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +14 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +47 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +76 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +251 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +12 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +180 -162
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +16 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +79 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +17 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +79 -47
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +14 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +63 -48
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +15 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +105 -58
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +43 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +294 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +21 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +89 -49
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/smallest-ai/index.d.ts +47 -0
- package/dist/providers/smallest-ai/index.d.ts.map +1 -0
- package/dist/providers/smallest-ai/index.js +107 -0
- package/dist/providers/smallest-ai/index.js.map +1 -0
- package/dist/providers/xai/index.d.ts +25 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +63 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +31 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +16 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/retry-options.d.ts +6 -0
- package/dist/retry-options.d.ts.map +1 -0
- package/dist/retry-options.js +48 -0
- package/dist/retry-options.js.map +1 -0
- package/dist/speech-provider.d.ts +28 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +8 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts +4 -2
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +36 -22
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +31 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +4 -16
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +13 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
package/dist/speech-result.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { SpeechMetadata } from "./metadata.js";
|
|
2
|
-
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
+
import type { ConversationWordTimestamp, WordTimestamp } from "./timestamps.js";
|
|
3
3
|
export interface GeneratedAudioFile {
|
|
4
4
|
readonly base64: string;
|
|
5
5
|
readonly mediaType: string;
|
|
@@ -9,17 +9,16 @@ export interface SpeechResult {
|
|
|
9
9
|
readonly audio: GeneratedAudioFile;
|
|
10
10
|
readonly metadata: SpeechMetadata;
|
|
11
11
|
readonly providerMetadata?: Record<string, unknown>;
|
|
12
|
-
/**
|
|
13
|
-
* Word-level alignment data. Populated when `timestamps: "on"` or when
|
|
14
|
-
* `timestamps: "auto"` (default) is combined with a TTS provider that
|
|
15
|
-
* returns alignment natively. Undefined otherwise.
|
|
16
|
-
*
|
|
17
|
-
* Timestamps are always word-granularity with start/end in seconds.
|
|
18
|
-
* Character- or phoneme-level native data is aggregated internally.
|
|
19
|
-
*/
|
|
20
12
|
readonly timestamps?: readonly WordTimestamp[];
|
|
21
13
|
readonly warnings?: string[];
|
|
22
14
|
}
|
|
15
|
+
export interface ConversationMetadata extends SpeechMetadata {
|
|
16
|
+
readonly perTurn?: readonly SpeechMetadata[];
|
|
17
|
+
}
|
|
18
|
+
export interface ConversationResult extends Omit<SpeechResult, "metadata" | "timestamps"> {
|
|
19
|
+
readonly metadata: ConversationMetadata;
|
|
20
|
+
readonly timestamps?: readonly ConversationWordTimestamp[];
|
|
21
|
+
}
|
|
23
22
|
export declare class DefaultGeneratedAudioFile implements GeneratedAudioFile {
|
|
24
23
|
readonly mediaType: string;
|
|
25
24
|
private readonly _data;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,KAAK,EAAE,yBAAyB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhF,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,KAAK,EAAE,kBAAkB,CAAC;IACnC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpD,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IAC/C,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAqB,SAAQ,cAAc;IAG1D,QAAQ,CAAC,OAAO,CAAC,EAAE,SAAS,cAAc,EAAE,CAAC;CAC9C;AAED,MAAM,WAAW,kBACf,SAAQ,IAAI,CAAC,YAAY,EAAE,UAAU,GAAG,YAAY,CAAC;IACrD,QAAQ,CAAC,QAAQ,EAAE,oBAAoB,CAAC;IACxC,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,yBAAyB,EAAE,CAAC;CAC5D;AAED,qBAAa,yBAA0B,YAAW,kBAAkB;IAClE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsB;IAC5C,OAAO,CAAC,WAAW,CAAC,CAAa;IACjC,OAAO,CAAC,OAAO,CAAC,CAAS;gBAEb,EACV,IAAI,EACJ,SAAS,GACV,EAAE;QAAE,IAAI,EAAE,MAAM,GAAG,UAAU,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAKnD,IAAI,UAAU,IAAI,UAAU,CAe3B;IAED,IAAI,MAAM,IAAI,MAAM,CAcnB;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AA6BA,MAAM,OAAO,yBAAyB;IAC3B,SAAS,CAAS;IAEV,KAAK,CAAsB;IACpC,WAAW,CAAc;IACzB,OAAO,CAAU;IAEzB,YAAY,EACV,IAAI,EACJ,SAAS,GACwC;QACjD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,IAAI,UAAU;QACZ,IAAI,IAAI,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,WAAW,CAAC;QAC1B,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,YAAY,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,IAAI,MAAM;QACR,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,IAAI,YAAY,GAAG,EAAE,CAAC;YACtB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC9B,YAAY,IAAI,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC5C,CAAC;YACD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -1,21 +1,9 @@
|
|
|
1
1
|
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
-
/**
|
|
3
|
-
* Minimal info about an STT model. Parallels `ModelInfo` on the TTS side.
|
|
4
|
-
*/
|
|
5
2
|
export interface STTModelInfo {
|
|
6
3
|
readonly id: string;
|
|
7
4
|
readonly languages: readonly string[];
|
|
8
5
|
readonly releaseDate: string;
|
|
9
6
|
}
|
|
10
|
-
/**
|
|
11
|
-
* Transcribes generated audio and returns word-level timestamps. This is the
|
|
12
|
-
* "derived" path for `timestamps: "on"` — used when the TTS provider doesn't
|
|
13
|
-
* return alignment data natively.
|
|
14
|
-
*
|
|
15
|
-
* Providers return `WordTimestamp[]` with start/end in seconds. Normalization
|
|
16
|
-
* (ms → seconds, char/phoneme aggregation, tuple → object) happens inside the
|
|
17
|
-
* provider adapter so the public surface is uniform.
|
|
18
|
-
*/
|
|
19
7
|
export interface SpeechToTextProvider {
|
|
20
8
|
readonly defaultModel: string;
|
|
21
9
|
readonly id: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-to-text-provider.d.ts","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD
|
|
1
|
+
{"version":3,"file":"speech-to-text-provider.d.ts","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,CAAC;IAEzC,UAAU,CAAC,OAAO,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,oBAAoB,CAAC;CACzC"}
|
package/dist/stream-speech.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
import type { PronunciationsFor } from "./pronunciations/types.js";
|
|
1
2
|
import { type ResolvedModel, type Voice } from "./speech-provider.js";
|
|
2
3
|
import type { StreamSpeechResult } from "./stream-speech-result.js";
|
|
3
|
-
export declare function streamSpeech<V extends Voice = Voice>(options: {
|
|
4
|
-
model:
|
|
4
|
+
export declare function streamSpeech<V extends Voice = Voice, M extends string | ResolvedModel<V> = string | ResolvedModel<V>>(options: {
|
|
5
|
+
model: M;
|
|
5
6
|
text: string;
|
|
6
7
|
voice: V;
|
|
7
8
|
apiKey?: string;
|
|
@@ -9,5 +10,6 @@ export declare function streamSpeech<V extends Voice = Voice>(options: {
|
|
|
9
10
|
maxRetries?: number;
|
|
10
11
|
abortSignal?: AbortSignal;
|
|
11
12
|
headers?: Record<string, string>;
|
|
13
|
+
pronunciations?: PronunciationsFor<M>;
|
|
12
14
|
}): Promise<StreamSpeechResult>;
|
|
13
15
|
//# sourceMappingURL=stream-speech.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-speech.d.ts","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AASA,OAAO,
|
|
1
|
+
{"version":3,"file":"stream-speech.d.ts","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAKnE,OAAO,EAIL,KAAK,aAAa,EAClB,KAAK,KAAK,EACX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,wBAAsB,YAAY,CAChC,CAAC,SAAS,KAAK,GAAG,KAAK,EACvB,CAAC,SAAS,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,GAAG,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,EAC/D,OAAO,EAAE;IACT,KAAK,EAAE,CAAC,CAAC;IACT,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,cAAc,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC,CAAC;CACvC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAmG9B"}
|
package/dist/stream-speech.js
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
1
|
import pRetry from "p-retry";
|
|
2
2
|
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
|
-
import {
|
|
3
|
+
import { NoSpeechGeneratedError, StreamingNotSupportedError, } from "./errors.js";
|
|
4
|
+
import { mergeRules } from "./pronunciations/merge.js";
|
|
5
|
+
import { substitute } from "./pronunciations/substitute.js";
|
|
6
|
+
import { validatePronunciationsInput } from "./pronunciations/validate.js";
|
|
4
7
|
import { resolveModel } from "./resolve-provider.js";
|
|
5
|
-
import {
|
|
8
|
+
import { buildRetryOptions } from "./retry-options.js";
|
|
9
|
+
import { FEATURES, hasFeature, isSpeechGatewayModel, } from "./speech-provider.js";
|
|
6
10
|
export async function streamSpeech(options) {
|
|
7
11
|
const { model, voice, providerOptions, abortSignal, headers } = options;
|
|
8
12
|
const maxRetries = options.maxRetries ?? 2;
|
|
9
13
|
const resolved = resolveModel(model, { apiKey: options.apiKey });
|
|
10
14
|
const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
|
|
15
|
+
const isGateway = isSpeechGatewayModel(resolved);
|
|
16
|
+
validatePronunciationsInput(options.pronunciations, isGateway);
|
|
11
17
|
const modelInfo = resolved.provider.models.find((m) => m.id === resolved.modelId);
|
|
12
18
|
if (modelInfo && !hasFeature(modelInfo, FEATURES.STREAMING)) {
|
|
13
19
|
throw new StreamingNotSupportedError(modelIdentifier);
|
|
@@ -35,32 +41,40 @@ export async function streamSpeech(options) {
|
|
|
35
41
|
? `Text is empty after removing unsupported audio tags for ${modelIdentifier}.`
|
|
36
42
|
: "Text must not be empty.");
|
|
37
43
|
}
|
|
44
|
+
let textToSend = processedText;
|
|
45
|
+
if (!isGateway && options.pronunciations?.rules?.length) {
|
|
46
|
+
const ruleMap = mergeRules(options.pronunciations.rules);
|
|
47
|
+
textToSend = substitute(processedText, ruleMap).text;
|
|
48
|
+
}
|
|
38
49
|
const streamFn = resolved.provider.stream.bind(resolved.provider);
|
|
39
50
|
const startTime = performance.now();
|
|
40
|
-
const result = await pRetry(() =>
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
51
|
+
const result = await pRetry(() => {
|
|
52
|
+
if (isGateway) {
|
|
53
|
+
const gatewayProvider = resolved.provider;
|
|
54
|
+
return gatewayProvider.stream({
|
|
55
|
+
modelId: resolved.modelId,
|
|
56
|
+
text: textToSend,
|
|
57
|
+
voice: voice,
|
|
58
|
+
providerOptions,
|
|
59
|
+
abortSignal,
|
|
60
|
+
headers,
|
|
61
|
+
pronunciations: options.pronunciations,
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
return streamFn({
|
|
65
|
+
modelId: resolved.modelId,
|
|
66
|
+
text: textToSend,
|
|
67
|
+
voice,
|
|
68
|
+
providerOptions,
|
|
69
|
+
abortSignal,
|
|
70
|
+
headers,
|
|
71
|
+
});
|
|
72
|
+
}, buildRetryOptions({ maxRetries, abortSignal }));
|
|
57
73
|
const ttfbMs = Math.round(performance.now() - startTime);
|
|
58
74
|
const metadata = {
|
|
59
75
|
latencyMs: ttfbMs,
|
|
60
76
|
ttfbMs,
|
|
61
|
-
inputChars:
|
|
62
|
-
provider: resolved.provider.id,
|
|
63
|
-
model: resolved.modelId,
|
|
77
|
+
inputChars: options.text.length,
|
|
64
78
|
...(result.audioDurationMs != null && {
|
|
65
79
|
audioDurationMs: result.audioDurationMs,
|
|
66
80
|
}),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,
|
|
1
|
+
{"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,gCAAgC,CAAC;AAE5D,OAAO,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAE3E,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,QAAQ,EACR,UAAU,EACV,oBAAoB,GAGrB,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAGhC,OAUD;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IACtE,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACjD,2BAA2B,CAAC,OAAO,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;IAE/D,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,IAAI,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QACxD,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QACzD,UAAU,GAAG,UAAU,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACvD,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE;QAC/B,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,eAAe,GAAG,QAAQ,CAAC,QAAiC,CAAC;YACnE,OAAO,eAAe,CAAC,MAAM,CAAC;gBAC5B,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,KAA0B;gBACjC,eAAe;gBACf,WAAW;gBACX,OAAO;gBACP,cAAc,EAAE,OAAO,CAAC,cAAc;aACvC,CAAC,CAAC;QACL,CAAC;QACD,OAAO,QAAQ,CAAC;YACd,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,IAAI,EAAE,UAAU;YAChB,KAAK;YACL,eAAe;YACf,WAAW;YACX,OAAO;SACR,CAAC,CAAC;IACL,CAAC,EAAE,iBAAiB,CAAC,EAAE,UAAU,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAEzD,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,MAAM;QACjB,MAAM;QACN,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM;QAC/B,GAAG,CAAC,MAAM,CAAC,eAAe,IAAI,IAAI,IAAI;YACpC,eAAe,EAAE,MAAM,CAAC,eAAe;SACxC,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
package/dist/timestamps.d.ts
CHANGED
|
@@ -1,23 +1,9 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Word-granularity alignment data. Timestamps are always in seconds from
|
|
3
|
-
* the start of the generated audio. Providers that natively return character
|
|
4
|
-
* or phoneme granularity are aggregated to words internally.
|
|
5
|
-
*/
|
|
6
1
|
export interface WordTimestamp {
|
|
7
2
|
readonly end: number;
|
|
8
3
|
readonly start: number;
|
|
9
4
|
readonly text: string;
|
|
10
5
|
}
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
* - `"auto"` (default): return timestamps only if the TTS provider supplies
|
|
15
|
-
* them natively. Free, no extra API calls.
|
|
16
|
-
* - `"on"`: always return timestamps. Uses native data when available;
|
|
17
|
-
* otherwise falls back to a speech-to-text round-trip of the synthesized
|
|
18
|
-
* audio (cost + latency implications).
|
|
19
|
-
* - `"off"`: never return timestamps, even when the provider would give them
|
|
20
|
-
* away for free.
|
|
21
|
-
*/
|
|
22
|
-
export type TimestampMode = "on" | "auto" | "off";
|
|
6
|
+
export interface ConversationWordTimestamp extends WordTimestamp {
|
|
7
|
+
readonly turnIndex: number;
|
|
8
|
+
}
|
|
23
9
|
//# sourceMappingURL=timestamps.d.ts.map
|
package/dist/timestamps.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"timestamps.d.ts","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"timestamps.d.ts","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,yBAA0B,SAAQ,aAAa;IAC9D,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
package/dist/turns.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ConversationWordTimestamp } from "./timestamps.js";
|
|
2
|
+
export interface TurnTimestamp {
|
|
3
|
+
readonly end: number;
|
|
4
|
+
readonly start: number;
|
|
5
|
+
readonly text: string;
|
|
6
|
+
readonly turnIndex: number;
|
|
7
|
+
}
|
|
8
|
+
export declare function timestampsToTurns(timestamps: readonly ConversationWordTimestamp[]): readonly TurnTimestamp[];
|
|
9
|
+
//# sourceMappingURL=turns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turns.d.ts","sourceRoot":"","sources":["../src/turns.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,iBAAiB,CAAC;AAEjE,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAKD,wBAAgB,iBAAiB,CAC/B,UAAU,EAAE,SAAS,yBAAyB,EAAE,GAC/C,SAAS,aAAa,EAAE,CAiB1B"}
|
package/dist/turns.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Assumes turnIndex runs are monotonic; non-adjacent runs of the same turnIndex would produce duplicate entries.
|
|
2
|
+
export function timestampsToTurns(timestamps) {
|
|
3
|
+
const turns = [];
|
|
4
|
+
for (const word of timestamps) {
|
|
5
|
+
const last = turns.at(-1);
|
|
6
|
+
if (last && last.turnIndex === word.turnIndex) {
|
|
7
|
+
last.end = word.end;
|
|
8
|
+
last.text = `${last.text} ${word.text}`;
|
|
9
|
+
}
|
|
10
|
+
else {
|
|
11
|
+
turns.push({
|
|
12
|
+
turnIndex: word.turnIndex,
|
|
13
|
+
start: word.start,
|
|
14
|
+
end: word.end,
|
|
15
|
+
text: word.text,
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
return turns;
|
|
20
|
+
}
|
|
21
|
+
//# sourceMappingURL=turns.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turns.js","sourceRoot":"","sources":["../src/turns.ts"],"names":[],"mappings":"AAWA,iHAAiH;AACjH,MAAM,UAAU,iBAAiB,CAC/B,UAAgD;IAEhD,MAAM,KAAK,GAA6B,EAAE,CAAC;IAC3C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YAC9C,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC;YACpB,IAAI,CAAC,IAAI,GAAG,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC;gBACT,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,IAAI,EAAE,IAAI,CAAC,IAAI;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,12 +1,43 @@
|
|
|
1
|
+
import type { AudioOutput } from "./audio-output.js";
|
|
2
|
+
import type { PronunciationsInput } from "./pronunciations/types.js";
|
|
1
3
|
import type { ResolvedModel, Voice } from "./speech-provider.js";
|
|
4
|
+
export type { AudioOutput, AudioOutputFormat } from "./audio-output.js";
|
|
5
|
+
export type { CaptionFormat, CaptionsOptions } from "./captions.js";
|
|
6
|
+
export type { ConversationTurn, GenerateConversationOptions, } from "./conversation/types.js";
|
|
7
|
+
export type { SpeechMetadata } from "./metadata.js";
|
|
8
|
+
export type { Pronunciation, PronunciationsInput, } from "./pronunciations/types.js";
|
|
9
|
+
export type { CartesiaSpeechProviderConfig } from "./providers/cartesia/index.js";
|
|
10
|
+
export type { DeepgramSpeechProviderConfig } from "./providers/deepgram/index.js";
|
|
11
|
+
export type { ElevenLabsSpeechProviderConfig } from "./providers/elevenlabs/index.js";
|
|
12
|
+
export type { FalSpeechProviderConfig } from "./providers/fal/index.js";
|
|
13
|
+
export type { FishAudioSpeechProviderConfig } from "./providers/fish-audio/index.js";
|
|
14
|
+
export type { SpeechGatewayProviderConfig } from "./providers/gateway/index.js";
|
|
15
|
+
export type { GoogleSpeechProviderConfig } from "./providers/google/index.js";
|
|
16
|
+
export type { HumeSpeechProviderConfig } from "./providers/hume/index.js";
|
|
17
|
+
export type { InworldSpeechProviderConfig } from "./providers/inworld/index.js";
|
|
18
|
+
export type { MistralSpeechProviderConfig } from "./providers/mistral/index.js";
|
|
19
|
+
export type { MurfSpeechProviderConfig } from "./providers/murf/index.js";
|
|
20
|
+
export type { OpenAISpeechProviderConfig } from "./providers/openai/index.js";
|
|
21
|
+
export type { ResembleSpeechProviderConfig } from "./providers/resemble/index.js";
|
|
22
|
+
export type { XaiSpeechProviderConfig } from "./providers/xai/index.js";
|
|
23
|
+
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
24
|
+
export type { ConversationResult, GeneratedAudioFile, SpeechResult, } from "./speech-result.js";
|
|
25
|
+
export type { ResolvedSTTModel, SpeechToTextProvider, STTModelInfo, } from "./speech-to-text-provider.js";
|
|
26
|
+
export type { StreamSpeechResult } from "./stream-speech-result.js";
|
|
27
|
+
export type { ConversationWordTimestamp, WordTimestamp, } from "./timestamps.js";
|
|
28
|
+
export type { TurnTimestamp } from "./turns.js";
|
|
2
29
|
export interface GenerateSpeechOptions<V extends Voice = Voice> {
|
|
3
30
|
abortSignal?: AbortSignal;
|
|
4
31
|
apiKey?: string;
|
|
5
32
|
headers?: Record<string, string>;
|
|
6
33
|
maxRetries?: number;
|
|
7
34
|
model: string | ResolvedModel<V>;
|
|
35
|
+
output?: AudioOutput;
|
|
36
|
+
pronunciations?: PronunciationsInput;
|
|
8
37
|
providerOptions?: Record<string, unknown>;
|
|
9
38
|
text: string;
|
|
39
|
+
timestamps?: boolean;
|
|
10
40
|
voice: V;
|
|
41
|
+
volumeDbfs?: number;
|
|
11
42
|
}
|
|
12
43
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACxE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EACV,aAAa,EACb,mBAAmB,GACpB,MAAM,2BAA2B,CAAC;AACnC,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,8BAA8B,EAAE,MAAM,iCAAiC,CAAC;AACtF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AACrF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,kBAAkB,EAClB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EACV,yBAAyB,EACzB,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,cAAc,CAAC,EAAE,mBAAmB,CAAC;IACrC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,KAAK,EAAE,CAAC,CAAC;IACT,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
|
package/dist/volume-adjust.d.ts
CHANGED
|
@@ -3,12 +3,6 @@ interface AdjustVolumeInput {
|
|
|
3
3
|
readonly mediaType: string;
|
|
4
4
|
readonly volumeDbfs: number;
|
|
5
5
|
}
|
|
6
|
-
/**
|
|
7
|
-
* Decode the provider's PCM/WAV output, RMS-normalize to the target dBFS,
|
|
8
|
-
* and re-encode as 16-bit mono WAV. Lazy-loaded by generateSpeech only when
|
|
9
|
-
* `volumeDbfs` is set so callers that never use volume adjustment don't pay
|
|
10
|
-
* for the WAV mux dependency chain at import time.
|
|
11
|
-
*/
|
|
12
6
|
export declare function adjustVolume(input: AdjustVolumeInput): Promise<Uint8Array>;
|
|
13
7
|
export {};
|
|
14
8
|
//# sourceMappingURL=volume-adjust.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAQA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAsB,YAAY,CAChC,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,UAAU,CAAC,CAgBrB"}
|
package/dist/volume-adjust.js
CHANGED
|
@@ -1,27 +1,15 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
* and re-encode as 16-bit mono WAV. Lazy-loaded by generateSpeech only when
|
|
5
|
-
* `volumeDbfs` is set so callers that never use volume adjustment don't pay
|
|
6
|
-
* for the WAV mux dependency chain at import time.
|
|
7
|
-
*/
|
|
1
|
+
import { decodeAudioToPcm16 } from "./audio-decode.js";
|
|
2
|
+
import { base64ToUint8Array } from "./audio-utils.js";
|
|
3
|
+
import { concatPcmToWav, dbfsToInt16Rms, normalizeRms, } from "./conversation/pcm-concat.js";
|
|
8
4
|
export async function adjustVolume(input) {
|
|
9
5
|
const bytes = input.audio instanceof Uint8Array
|
|
10
6
|
? input.audio
|
|
11
7
|
: base64ToUint8Array(input.audio);
|
|
12
|
-
const segment =
|
|
8
|
+
const segment = await decodeAudioToPcm16(bytes, input.mediaType);
|
|
13
9
|
const [normalized] = normalizeRms([segment], dbfsToInt16Rms(input.volumeDbfs));
|
|
14
10
|
return await concatPcmToWav([normalized], {
|
|
15
11
|
gapMs: 0,
|
|
16
12
|
targetSampleRate: normalized.sampleRate,
|
|
17
13
|
});
|
|
18
14
|
}
|
|
19
|
-
function base64ToUint8Array(b64) {
|
|
20
|
-
const binaryString = atob(b64);
|
|
21
|
-
const out = new Uint8Array(binaryString.length);
|
|
22
|
-
for (let i = 0; i < binaryString.length; i++) {
|
|
23
|
-
out[i] = binaryString.charCodeAt(i);
|
|
24
|
-
}
|
|
25
|
-
return out;
|
|
26
|
-
}
|
|
27
15
|
//# sourceMappingURL=volume-adjust.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,
|
|
1
|
+
{"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EACL,cAAc,EACd,cAAc,EACd,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAwB;IAExB,MAAM,KAAK,GACT,KAAK,CAAC,KAAK,YAAY,UAAU;QAC/B,CAAC,CAAC,KAAK,CAAC,KAAK;QACb,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,KAAK,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACjE,MAAM,CAAC,UAAU,CAAC,GAAG,YAAY,CAC/B,CAAC,OAAO,CAAC,EACT,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACjC,CAAC;IAEF,OAAO,MAAM,cAAc,CAAC,CAAC,UAAU,CAAC,EAAE;QACxC,KAAK,EAAE,CAAC;QACR,gBAAgB,EAAE,UAAU,CAAC,UAAU;KACxC,CAAC,CAAC;AACL,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@speech-sdk/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "Universal, cross-platform text-to-speech SDK with multi-provider support.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -10,75 +10,20 @@
|
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"default": "./dist/index.js"
|
|
12
12
|
},
|
|
13
|
-
"./
|
|
14
|
-
"types": "./dist/
|
|
15
|
-
"default": "./dist/
|
|
13
|
+
"./providers": {
|
|
14
|
+
"types": "./dist/providers.d.ts",
|
|
15
|
+
"default": "./dist/providers.js"
|
|
16
16
|
},
|
|
17
|
-
"./
|
|
18
|
-
"types": "./dist/
|
|
19
|
-
"default": "./dist/
|
|
20
|
-
},
|
|
21
|
-
"./openai": {
|
|
22
|
-
"types": "./dist/providers/openai/index.d.ts",
|
|
23
|
-
"default": "./dist/providers/openai/index.js"
|
|
24
|
-
},
|
|
25
|
-
"./elevenlabs": {
|
|
26
|
-
"types": "./dist/providers/elevenlabs/index.d.ts",
|
|
27
|
-
"default": "./dist/providers/elevenlabs/index.js"
|
|
28
|
-
},
|
|
29
|
-
"./deepgram": {
|
|
30
|
-
"types": "./dist/providers/deepgram/index.d.ts",
|
|
31
|
-
"default": "./dist/providers/deepgram/index.js"
|
|
32
|
-
},
|
|
33
|
-
"./cartesia": {
|
|
34
|
-
"types": "./dist/providers/cartesia/index.d.ts",
|
|
35
|
-
"default": "./dist/providers/cartesia/index.js"
|
|
36
|
-
},
|
|
37
|
-
"./hume": {
|
|
38
|
-
"types": "./dist/providers/hume/index.d.ts",
|
|
39
|
-
"default": "./dist/providers/hume/index.js"
|
|
40
|
-
},
|
|
41
|
-
"./inworld": {
|
|
42
|
-
"types": "./dist/providers/inworld/index.d.ts",
|
|
43
|
-
"default": "./dist/providers/inworld/index.js"
|
|
44
|
-
},
|
|
45
|
-
"./google": {
|
|
46
|
-
"types": "./dist/providers/google/index.d.ts",
|
|
47
|
-
"default": "./dist/providers/google/index.js"
|
|
48
|
-
},
|
|
49
|
-
"./fish-audio": {
|
|
50
|
-
"types": "./dist/providers/fish-audio/index.d.ts",
|
|
51
|
-
"default": "./dist/providers/fish-audio/index.js"
|
|
52
|
-
},
|
|
53
|
-
"./murf": {
|
|
54
|
-
"types": "./dist/providers/murf/index.d.ts",
|
|
55
|
-
"default": "./dist/providers/murf/index.js"
|
|
56
|
-
},
|
|
57
|
-
"./resemble": {
|
|
58
|
-
"types": "./dist/providers/resemble/index.d.ts",
|
|
59
|
-
"default": "./dist/providers/resemble/index.js"
|
|
60
|
-
},
|
|
61
|
-
"./fal-ai": {
|
|
62
|
-
"types": "./dist/providers/fal/index.d.ts",
|
|
63
|
-
"default": "./dist/providers/fal/index.js"
|
|
64
|
-
},
|
|
65
|
-
"./mistral": {
|
|
66
|
-
"types": "./dist/providers/mistral/index.d.ts",
|
|
67
|
-
"default": "./dist/providers/mistral/index.js"
|
|
68
|
-
},
|
|
69
|
-
"./xai": {
|
|
70
|
-
"types": "./dist/providers/xai/index.d.ts",
|
|
71
|
-
"default": "./dist/providers/xai/index.js"
|
|
72
|
-
},
|
|
73
|
-
"./stt/openai": {
|
|
74
|
-
"types": "./dist/stt-providers/openai/index.d.ts",
|
|
75
|
-
"default": "./dist/stt-providers/openai/index.js"
|
|
17
|
+
"./types": {
|
|
18
|
+
"types": "./dist/types.d.ts",
|
|
19
|
+
"default": "./dist/types.js"
|
|
76
20
|
}
|
|
77
21
|
},
|
|
78
22
|
"files": [
|
|
79
23
|
"dist",
|
|
80
24
|
"README.md"
|
|
81
25
|
],
|
|
26
|
+
"sideEffects": false,
|
|
82
27
|
"keywords": [
|
|
83
28
|
"tts",
|
|
84
29
|
"text-to-speech",
|
|
@@ -94,15 +39,17 @@
|
|
|
94
39
|
"url": "https://github.com/Jellypod-Inc/speech-sdk"
|
|
95
40
|
},
|
|
96
41
|
"dependencies": {
|
|
42
|
+
"@mediabunny/mp3-encoder": "^1.42.0",
|
|
97
43
|
"mediabunny": "^1.40.1",
|
|
98
|
-
"p-retry": "^8.0.0"
|
|
44
|
+
"p-retry": "^8.0.0",
|
|
45
|
+
"zod": "^4.3.6"
|
|
99
46
|
},
|
|
100
47
|
"devDependencies": {
|
|
101
|
-
"@biomejs/biome": "2.4.
|
|
48
|
+
"@biomejs/biome": "2.4.13",
|
|
102
49
|
"@types/node": "^25.5.0",
|
|
103
50
|
"dotenv": "^17.3.1",
|
|
104
51
|
"typescript": "^5.8.0",
|
|
105
|
-
"ultracite": "7.
|
|
52
|
+
"ultracite": "7.6.2",
|
|
106
53
|
"vite": "^7.3.2",
|
|
107
54
|
"vitest": "^4.1.3"
|
|
108
55
|
},
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import type { ResolvedSTTModel, SpeechToTextProvider } from "../../speech-to-text-provider.js";
|
|
2
|
-
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
|
-
export interface OpenAISpeechToTextProviderConfig {
|
|
4
|
-
apiKey?: string;
|
|
5
|
-
baseURL?: string;
|
|
6
|
-
fetch?: typeof globalThis.fetch;
|
|
7
|
-
}
|
|
8
|
-
/**
|
|
9
|
-
* OpenAI Whisper / gpt-4o-transcribe adapter for the SDK's derived-timestamps
|
|
10
|
-
* path. Uses `/v1/audio/transcriptions` with `timestamp_granularities: ["word"]`
|
|
11
|
-
* and `response_format: "verbose_json"`.
|
|
12
|
-
*
|
|
13
|
-
* Note: `gpt-4o-transcribe-diarize` is intentionally not listed — that
|
|
14
|
-
* variant does not support `timestamp_granularities`.
|
|
15
|
-
*/
|
|
16
|
-
export declare class OpenAISpeechToTextProvider implements SpeechToTextProvider {
|
|
17
|
-
readonly id = "openai";
|
|
18
|
-
readonly defaultModel = "whisper-1";
|
|
19
|
-
readonly models: readonly [{
|
|
20
|
-
readonly id: "whisper-1";
|
|
21
|
-
readonly releaseDate: "2023-03-01";
|
|
22
|
-
readonly languages: readonly ["af", "ar", "az", "be", "bg", "bn", "bs", "ca", "cs", "cy", "da", "de", "el", "en", "es", "et", "fa", "fi", "fr", "gl", "he", "hi", "hr", "hu", "hy", "id", "is", "it", "ja", "kk", "kn", "ko", "lt", "lv", "mi", "mk", "mr", "ms", "ne", "nl", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sr", "sv", "sw", "ta", "th", "tl", "tr", "uk", "ur", "vi", "zh"];
|
|
23
|
-
}];
|
|
24
|
-
private readonly apiKey;
|
|
25
|
-
private readonly baseURL;
|
|
26
|
-
private readonly fetchFn;
|
|
27
|
-
constructor(config?: OpenAISpeechToTextProviderConfig);
|
|
28
|
-
transcribe(options: {
|
|
29
|
-
modelId: string;
|
|
30
|
-
audio: Uint8Array;
|
|
31
|
-
mediaType: string;
|
|
32
|
-
language?: string;
|
|
33
|
-
abortSignal?: AbortSignal;
|
|
34
|
-
headers?: Record<string, string>;
|
|
35
|
-
}): Promise<{
|
|
36
|
-
timestamps: WordTimestamp[];
|
|
37
|
-
text?: string;
|
|
38
|
-
providerMetadata?: Record<string, unknown>;
|
|
39
|
-
}>;
|
|
40
|
-
}
|
|
41
|
-
export declare function createOpenAISTT(config?: OpenAISpeechToTextProviderConfig): (modelId?: string) => ResolvedSTTModel;
|
|
42
|
-
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/stt-providers/openai/index.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACrB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,MAAM,WAAW,gCAAgC;IAC/C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAiED;;;;;;;GAOG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,QAAQ,CAAC,EAAE,YAAY;IACvB,QAAQ,CAAC,YAAY,eAAe;IAMpC,QAAQ,CAAC,MAAM;;;;OAMJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,GAAE,gCAAqC;IAMnD,UAAU,CAAC,OAAO,EAAE;QACxB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;CAsDH;AAED,wBAAgB,eAAe,CAAC,MAAM,GAAE,gCAAqC,IAGjD,UAAU,MAAM,KAAG,gBAAgB,CAM9D"}
|