@speech-sdk/core 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +227 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-decode.d.ts +7 -0
- package/dist/audio-decode.d.ts.map +1 -0
- package/dist/audio-decode.js +109 -0
- package/dist/audio-decode.js.map +1 -0
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +5 -21
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-output.d.ts +39 -0
- package/dist/audio-output.d.ts.map +1 -0
- package/dist/audio-output.js +111 -0
- package/dist/audio-output.js.map +1 -0
- package/dist/audio-utils.d.ts +2 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +57 -15
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -24
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +8 -183
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +9 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +72 -51
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +7 -37
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/encoders/mp3.d.ts +6 -0
- package/dist/encoders/mp3.d.ts.map +1 -0
- package/dist/encoders/mp3.js +54 -0
- package/dist/encoders/mp3.js.map +1 -0
- package/dist/errors.d.ts +20 -13
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +49 -15
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +5 -4
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +250 -93
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +7 -28
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +185 -94
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +7 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/pronunciations/errors.d.ts +5 -0
- package/dist/pronunciations/errors.d.ts.map +1 -0
- package/dist/pronunciations/errors.js +8 -0
- package/dist/pronunciations/errors.js.map +1 -0
- package/dist/pronunciations/inverse-align.d.ts +4 -0
- package/dist/pronunciations/inverse-align.d.ts.map +1 -0
- package/dist/pronunciations/inverse-align.js +54 -0
- package/dist/pronunciations/inverse-align.js.map +1 -0
- package/dist/pronunciations/merge.d.ts +4 -0
- package/dist/pronunciations/merge.d.ts.map +1 -0
- package/dist/pronunciations/merge.js +13 -0
- package/dist/pronunciations/merge.js.map +1 -0
- package/dist/pronunciations/substitute.d.ts +6 -0
- package/dist/pronunciations/substitute.d.ts.map +1 -0
- package/dist/pronunciations/substitute.js +67 -0
- package/dist/pronunciations/substitute.js.map +1 -0
- package/dist/pronunciations/types.d.ts +18 -0
- package/dist/pronunciations/types.d.ts.map +1 -0
- package/dist/pronunciations/types.js +2 -0
- package/dist/pronunciations/types.js.map +1 -0
- package/dist/pronunciations/validate.d.ts +3 -0
- package/dist/pronunciations/validate.d.ts.map +1 -0
- package/dist/pronunciations/validate.js +26 -0
- package/dist/pronunciations/validate.js.map +1 -0
- package/dist/provider-utils.d.ts +4 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +60 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +29 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +116 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +23 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +51 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +14 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +186 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +11 -20
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +49 -37
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +14 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +47 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +76 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +251 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +12 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +180 -162
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +16 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +79 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +17 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +79 -47
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +14 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +63 -48
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +15 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +105 -58
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +43 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +294 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +21 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +89 -49
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/smallest-ai/index.d.ts +47 -0
- package/dist/providers/smallest-ai/index.d.ts.map +1 -0
- package/dist/providers/smallest-ai/index.js +107 -0
- package/dist/providers/smallest-ai/index.js.map +1 -0
- package/dist/providers/xai/index.d.ts +25 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +63 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +31 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +16 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/retry-options.d.ts +6 -0
- package/dist/retry-options.d.ts.map +1 -0
- package/dist/retry-options.js +48 -0
- package/dist/retry-options.js.map +1 -0
- package/dist/speech-provider.d.ts +28 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +8 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts +4 -2
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +36 -22
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +31 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +4 -16
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +13 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
|
@@ -1,16 +1,13 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
// Resemble `/synthesize` `audio_timestamps`: per-grapheme arrays, times in seconds; phoneme arrays unused.
|
|
3
|
+
export const resembleAudioTimestampsSchema = z.object({
|
|
4
|
+
graph_chars: z.array(z.string()),
|
|
5
|
+
graph_times: z.array(z.array(z.number())),
|
|
6
|
+
phon_chars: z.array(z.string()).optional(),
|
|
7
|
+
phon_times: z.array(z.array(z.number())).optional(),
|
|
8
|
+
});
|
|
1
9
|
const WHITESPACE_CHAR = /^\s$/;
|
|
2
|
-
|
|
3
|
-
* Aggregate Resemble's grapheme-level timing into word-level timestamps.
|
|
4
|
-
*
|
|
5
|
-
* Algorithm: walk `graph_chars` in order. Whitespace flushes the current
|
|
6
|
-
* word and is dropped. Non-whitespace characters (letters AND punctuation)
|
|
7
|
-
* accumulate into a buffer — punctuation stays attached to its adjacent
|
|
8
|
-
* word ("Hello," is one word) to mirror the ElevenLabs aggregator.
|
|
9
|
-
*
|
|
10
|
-
* Each entry in `graph_times` is `[startSeconds, endSeconds]`; the word
|
|
11
|
-
* inherits the first character's start and the last character's end.
|
|
12
|
-
* Entries with malformed timing tuples are skipped to avoid NaN bleed.
|
|
13
|
-
*/
|
|
10
|
+
// Whitespace flushes; punctuation stays attached to the adjacent word ("Hello,").
|
|
14
11
|
export function audioTimestampsToWordTimestamps(alignment) {
|
|
15
12
|
const chars = alignment.graph_chars;
|
|
16
13
|
const times = alignment.graph_times;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"alignment.js","sourceRoot":"","sources":["../../../src/providers/resemble/alignment.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,2GAA2G;AAC3G,MAAM,CAAC,MAAM,6BAA6B,GAAG,CAAC,CAAC,MAAM,CAAC;IACpD,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IAChC,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IACzC,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,EAAE;IAC1C,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE;CACpD,CAAC,CAAC;AAKH,MAAM,eAAe,GAAG,MAAM,CAAC;AAE/B,kFAAkF;AAClF,MAAM,UAAU,+BAA+B,CAC7C,SAAkC;IAElC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,MAAM,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC;IACpC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAErC,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,MAAM,EAAE,CAAC;gBACX,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC1D,GAAG,GAAG,EAAE,CAAC;gBACT,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACf,IAAI,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,SAAS;QACX,CAAC;QAED,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,SAAS,GAAG,CAAC,CAAC;YACd,MAAM,GAAG,IAAI,CAAC;QAChB,CAAC;QACD,GAAG,IAAI,CAAC,CAAC;QACT,OAAO,GAAG,CAAC,CAAC;IACd,CAAC;IAED,IAAI,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,K
AAK,EAAE,SAAS,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -1,22 +1,19 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { AudioOutput } from "../../audio-output.js";
|
|
2
|
+
import type { ModelInfo, ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
3
|
+
import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
|
|
2
4
|
import type { WordTimestamp } from "../../timestamps.js";
|
|
3
5
|
export interface ResembleSpeechProviderConfig {
|
|
4
6
|
apiKey?: string;
|
|
5
7
|
baseURL?: string;
|
|
8
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
6
9
|
fetch?: typeof globalThis.fetch;
|
|
7
10
|
}
|
|
11
|
+
export declare const RESEMBLE_PROVIDER_ID: "resemble";
|
|
12
|
+
export declare const RESEMBLE_MODELS: readonly ModelInfo[];
|
|
8
13
|
export declare class ResembleSpeechProvider implements SpeechProvider<string, string> {
|
|
9
|
-
readonly id
|
|
14
|
+
readonly id: "resemble";
|
|
10
15
|
readonly defaultModel = "default";
|
|
11
|
-
readonly models: readonly [
|
|
12
|
-
readonly id: "default";
|
|
13
|
-
readonly releaseDate: "2025-09-04";
|
|
14
|
-
readonly languages: readonly ["en", "ar", "da", "de", "el", "es", "fi", "fr", "he", "hi", "it", "ja", "ko", "ms", "nl", "no", "pl", "pt", "ru", "sv", "sw", "tr", "zh"];
|
|
15
|
-
readonly features: readonly ["streaming", "open-source", "inline-voice-cloning", {
|
|
16
|
-
readonly id: "timestamps";
|
|
17
|
-
readonly mode: "native";
|
|
18
|
-
}];
|
|
19
|
-
}];
|
|
16
|
+
readonly models: readonly ModelInfo[];
|
|
20
17
|
private readonly apiKey;
|
|
21
18
|
private readonly baseURL;
|
|
22
19
|
private readonly fetchFn;
|
|
@@ -53,6 +50,19 @@ export declare class ResembleSpeechProvider implements SpeechProvider<string, st
|
|
|
53
50
|
};
|
|
54
51
|
mediaType: string;
|
|
55
52
|
} | undefined;
|
|
53
|
+
resolveOutputFormat(modelId: string, output: AudioOutput): {
|
|
54
|
+
providerOptions: {
|
|
55
|
+
output_format: string;
|
|
56
|
+
precision: string;
|
|
57
|
+
};
|
|
58
|
+
expectedMediaType: string;
|
|
59
|
+
} | {
|
|
60
|
+
providerOptions: {
|
|
61
|
+
output_format: string;
|
|
62
|
+
precision?: undefined;
|
|
63
|
+
};
|
|
64
|
+
expectedMediaType: string;
|
|
65
|
+
} | undefined;
|
|
56
66
|
}
|
|
57
67
|
export declare function createResemble(config?: ResembleSpeechProviderConfig): (modelId?: string) => ResolvedModel<string>;
|
|
58
68
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAMzD,OAAO,KAAK,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AACzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAWzD,MAAM,WAAW,4BAA4B;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,eAAO,MAAM,oBAAoB,EAAG,UAAmB,CAAC;AAExD,eAAO,MAAM,eAAe,EAAE,SAAS,SAAS,EAoCtC,CAAC;AAEX,qBAAa,sBAAuB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAC3E,QAAQ,CAAC,EAAE,aAAwB;IACnC,QAAQ,CAAC,YAAY,aAAa;IAElC,QAAQ,CAAC,MAAM,uBAAmB;IAElC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,4BAA4B;IAM1C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACjC,iBAAiB,CAAC,EAAE,OAAO,CAAC;KAC7B,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC3C,UAAU,CAAC,EAAE,aAAa,EAAE,CAAC;KAC9B,CAAC;IA0CI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC;IAqCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;IAWhC,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW;;;;;;;;;;;;;CA0BzD;AAWD,wBAAgB,cAAc,CAAC,MAAM,GAAE,4BAAiC,IAI7C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAOlE"}
|
|
@@ -1,45 +1,52 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
1
2
|
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
-
import { audioTimestampsToWordTimestamps, } from "./alignment.js";
|
|
3
|
+
import { audioTimestampsToWordTimestamps, resembleAudioTimestampsSchema, } from "./alignment.js";
|
|
4
|
+
const synthesizeResponseSchema = z.object({
|
|
5
|
+
audio_content: z.string(),
|
|
6
|
+
audio_timestamps: resembleAudioTimestampsSchema.optional(),
|
|
7
|
+
});
|
|
8
|
+
export const RESEMBLE_PROVIDER_ID = "resemble";
|
|
9
|
+
export const RESEMBLE_MODELS = [
|
|
10
|
+
{
|
|
11
|
+
id: "default",
|
|
12
|
+
releaseDate: "2025-09-04",
|
|
13
|
+
languages: [
|
|
14
|
+
"en",
|
|
15
|
+
"ar",
|
|
16
|
+
"da",
|
|
17
|
+
"de",
|
|
18
|
+
"el",
|
|
19
|
+
"es",
|
|
20
|
+
"fi",
|
|
21
|
+
"fr",
|
|
22
|
+
"he",
|
|
23
|
+
"hi",
|
|
24
|
+
"it",
|
|
25
|
+
"ja",
|
|
26
|
+
"ko",
|
|
27
|
+
"ms",
|
|
28
|
+
"nl",
|
|
29
|
+
"no",
|
|
30
|
+
"pl",
|
|
31
|
+
"pt",
|
|
32
|
+
"ru",
|
|
33
|
+
"sv",
|
|
34
|
+
"sw",
|
|
35
|
+
"tr",
|
|
36
|
+
"zh",
|
|
37
|
+
],
|
|
38
|
+
features: [
|
|
39
|
+
"streaming",
|
|
40
|
+
"open-source",
|
|
41
|
+
"inline-voice-cloning",
|
|
42
|
+
"timestamps",
|
|
43
|
+
],
|
|
44
|
+
},
|
|
45
|
+
];
|
|
3
46
|
export class ResembleSpeechProvider {
|
|
4
|
-
id =
|
|
47
|
+
id = RESEMBLE_PROVIDER_ID;
|
|
5
48
|
defaultModel = "default";
|
|
6
|
-
models =
|
|
7
|
-
{
|
|
8
|
-
id: "default",
|
|
9
|
-
releaseDate: "2025-09-04",
|
|
10
|
-
languages: [
|
|
11
|
-
"en",
|
|
12
|
-
"ar",
|
|
13
|
-
"da",
|
|
14
|
-
"de",
|
|
15
|
-
"el",
|
|
16
|
-
"es",
|
|
17
|
-
"fi",
|
|
18
|
-
"fr",
|
|
19
|
-
"he",
|
|
20
|
-
"hi",
|
|
21
|
-
"it",
|
|
22
|
-
"ja",
|
|
23
|
-
"ko",
|
|
24
|
-
"ms",
|
|
25
|
-
"nl",
|
|
26
|
-
"no",
|
|
27
|
-
"pl",
|
|
28
|
-
"pt",
|
|
29
|
-
"ru",
|
|
30
|
-
"sv",
|
|
31
|
-
"sw",
|
|
32
|
-
"tr",
|
|
33
|
-
"zh",
|
|
34
|
-
],
|
|
35
|
-
features: [
|
|
36
|
-
"streaming",
|
|
37
|
-
"open-source",
|
|
38
|
-
"inline-voice-cloning",
|
|
39
|
-
{ id: "timestamps", mode: "native" },
|
|
40
|
-
],
|
|
41
|
-
},
|
|
42
|
-
];
|
|
49
|
+
models = RESEMBLE_MODELS;
|
|
43
50
|
apiKey;
|
|
44
51
|
baseURL;
|
|
45
52
|
fetchFn;
|
|
@@ -66,16 +73,15 @@ export class ResembleSpeechProvider {
|
|
|
66
73
|
body: JSON.stringify(body),
|
|
67
74
|
signal: options.abortSignal,
|
|
68
75
|
});
|
|
69
|
-
await handleErrorResponse(response
|
|
70
|
-
//
|
|
71
|
-
|
|
72
|
-
const json = (await response.json());
|
|
76
|
+
await handleErrorResponse(response);
|
|
77
|
+
// Gate timestamp projection on caller opt-in, not the always-present audio_timestamps field.
|
|
78
|
+
const json = synthesizeResponseSchema.parse(await response.json());
|
|
73
79
|
const timestamps = options.includeTimestamps && json.audio_timestamps
|
|
74
80
|
? audioTimestampsToWordTimestamps(json.audio_timestamps)
|
|
75
81
|
: undefined;
|
|
76
82
|
return {
|
|
77
83
|
audio: json.audio_content,
|
|
78
|
-
mediaType:
|
|
84
|
+
mediaType: resembleMediaType(body.output_format),
|
|
79
85
|
timestamps,
|
|
80
86
|
};
|
|
81
87
|
}
|
|
@@ -97,7 +103,7 @@ export class ResembleSpeechProvider {
|
|
|
97
103
|
body: JSON.stringify(body),
|
|
98
104
|
signal: options.abortSignal,
|
|
99
105
|
});
|
|
100
|
-
await handleErrorResponse(response
|
|
106
|
+
await handleErrorResponse(response);
|
|
101
107
|
if (!response.body) {
|
|
102
108
|
throw new Error(`resemble/${options.modelId}: response has no body`);
|
|
103
109
|
}
|
|
@@ -108,23 +114,57 @@ export class ResembleSpeechProvider {
|
|
|
108
114
|
}
|
|
109
115
|
getStitchOptions(modelId) {
|
|
110
116
|
if (this.models.some((m) => m.id === modelId)) {
|
|
111
|
-
// Resemble
|
|
112
|
-
// the stitch decoder doesn't accept; pin precision to PCM_16 so the
|
|
113
|
-
// returned WAV is 16-bit signed PCM.
|
|
117
|
+
// Pin precision to PCM_16 — Resemble defaults to PCM_32 (float WAV) which the stitch decoder rejects.
|
|
114
118
|
return {
|
|
115
119
|
providerOptions: { precision: "PCM_16" },
|
|
116
120
|
mediaType: "audio/wav",
|
|
117
121
|
};
|
|
118
122
|
}
|
|
119
|
-
return
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
resolveOutputFormat(modelId, output) {
|
|
126
|
+
if (!this.models.some((m) => m.id === modelId)) {
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
switch (output.format) {
|
|
130
|
+
case "wav":
|
|
131
|
+
// Pin precision to PCM_16 — Resemble defaults to PCM_32 (float WAV) which downstream decoders reject.
|
|
132
|
+
return {
|
|
133
|
+
providerOptions: { output_format: "wav", precision: "PCM_16" },
|
|
134
|
+
expectedMediaType: "audio/wav",
|
|
135
|
+
};
|
|
136
|
+
case "mp3":
|
|
137
|
+
return {
|
|
138
|
+
providerOptions: { output_format: "mp3" },
|
|
139
|
+
expectedMediaType: "audio/mpeg",
|
|
140
|
+
};
|
|
141
|
+
case "pcm":
|
|
142
|
+
// No native pcm container; request wav (PCM_16) and let the SDK unwrap via mediabunny.
|
|
143
|
+
return {
|
|
144
|
+
providerOptions: { output_format: "wav", precision: "PCM_16" },
|
|
145
|
+
expectedMediaType: "audio/wav",
|
|
146
|
+
};
|
|
147
|
+
default:
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
function resembleMediaType(outputFormat) {
|
|
153
|
+
switch (typeof outputFormat === "string" ? outputFormat.toLowerCase() : "") {
|
|
154
|
+
case "mp3":
|
|
155
|
+
return "audio/mpeg";
|
|
156
|
+
default:
|
|
157
|
+
return "audio/wav";
|
|
120
158
|
}
|
|
121
159
|
}
|
|
122
160
|
export function createResemble(config = {}) {
|
|
123
161
|
const provider = new ResembleSpeechProvider(config);
|
|
162
|
+
const fallbackSTT = config.fallbackSTT;
|
|
124
163
|
return function resemble(modelId) {
|
|
125
164
|
return {
|
|
126
165
|
provider,
|
|
127
166
|
modelId: modelId ?? provider.defaultModel,
|
|
167
|
+
...(fallbackSTT && { fallbackSTT }),
|
|
128
168
|
};
|
|
129
169
|
};
|
|
130
170
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/resemble/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAQjC,OAAO,EACL,+BAA+B,EAC/B,6BAA6B,GAC9B,MAAM,gBAAgB,CAAC;AAExB,MAAM,wBAAwB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,aAAa,EAAE,CAAC,CAAC,MAAM,EAAE;IACzB,gBAAgB,EAAE,6BAA6B,CAAC,QAAQ,EAAE;CAC3D,CAAC,CAAC;AASH,MAAM,CAAC,MAAM,oBAAoB,GAAG,UAAmB,CAAC;AAExD,MAAM,CAAC,MAAM,eAAe,GAAyB;IACnD;QACE,EAAE,EAAE,SAAS;QACb,WAAW,EAAE,YAAY;QACzB,SAAS,EAAE;YACT,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;SACL;QACD,QAAQ,EAAE;YACR,WAAW;YACX,aAAa;YACb,sBAAsB;YACtB,YAAY;SACb;KACF;CACO,CAAC;AAEX,MAAM,OAAO,sBAAsB;IACxB,EAAE,GAAG,oBAAoB,CAAC;IAC1B,YAAY,GAAG,SAAS,CAAC;IAEzB,MAAM,GAAG,eAAe,CAAC;IAEjB,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAoC;QAC9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,+BAA+B,CAAC;QACjE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAQd;QAMC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,aAAa,CAAC;QAEzC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,6FAA6F;QAC7F,MAAM,IAAI,GAAG,wBAAwB,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;QAEnE,MAAM,UAAU,GACd,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,gBAAgB;YAChD,CAAC,CAAC,+BAA+B,CAAC,IA
AI,CAAC,gBAAgB,CAAC;YACxD,CAAC,CAAC,SAAS,CAAC;QAEhB,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,aAAa;YACzB,SAAS,EAAE,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC;YAChD,UAAU;SACX,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAOZ;QAKC,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,OAAO,SAAS,CAAC;QAErC,MAAM,IAAI,GAA4B;YACpC,GAAG,OAAO,CAAC,eAAe;YAC1B,UAAU,EAAE,OAAO,CAAC,KAAK;YACzB,IAAI,EAAE,OAAO,CAAC,IAAI;SACnB,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE;YACvC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,aAAa,CAC1B,IAAI,CAAC,MAAM,EACX,kBAAkB,EAClB,UAAU,CACX;gBACD,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CAAC,CAAC;QAEH,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CAAC,YAAY,OAAO,CAAC,OAAO,wBAAwB,CAAC,CAAC;QACvE,CAAC;QAED,OAAO;YACL,MAAM,EAAE,QAAQ,CAAC,IAAI;YACrB,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW;SAC/D,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,sGAAsG;YACtG,OAAO;gBACL,eAAe,EAAE,EAAE,SAAS,EAAE,QAAQ,EAAE;gBACxC,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;IAED,mBAAmB,CAAC,OAAe,EAAE,MAAmB;QACtD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC/C,OAAO;QACT,CAAC;QACD,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,KAAK;gBACR,sGAAsG;gBACtG,OAAO;oBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE;oBAC9D,iBAAiB,EAAE,WAAW;iBAC/B,CAAC;YACJ,KAAK,KAAK;gBACR,OAAO;oBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE;oBACzC,iBAAiB,EAAE,YAAY;iBAChC,CAAC;YACJ,KAAK,KAAK;gBACR,uFAAuF;gBACvF,OAAO;oBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE;oBAC9D,iBAAiB,EAAE,WAAW;iBAC/B,CAAC;YACJ;gBACE,OAAO;QACX,CAAC;IACH,CAAC;CACF;AAED,SAAS,iBAAiB,CAAC,YAAqB;IAC9C,QAAQ,OAAO,YAAY,KAAK,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC3E,KAAK,KAAK;YACR,OAA
O,YAAY,CAAC;QACtB;YACE,OAAO,WAAW,CAAC;IACvB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,SAAuC,EAAE;IACtE,MAAM,QAAQ,GAAG,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC;IACpD,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;IAEvC,OAAO,SAAS,QAAQ,CAAC,OAAgB;QACvC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;YACzC,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;SACpC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { AudioOutput } from "../../audio-output.js";
|
|
2
|
+
import type { ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
3
|
+
export interface SmallestAISpeechProviderConfig {
|
|
4
|
+
apiKey?: string;
|
|
5
|
+
baseURL?: string;
|
|
6
|
+
fetch?: typeof globalThis.fetch;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Type declaration for the Smallest AI text-to-speech provider.
 * It declares a single model, "lightning-v3.1", which is also the default.
 */
export declare class SmallestAISpeechProvider implements SpeechProvider<string, string> {
|
|
9
|
+
readonly id = "smallest-ai";
|
|
10
|
+
readonly defaultModel = "lightning-v3.1";
|
|
11
|
+
readonly models: readonly [{
|
|
12
|
+
readonly id: "lightning-v3.1";
|
|
13
|
+
readonly releaseDate: "2025-01-01";
|
|
14
|
+
readonly languages: readonly ["en", "hi", "es", "ta"];
|
|
15
|
+
readonly features: readonly [];
|
|
16
|
+
}];
|
|
17
|
+
private readonly apiKey;
|
|
18
|
+
private readonly baseURL;
|
|
19
|
+
private readonly fetchFn;
|
|
20
|
+
/** Stores the API key, base URL and fetch function taken from `config`. */
constructor(config: SmallestAISpeechProviderConfig);
|
|
21
|
+
/**
 * Synthesizes `text` with a single POST to `<baseURL>/<modelId>/get_speech`.
 * In the implementation `voice` defaults to "magnus" and the output format to "wav";
 * `providerOptions` are merged into the JSON request body and `headers` into the request headers.
 * Resolves to the response bytes plus a media type (the response content-type header,
 * or one derived from the requested format when that header is absent).
 */
generate(options: {
|
|
22
|
+
modelId: string;
|
|
23
|
+
text: string;
|
|
24
|
+
voice?: string;
|
|
25
|
+
providerOptions?: Record<string, unknown>;
|
|
26
|
+
abortSignal?: AbortSignal;
|
|
27
|
+
headers?: Record<string, string>;
|
|
28
|
+
}): Promise<{
|
|
29
|
+
audio: Uint8Array;
|
|
30
|
+
mediaType: string;
|
|
31
|
+
}>;
|
|
32
|
+
/** Returns WAV provider options and media type for a model listed in `models`; `undefined` for any other model id. */
getStitchOptions(modelId: string): {
|
|
33
|
+
providerOptions: {
|
|
34
|
+
output_format: string;
|
|
35
|
+
};
|
|
36
|
+
mediaType: string;
|
|
37
|
+
} | undefined;
|
|
38
|
+
/**
 * Maps `output.format` ("wav", "mp3" or "pcm" in the implementation) to provider options
 * and the expected media type; `undefined` for an unknown model id or any other format.
 */
resolveOutputFormat(modelId: string, output: AudioOutput): {
|
|
39
|
+
providerOptions: {
|
|
40
|
+
output_format: string;
|
|
41
|
+
sample_rate: number;
|
|
42
|
+
};
|
|
43
|
+
expectedMediaType: string;
|
|
44
|
+
} | undefined;
|
|
45
|
+
}
|
|
46
|
+
/**
 * Creates one provider from `config` and returns a resolver function.
 * The resolver pairs that provider with the given model id, or with the provider's default model when the id is omitted.
 */
export declare function createSmallestAI(config?: SmallestAISpeechProviderConfig): (modelId?: string) => ResolvedModel<string>;
|
|
47
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/smallest-ai/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAMzD,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAE9E,MAAM,WAAW,8BAA8B;IAC7C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,qBAAa,wBACX,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IAEzC,QAAQ,CAAC,EAAE,iBAAiB;IAC5B,QAAQ,CAAC,YAAY,oBAAoB;IAEzC,QAAQ,CAAC,MAAM;;;;;OAOJ;IAEX,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,8BAA8B;IAM5C,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IAyCF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;IAUhC,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW;;;;;;;CAwBzD;AAgBD,wBAAgB,gBAAgB,CAAC,MAAM,GAAE,8BAAmC,IAE/C,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAMpE"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
|
|
2
|
+
/**
 * Text-to-speech provider for the Smallest AI HTTP API.
 *
 * Exposes a single model (`lightning-v3.1`) and synthesizes speech by POSTing
 * a JSON body to `<baseURL>/<modelId>/get_speech`.
 */
export class SmallestAISpeechProvider {
  id = "smallest-ai";
  defaultModel = "lightning-v3.1";
  models = [
    {
      id: "lightning-v3.1",
      releaseDate: "2025-01-01",
      languages: ["en", "hi", "es", "ta"],
      features: [],
    },
  ];
  apiKey;
  baseURL;
  fetchFn;

  /**
   * @param {object} config
   * @param {string} [config.apiKey] - API key, resolved through `resolveApiKey` on each request.
   * @param {string} [config.baseURL] - API base URL; defaults to "https://api.smallest.ai/waves/v1".
   * @param {typeof globalThis.fetch} [config.fetch] - fetch implementation; defaults to the global `fetch`.
   */
  constructor(config) {
    this.apiKey = config.apiKey;
    // Drop trailing slashes once so that request URLs never contain an empty
    // "//" path segment when the caller configures "https://host/v1/".
    this.baseURL = String(
      config.baseURL ?? "https://api.smallest.ai/waves/v1",
    ).replace(/\/+$/, "");
    this.fetchFn = config.fetch ?? globalThis.fetch.bind(globalThis);
  }

  /**
   * Synthesize `options.text` with a single POST request.
   *
   * @param {object} options
   * @param {string} options.modelId - model path segment of the endpoint.
   * @param {string} options.text - text to speak.
   * @param {string} [options.voice] - voice id; defaults to "magnus".
   * @param {Record<string, unknown>} [options.providerOptions] - extra body fields;
   *   they may override `voice_id` and `language` but never `text`.
   * @param {AbortSignal} [options.abortSignal] - forwarded to fetch as `signal`.
   * @param {Record<string, string>} [options.headers] - extra headers; these win over the defaults.
   * @returns {Promise<{audio: Uint8Array, mediaType: string}>} the response bytes and their media type.
   */
  async generate(options) {
    const outputFormat = options.providerOptions?.output_format ?? "wav";
    const body = {
      voice_id: options.voice ?? "magnus",
      language: "auto",
      ...options.providerOptions,
      // Placed after the spread so providerOptions cannot replace the text.
      text: options.text,
      output_format: outputFormat,
    };
    const response = await this.fetchFn(
      `${this.baseURL}/${options.modelId}/get_speech`,
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${resolveApiKey(this.apiKey, "SMALLEST_API_KEY", "Smallest AI")}`,
          "X-User-Agent": SDK_USER_AGENT,
          "X-Source": "jellypod-speech-sdk",
          ...options.headers,
        },
        body: JSON.stringify(body),
        signal: options.abortSignal,
      },
    );
    await handleErrorResponse(response);
    const arrayBuffer = await response.arrayBuffer();
    // Prefer the server-reported type; otherwise derive one from the request.
    const mediaType =
      response.headers.get("content-type") ??
      smallestAIMediaType(outputFormat, body.sample_rate);
    return {
      audio: new Uint8Array(arrayBuffer),
      mediaType,
    };
  }

  /**
   * Options to use when generated segments are going to be stitched together.
   *
   * @param {string} modelId
   * @returns {{providerOptions: {output_format: string}, mediaType: string} | undefined}
   *   WAV settings for a model listed in `models`, otherwise `undefined`.
   */
  getStitchOptions(modelId) {
    if (this.models.some((m) => m.id === modelId)) {
      return {
        providerOptions: { output_format: "wav" },
        mediaType: "audio/wav",
      };
    }
    return;
  }

  /**
   * Translate a requested output format into provider options.
   *
   * @param {string} modelId
   * @param {{format: string}} output - only `format` is read here.
   * @returns {{providerOptions: {output_format: string, sample_rate: number}, expectedMediaType: string} | undefined}
   *   settings for "wav", "mp3" or "pcm" on a known model, otherwise `undefined`.
   */
  resolveOutputFormat(modelId, output) {
    if (!this.models.some((m) => m.id === modelId)) {
      return;
    }
    switch (output.format) {
      case "wav":
        return {
          providerOptions: { output_format: "wav", sample_rate: 24_000 },
          expectedMediaType: "audio/wav",
        };
      case "mp3":
        return {
          providerOptions: { output_format: "mp3", sample_rate: 24_000 },
          expectedMediaType: "audio/mpeg",
        };
      case "pcm":
        return {
          providerOptions: { output_format: "pcm", sample_rate: 24_000 },
          expectedMediaType: "audio/pcm;rate=24000",
        };
      default:
        return;
    }
  }
}
|
|
85
|
+
/**
 * Derive a media type from a Smallest AI output format.
 *
 * @param {unknown} format - requested `output_format`; matched
 *   case-insensitively, and anything that is not a string counts as "wav".
 * @param {unknown} sampleRate - requested `sample_rate`; only used for "pcm".
 * @returns {string} "audio/mpeg", "audio/pcm;rate=<rate>", "audio/basic", or
 *   "audio/wav" for every other format.
 */
function smallestAIMediaType(format, sampleRate) {
  // NaN, Infinity and non-positive values are not usable rates (JSON.stringify
  // would also send NaN/Infinity as null), so they fall back to 24 kHz instead
  // of producing something like "audio/pcm;rate=NaN".
  const hasUsableRate =
    typeof sampleRate === "number" &&
    Number.isFinite(sampleRate) &&
    sampleRate > 0;
  const rate = hasUsableRate ? sampleRate : 24_000;
  switch (typeof format === "string" ? format.toLowerCase() : "wav") {
    case "mp3":
      return "audio/mpeg";
    case "pcm":
      return `audio/pcm;rate=${rate}`;
    case "mulaw":
      return "audio/basic";
    default:
      return "audio/wav";
  }
}
|
|
98
|
+
/**
 * Build a Smallest AI model resolver.
 *
 * One provider instance is created up front and shared by every model the
 * returned function resolves.
 *
 * @param {object} [config] - passed unchanged to `SmallestAISpeechProvider`.
 * @returns {(modelId?: string) => {provider: SmallestAISpeechProvider, modelId: string}}
 *   resolver that uses the provider's default model when `modelId` is nullish.
 */
export function createSmallestAI(config = {}) {
  const sharedProvider = new SmallestAISpeechProvider(config);

  function smallestAI(modelId) {
    const resolvedId = modelId ?? sharedProvider.defaultModel;
    return { provider: sharedProvider, modelId: resolvedId };
  }

  return smallestAI;
}
|
|
107
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/providers/smallest-ai/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AASjC,MAAM,OAAO,wBAAwB;IAG1B,EAAE,GAAG,aAAa,CAAC;IACnB,YAAY,GAAG,gBAAgB,CAAC;IAEhC,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,gBAAgB;YACpB,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAU;YAC5C,QAAQ,EAAE,EAAE;SACb;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,MAAsC;QAChD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,kCAAkC,CAAC;QACpE,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAOd;QAIC,MAAM,YAAY,GACf,OAAO,CAAC,eAAe,EAAE,aAAoC,IAAI,KAAK,CAAC;QAE1E,MAAM,IAAI,GAA4B;YACpC,QAAQ,EAAE,OAAO,CAAC,KAAK,IAAI,QAAQ;YACnC,QAAQ,EAAE,MAAM;YAChB,GAAG,OAAO,CAAC,eAAe;YAC1B,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,aAAa,EAAE,YAAY;SAC5B,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CACjC,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,aAAa,EAC/C;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,kBAAkB,EAAE,aAAa,CAAC,EAAE;gBACxF,cAAc,EAAE,cAAc;gBAC9B,UAAU,EAAE,qBAAqB;gBACjC,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CACF,CAAC;QAEF,MAAM,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QAEpC,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,SAAS,GACb,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;YACpC,mBAAmB,CAAC,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QAEtD,OAAO;YACL,KAAK,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;YAClC,SAAS;SACV,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,OAAe;QAC9B,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC9C,OAAO;gBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE;gBACzC,SAAS,EAAE,WAAW;aACvB,CAAC;QACJ,CAAC;QACD,OAAO;IACT,CAAC;IAED,mBAAmB,CAAC,OAAe,EAAE,MAAmB;QACtD,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,C
AAC,CAAC,CAAC,EAAE,KAAK,OAAO,CAAC,EAAE,CAAC;YAC/C,OAAO;QACT,CAAC;QACD,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;YACtB,KAAK,KAAK;gBACR,OAAO;oBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE;oBAC9D,iBAAiB,EAAE,WAAW;iBAC/B,CAAC;YACJ,KAAK,KAAK;gBACR,OAAO;oBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE;oBAC9D,iBAAiB,EAAE,YAAY;iBAChC,CAAC;YACJ,KAAK,KAAK;gBACR,OAAO;oBACL,eAAe,EAAE,EAAE,aAAa,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE;oBAC9D,iBAAiB,EAAE,sBAAsB;iBAC1C,CAAC;YACJ;gBACE,OAAO;QACX,CAAC;IACH,CAAC;CACF;AAED,SAAS,mBAAmB,CAAC,MAAe,EAAE,UAAmB;IAC/D,MAAM,IAAI,GAAG,OAAO,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC;IAClE,QAAQ,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC;QAClE,KAAK,KAAK;YACR,OAAO,YAAY,CAAC;QACtB,KAAK,KAAK;YACR,OAAO,kBAAkB,IAAI,EAAE,CAAC;QAClC,KAAK,OAAO;YACV,OAAO,aAAa,CAAC;QACvB;YACE,OAAO,WAAW,CAAC;IACvB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,SAAyC,EAAE;IAC1E,MAAM,QAAQ,GAAG,IAAI,wBAAwB,CAAC,MAAM,CAAC,CAAC;IACtD,OAAO,SAAS,UAAU,CAAC,OAAgB;QACzC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -1,19 +1,18 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { AudioOutput } from "../../audio-output.js";
|
|
2
|
+
import type { ModelInfo, ResolvedModel, SpeechProvider } from "../../speech-provider.js";
|
|
3
|
+
import type { ResolvedSTTModel } from "../../speech-to-text-provider.js";
|
|
2
4
|
export interface XaiSpeechProviderConfig {
|
|
3
5
|
apiKey?: string;
|
|
4
6
|
baseURL?: string;
|
|
7
|
+
fallbackSTT?: ResolvedSTTModel;
|
|
5
8
|
fetch?: typeof globalThis.fetch;
|
|
6
9
|
}
|
|
10
|
+
export declare const XAI_PROVIDER_ID: "xai";
|
|
11
|
+
export declare const XAI_MODELS: readonly ModelInfo[];
|
|
7
12
|
export declare class XaiSpeechProvider implements SpeechProvider<string, string> {
|
|
8
|
-
readonly id
|
|
13
|
+
readonly id: "xai";
|
|
9
14
|
readonly defaultModel = "grok-tts";
|
|
10
|
-
|
|
11
|
-
readonly models: readonly [{
|
|
12
|
-
readonly id: "grok-tts";
|
|
13
|
-
readonly releaseDate: "2025-11-01";
|
|
14
|
-
readonly languages: readonly ["en", "ar", "bn", "zh", "fr", "de", "hi", "id", "it", "ja", "ko", "pt", "ru", "es", "tr", "vi"];
|
|
15
|
-
readonly features: readonly ["streaming", "audio-tags"];
|
|
16
|
-
}];
|
|
15
|
+
readonly models: readonly ModelInfo[];
|
|
17
16
|
private readonly apiKey;
|
|
18
17
|
private readonly baseURL;
|
|
19
18
|
private readonly fetchFn;
|
|
@@ -55,6 +54,23 @@ export declare class XaiSpeechProvider implements SpeechProvider<string, string>
|
|
|
55
54
|
};
|
|
56
55
|
mediaType: string;
|
|
57
56
|
} | undefined;
|
|
57
|
+
resolveOutputFormat(modelId: string, output: AudioOutput): {
|
|
58
|
+
providerOptions: {
|
|
59
|
+
output_format: {
|
|
60
|
+
codec: string;
|
|
61
|
+
sample_rate?: undefined;
|
|
62
|
+
};
|
|
63
|
+
};
|
|
64
|
+
expectedMediaType: string;
|
|
65
|
+
} | {
|
|
66
|
+
providerOptions: {
|
|
67
|
+
output_format: {
|
|
68
|
+
codec: string;
|
|
69
|
+
sample_rate: number;
|
|
70
|
+
};
|
|
71
|
+
};
|
|
72
|
+
expectedMediaType: string;
|
|
73
|
+
} | undefined;
|
|
58
74
|
}
|
|
59
75
|
export declare function createXai(config?: XaiSpeechProviderConfig): (modelId?: string) => ResolvedModel<string>;
|
|
60
76
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/xai/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/providers/xai/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAMzD,OAAO,KAAK,EACV,SAAS,EACT,aAAa,EACb,cAAc,EACf,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,WAAW,uBAAuB;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;CACjC;AAED,eAAO,MAAM,eAAe,EAAG,KAAc,CAAC;AAsB9C,eAAO,MAAM,UAAU,EAAE,SAAS,SAAS,EAOjC,CAAC;AAEX,qBAAa,iBAAkB,YAAW,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC;IACtE,QAAQ,CAAC,EAAE,QAAmB;IAC9B,QAAQ,CAAC,YAAY,cAAc;IAEnC,QAAQ,CAAC,MAAM,uBAAc;IAE7B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,MAAM,EAAE,uBAAuB;IAO3C,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,EAAE,CAAA;KAAE;IAIpE,OAAO,CAAC,SAAS;IAiBjB,OAAO,CAAC,iBAAiB;IAgBzB,OAAO,CAAC,aAAa;IAKf,QAAQ,CAAC,OAAO,EAAE;QACtB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IA2BI,MAAM,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,MAAM,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC1C,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,MAAM,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IA4BF,gBAAgB,CAAC,OAAO,EAAE,MAAM;;;;;;;;IAUhC,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW;;;;;;;;;;;;;;;;;CA0BzD;AAED,wBAAgB,SAAS,CAAC,MAAM,GAAE,uBAA4B,IAGxC,UAAU,MAAM,KAAG,aAAa,CAAC,MAAM,CAAC,CAO7D"}
|