@speech-sdk/core 0.6.2 → 0.8.0-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/README.md +267 -264
- package/dist/__tests__/e2e/_save-audio.d.ts +5 -24
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +19 -42
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +1 -9
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +10 -13
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +29 -0
- package/dist/captions.d.ts.map +1 -0
- package/dist/captions.js +193 -0
- package/dist/captions.js.map +1 -0
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +3 -1
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +54 -13
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -19
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +10 -0
- package/dist/derive-timestamps.d.ts.map +1 -0
- package/dist/derive-timestamps.js +24 -0
- package/dist/derive-timestamps.js.map +1 -0
- package/dist/errors.d.ts +20 -2
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +28 -2
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +5 -4
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +191 -38
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +2 -10
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +111 -33
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +2 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +29 -0
- package/dist/logger.js.map +1 -0
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -1
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +36 -39
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +8 -0
- package/dist/providers/cartesia/alignment.d.ts.map +1 -0
- package/dist/providers/cartesia/alignment.js +18 -0
- package/dist/providers/cartesia/alignment.js.map +1 -0
- package/dist/providers/cartesia/index.d.ts +11 -13
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +184 -61
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +10 -0
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
- package/dist/providers/elevenlabs/alignment.js +47 -0
- package/dist/providers/elevenlabs/alignment.js.map +1 -0
- package/dist/providers/elevenlabs/index.d.ts +10 -26
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +216 -154
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -43
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -86
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +33 -0
- package/dist/providers/hume/alignment.d.ts.map +1 -0
- package/dist/providers/hume/alignment.js +37 -0
- package/dist/providers/hume/alignment.js.map +1 -0
- package/dist/providers/hume/index.d.ts +11 -13
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +105 -41
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +11 -0
- package/dist/providers/inworld/alignment.d.ts.map +1 -0
- package/dist/providers/inworld/alignment.js +24 -0
- package/dist/providers/inworld/alignment.js.map +1 -0
- package/dist/providers/inworld/index.d.ts +10 -14
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +55 -38
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +13 -0
- package/dist/providers/murf/alignment.d.ts.map +1 -0
- package/dist/providers/murf/alignment.js +22 -0
- package/dist/providers/murf/alignment.js.map +1 -0
- package/dist/providers/murf/index.d.ts +11 -13
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +73 -56
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -20
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -102
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +11 -0
- package/dist/providers/resemble/alignment.d.ts.map +1 -0
- package/dist/providers/resemble/alignment.js +54 -0
- package/dist/providers/resemble/alignment.js.map +1 -0
- package/dist/providers/resemble/index.d.ts +10 -8
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +58 -40
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +7 -59
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +19 -15
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +9 -14
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +5 -0
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +28 -0
- package/dist/speech-to-text-provider.d.ts.map +1 -0
- package/dist/speech-to-text-provider.js +2 -0
- package/dist/speech-to-text-provider.js.map +1 -0
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +9 -0
- package/dist/timestamps.d.ts.map +1 -0
- package/dist/timestamps.js +2 -0
- package/dist/timestamps.js.map +1 -0
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +12 -63
package/dist/speech-provider.js
CHANGED
|
@@ -1,24 +1,19 @@
|
|
|
1
|
-
|
|
1
|
+
import { SPEECH_GATEWAY_PROVIDER_ID, } from "./providers/gateway/index.js";
|
|
2
2
|
export const FEATURES = {
|
|
3
3
|
STREAMING: "streaming",
|
|
4
4
|
AUDIO_TAGS: "audio-tags",
|
|
5
5
|
INLINE_VOICE_CLONING: "inline-voice-cloning",
|
|
6
6
|
OPEN_SOURCE: "open-source",
|
|
7
|
+
TIMESTAMPS: "timestamps",
|
|
7
8
|
};
|
|
8
9
|
export function hasFeature(model, id) {
|
|
9
|
-
|
|
10
|
-
if (typeof f === "string" ? f === id : f.id === id) {
|
|
11
|
-
return true;
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
return false;
|
|
10
|
+
return model.features.some((f) => typeof f === "string" ? f === id : f.id === id);
|
|
15
11
|
}
|
|
16
|
-
export function
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
return undefined;
|
|
12
|
+
export function isSpeechGatewayModel(model) {
|
|
13
|
+
return model.provider.id === SPEECH_GATEWAY_PROVIDER_ID;
|
|
14
|
+
}
|
|
15
|
+
export function modelDeclaresNativeTimestamps(resolved) {
|
|
16
|
+
const modelInfo = resolved.provider.models?.find((m) => m.id === resolved.modelId);
|
|
17
|
+
return modelInfo != null && hasFeature(modelInfo, FEATURES.TIMESTAMPS);
|
|
23
18
|
}
|
|
24
19
|
//# sourceMappingURL=speech-provider.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-provider.js","sourceRoot":"","sources":["../src/speech-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,0BAA0B,GAE3B,MAAM,8BAA8B,CAAC;AAoBtC,MAAM,CAAC,MAAM,QAAQ,GAAG;IACtB,SAAS,EAAE,WAAW;IACtB,UAAU,EAAE,YAAY;IACxB,oBAAoB,EAAE,sBAAsB;IAC5C,WAAW,EAAE,aAAa;IAC1B,UAAU,EAAE,YAAY;CAChB,CAAC;AAEX,MAAM,UAAU,UAAU,CAAC,KAAgB,EAAE,EAAU;IACrD,OAAO,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAC/B,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAC/C,CAAC;AACJ,CAAC;AA8ED,MAAM,UAAU,oBAAoB,CAClC,KAAuB;IAEvB,OAAO,KAAK,CAAC,QAAQ,CAAC,EAAE,KAAK,0BAA0B,CAAC;AAC1D,CAAC;AAED,MAAM,UAAU,6BAA6B,CAC3C,QAAuB;IAEvB,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAC9C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,OAAO,SAAS,IAAI,IAAI,IAAI,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC;AACzE,CAAC"}
|
package/dist/speech-result.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SpeechMetadata } from "./metadata.js";
|
|
2
|
+
import type { ConversationWordTimestamp, WordTimestamp } from "./timestamps.js";
|
|
2
3
|
export interface GeneratedAudioFile {
|
|
3
4
|
readonly base64: string;
|
|
4
5
|
readonly mediaType: string;
|
|
@@ -8,8 +9,12 @@ export interface SpeechResult {
|
|
|
8
9
|
readonly audio: GeneratedAudioFile;
|
|
9
10
|
readonly metadata: SpeechMetadata;
|
|
10
11
|
readonly providerMetadata?: Record<string, unknown>;
|
|
12
|
+
readonly timestamps?: readonly WordTimestamp[];
|
|
11
13
|
readonly warnings?: string[];
|
|
12
14
|
}
|
|
15
|
+
export interface ConversationResult extends Omit<SpeechResult, "timestamps"> {
|
|
16
|
+
readonly timestamps?: readonly ConversationWordTimestamp[];
|
|
17
|
+
}
|
|
13
18
|
export declare class DefaultGeneratedAudioFile implements GeneratedAudioFile {
|
|
14
19
|
readonly mediaType: string;
|
|
15
20
|
private readonly _data;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"speech-result.d.ts","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,OAAO,KAAK,EAAE,yBAAyB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhF,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,KAAK,EAAE,kBAAkB,CAAC;IACnC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAClC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACpD,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IAC/C,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAmB,SAAQ,IAAI,CAAC,YAAY,EAAE,YAAY,CAAC;IAC1E,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,yBAAyB,EAAE,CAAC;CAC5D;AAED,qBAAa,yBAA0B,YAAW,kBAAkB;IAClE,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAE3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsB;IAC5C,OAAO,CAAC,WAAW,CAAC,CAAa;IACjC,OAAO,CAAC,OAAO,CAAC,CAAS;gBAEb,EACV,IAAI,EACJ,SAAS,GACV,EAAE;QAAE,IAAI,EAAE,MAAM,GAAG,UAAU,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE;IAKnD,IAAI,UAAU,IAAI,UAAU,CAe3B;IAED,IAAI,MAAM,IAAI,MAAM,CAcnB;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"speech-result.js","sourceRoot":"","sources":["../src/speech-result.ts"],"names":[],"mappings":"AAqBA,MAAM,OAAO,yBAAyB;IAC3B,SAAS,CAAS;IAEV,KAAK,CAAsB;IACpC,WAAW,CAAc;IACzB,OAAO,CAAU;IAEzB,YAAY,EACV,IAAI,EACJ,SAAS,GACwC;QACjD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED,IAAI,UAAU;QACZ,IAAI,IAAI,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,WAAW,CAAC;QAC1B,CAAC;QACD,IAAI,IAAI,CAAC,KAAK,YAAY,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,IAAI,MAAM;QACR,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,OAAO,CAAC;QACtB,CAAC;QACD,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,IAAI,YAAY,GAAG,EAAE,CAAC;YACtB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC9B,YAAY,IAAI,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YAC5C,CAAC;YACD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC;QACpC,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
+
export interface STTModelInfo {
|
|
3
|
+
readonly id: string;
|
|
4
|
+
readonly languages: readonly string[];
|
|
5
|
+
readonly releaseDate: string;
|
|
6
|
+
}
|
|
7
|
+
export interface SpeechToTextProvider {
|
|
8
|
+
readonly defaultModel: string;
|
|
9
|
+
readonly id: string;
|
|
10
|
+
readonly models: readonly STTModelInfo[];
|
|
11
|
+
transcribe(options: {
|
|
12
|
+
modelId: string;
|
|
13
|
+
audio: Uint8Array;
|
|
14
|
+
mediaType: string;
|
|
15
|
+
language?: string;
|
|
16
|
+
abortSignal?: AbortSignal;
|
|
17
|
+
headers?: Record<string, string>;
|
|
18
|
+
}): Promise<{
|
|
19
|
+
timestamps: WordTimestamp[];
|
|
20
|
+
text?: string;
|
|
21
|
+
providerMetadata?: Record<string, unknown>;
|
|
22
|
+
}>;
|
|
23
|
+
}
|
|
24
|
+
export interface ResolvedSTTModel {
|
|
25
|
+
readonly modelId: string;
|
|
26
|
+
readonly provider: SpeechToTextProvider;
|
|
27
|
+
}
|
|
28
|
+
//# sourceMappingURL=speech-to-text-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speech-to-text-provider.d.ts","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAErD,MAAM,WAAW,YAAY;IAC3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IACtC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,MAAM,EAAE,SAAS,YAAY,EAAE,CAAC;IAEzC,UAAU,CAAC,OAAO,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE,UAAU,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,WAAW,CAAC,EAAE,WAAW,CAAC;QAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAClC,GAAG,OAAO,CAAC;QACV,UAAU,EAAE,aAAa,EAAE,CAAC;QAC5B,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC5C,CAAC,CAAC;CACJ;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,QAAQ,EAAE,oBAAoB,CAAC;CACzC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speech-to-text-provider.js","sourceRoot":"","sources":["../src/speech-to-text-provider.ts"],"names":[],"mappings":""}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-speech.d.ts","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"stream-speech.d.ts","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAUA,OAAO,EAGL,KAAK,aAAa,EAClB,KAAK,KAAK,EACX,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAEpE,wBAAsB,YAAY,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK,EAAE,OAAO,EAAE;IACnE,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;IACT,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC,GAAG,OAAO,CAAC,kBAAkB,CAAC,CA0F9B"}
|
package/dist/stream-speech.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import pRetry from "p-retry";
|
|
2
2
|
import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
|
|
3
3
|
import { ApiError, NoSpeechGeneratedError, StreamingNotSupportedError, } from "./errors.js";
|
|
4
|
+
import { isRetriableApiError } from "./provider-utils.js";
|
|
4
5
|
import { resolveModel } from "./resolve-provider.js";
|
|
5
6
|
import { FEATURES, hasFeature, } from "./speech-provider.js";
|
|
6
7
|
export async function streamSpeech(options) {
|
|
@@ -48,7 +49,7 @@ export async function streamSpeech(options) {
|
|
|
48
49
|
retries: maxRetries,
|
|
49
50
|
signal: abortSignal,
|
|
50
51
|
shouldRetry: ({ error }) => {
|
|
51
|
-
if (error instanceof ApiError && error
|
|
52
|
+
if (error instanceof ApiError && !isRetriableApiError(error)) {
|
|
52
53
|
return false;
|
|
53
54
|
}
|
|
54
55
|
return true;
|
|
@@ -59,8 +60,6 @@ export async function streamSpeech(options) {
|
|
|
59
60
|
latencyMs: ttfbMs,
|
|
60
61
|
ttfbMs,
|
|
61
62
|
inputChars: processedText.length,
|
|
62
|
-
provider: resolved.provider.id,
|
|
63
|
-
model: resolved.modelId,
|
|
64
63
|
...(result.audioDurationMs != null && {
|
|
65
64
|
audioDurationMs: result.audioDurationMs,
|
|
66
65
|
}),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,UAAU,GAGX,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAA0B,OAS3D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC;QACP,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,
|
|
1
|
+
{"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,UAAU,GAGX,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAA0B,OAS3D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC;QACP,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7D,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAEzD,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,MAAM;QACjB,MAAM;QACN,UAAU,EAAE,aAAa,CAAC,MAAM;QAChC,GAAG,CAAC,MAAM,CAAC,eAAe,IAAI,IAAI,IAAI;YACpC,eAAe,EAAE,MAAM,CAAC,eAAe;SACxC,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"timestamps.d.ts","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,yBAA0B,SAAQ,aAAa;IAC9D,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"timestamps.js","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":""}
|
package/dist/turns.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { ConversationWordTimestamp } from "./timestamps.js";
|
|
2
|
+
export interface TurnTimestamp {
|
|
3
|
+
readonly end: number;
|
|
4
|
+
readonly start: number;
|
|
5
|
+
readonly text: string;
|
|
6
|
+
readonly turnIndex: number;
|
|
7
|
+
}
|
|
8
|
+
export declare function timestampsToTurns(timestamps: readonly ConversationWordTimestamp[]): readonly TurnTimestamp[];
|
|
9
|
+
//# sourceMappingURL=turns.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turns.d.ts","sourceRoot":"","sources":["../src/turns.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,iBAAiB,CAAC;AAEjE,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAKD,wBAAgB,iBAAiB,CAC/B,UAAU,EAAE,SAAS,yBAAyB,EAAE,GAC/C,SAAS,aAAa,EAAE,CAiB1B"}
|
package/dist/turns.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Assumes turnIndex runs are monotonic; non-adjacent runs of the same turnIndex would produce duplicate entries.
|
|
2
|
+
export function timestampsToTurns(timestamps) {
|
|
3
|
+
const turns = [];
|
|
4
|
+
for (const word of timestamps) {
|
|
5
|
+
const last = turns.at(-1);
|
|
6
|
+
if (last && last.turnIndex === word.turnIndex) {
|
|
7
|
+
last.end = word.end;
|
|
8
|
+
last.text = `${last.text} ${word.text}`;
|
|
9
|
+
}
|
|
10
|
+
else {
|
|
11
|
+
turns.push({
|
|
12
|
+
turnIndex: word.turnIndex,
|
|
13
|
+
start: word.start,
|
|
14
|
+
end: word.end,
|
|
15
|
+
text: word.text,
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
return turns;
|
|
20
|
+
}
|
|
21
|
+
//# sourceMappingURL=turns.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"turns.js","sourceRoot":"","sources":["../src/turns.ts"],"names":[],"mappings":"AAWA,iHAAiH;AACjH,MAAM,UAAU,iBAAiB,CAC/B,UAAgD;IAEhD,MAAM,KAAK,GAA6B,EAAE,CAAC;IAC3C,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,IAAI,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YAC9C,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC;YACpB,IAAI,CAAC,IAAI,GAAG,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC;gBACT,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,IAAI,EAAE,IAAI,CAAC,IAAI;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,27 @@
|
|
|
1
1
|
import type { ResolvedModel, Voice } from "./speech-provider.js";
|
|
2
|
+
export type { CaptionFormat, CaptionsOptions } from "./captions.js";
|
|
3
|
+
export type { ConversationTurn, GenerateConversationOptions, } from "./conversation/types.js";
|
|
4
|
+
export type { SpeechMetadata } from "./metadata.js";
|
|
5
|
+
export type { CartesiaSpeechProviderConfig } from "./providers/cartesia/index.js";
|
|
6
|
+
export type { DeepgramSpeechProviderConfig } from "./providers/deepgram/index.js";
|
|
7
|
+
export type { ElevenLabsSpeechProviderConfig } from "./providers/elevenlabs/index.js";
|
|
8
|
+
export type { FalSpeechProviderConfig } from "./providers/fal/index.js";
|
|
9
|
+
export type { FishAudioSpeechProviderConfig } from "./providers/fish-audio/index.js";
|
|
10
|
+
export type { SpeechGatewayProviderConfig } from "./providers/gateway/index.js";
|
|
11
|
+
export type { GoogleSpeechProviderConfig } from "./providers/google/index.js";
|
|
12
|
+
export type { HumeSpeechProviderConfig } from "./providers/hume/index.js";
|
|
13
|
+
export type { InworldSpeechProviderConfig } from "./providers/inworld/index.js";
|
|
14
|
+
export type { MistralSpeechProviderConfig } from "./providers/mistral/index.js";
|
|
15
|
+
export type { MurfSpeechProviderConfig } from "./providers/murf/index.js";
|
|
16
|
+
export type { OpenAISpeechProviderConfig } from "./providers/openai/index.js";
|
|
17
|
+
export type { ResembleSpeechProviderConfig } from "./providers/resemble/index.js";
|
|
18
|
+
export type { XaiSpeechProviderConfig } from "./providers/xai/index.js";
|
|
19
|
+
export type { Feature, ModelInfo, ResolvedModel, SpeechProvider, Voice, } from "./speech-provider.js";
|
|
20
|
+
export type { ConversationResult, GeneratedAudioFile, SpeechResult, } from "./speech-result.js";
|
|
21
|
+
export type { ResolvedSTTModel, SpeechToTextProvider, STTModelInfo, } from "./speech-to-text-provider.js";
|
|
22
|
+
export type { StreamSpeechResult } from "./stream-speech-result.js";
|
|
23
|
+
export type { ConversationWordTimestamp, WordTimestamp, } from "./timestamps.js";
|
|
24
|
+
export type { TurnTimestamp } from "./turns.js";
|
|
2
25
|
export interface GenerateSpeechOptions<V extends Voice = Voice> {
|
|
3
26
|
abortSignal?: AbortSignal;
|
|
4
27
|
apiKey?: string;
|
|
@@ -7,6 +30,8 @@ export interface GenerateSpeechOptions<V extends Voice = Voice> {
|
|
|
7
30
|
model: string | ResolvedModel<V>;
|
|
8
31
|
providerOptions?: Record<string, unknown>;
|
|
9
32
|
text: string;
|
|
33
|
+
timestamps?: boolean;
|
|
10
34
|
voice: V;
|
|
35
|
+
volumeDbfs?: number;
|
|
11
36
|
}
|
|
12
37
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,8BAA8B,EAAE,MAAM,iCAAiC,CAAC;AACtF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AACrF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,kBAAkB,EAClB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EACV,yBAAyB,EACzB,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,KAAK,EAAE,CAAC,CAAC;IACT,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
|
package/dist/volume-adjust.d.ts
CHANGED
|
@@ -3,12 +3,6 @@ interface AdjustVolumeInput {
|
|
|
3
3
|
readonly mediaType: string;
|
|
4
4
|
readonly volumeDbfs: number;
|
|
5
5
|
}
|
|
6
|
-
/**
|
|
7
|
-
* Decode the provider's PCM/WAV output, RMS-normalize to the target dBFS,
|
|
8
|
-
* and re-encode as 16-bit mono WAV. Lazy-loaded by generateSpeech only when
|
|
9
|
-
* `volumeDbfs` is set so callers that never use volume adjustment don't pay
|
|
10
|
-
* for the WAV mux dependency chain at import time.
|
|
11
|
-
*/
|
|
12
6
|
export declare function adjustVolume(input: AdjustVolumeInput): Promise<Uint8Array>;
|
|
13
7
|
export {};
|
|
14
8
|
//# sourceMappingURL=volume-adjust.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAOA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED
|
|
1
|
+
{"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAOA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAsB,YAAY,CAChC,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,UAAU,CAAC,CAgBrB"}
|
package/dist/volume-adjust.js
CHANGED
|
@@ -1,10 +1,4 @@
|
|
|
1
1
|
import { concatPcmToWav, dbfsToInt16Rms, decodeToPcm16, normalizeRms, } from "./conversation/pcm-concat.js";
|
|
2
|
-
/**
|
|
3
|
-
* Decode the provider's PCM/WAV output, RMS-normalize to the target dBFS,
|
|
4
|
-
* and re-encode as 16-bit mono WAV. Lazy-loaded by generateSpeech only when
|
|
5
|
-
* `volumeDbfs` is set so callers that never use volume adjustment don't pay
|
|
6
|
-
* for the WAV mux dependency chain at import time.
|
|
7
|
-
*/
|
|
8
2
|
export async function adjustVolume(input) {
|
|
9
3
|
const bytes = input.audio instanceof Uint8Array
|
|
10
4
|
? input.audio
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC
|
|
1
|
+
{"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAwB;IAExB,MAAM,KAAK,GACT,KAAK,CAAC,KAAK,YAAY,UAAU;QAC/B,CAAC,CAAC,KAAK,CAAC,KAAK;QACb,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,OAAO,GAAG,aAAa,CAAC,KAAK,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACtD,MAAM,CAAC,UAAU,CAAC,GAAG,YAAY,CAC/B,CAAC,OAAO,CAAC,EACT,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACjC,CAAC;IAEF,OAAO,MAAM,cAAc,CAAC,CAAC,UAAU,CAAC,EAAE;QACxC,KAAK,EAAE,CAAC;QACR,gBAAgB,EAAE,UAAU,CAAC,UAAU;KACxC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,GAAG,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@speech-sdk/core",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.0-alpha",
|
|
4
4
|
"description": "Universal, cross-platform text-to-speech SDK with multi-provider support.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -10,65 +10,13 @@
|
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
11
11
|
"default": "./dist/index.js"
|
|
12
12
|
},
|
|
13
|
-
"./
|
|
14
|
-
"types": "./dist/
|
|
15
|
-
"default": "./dist/
|
|
13
|
+
"./providers": {
|
|
14
|
+
"types": "./dist/providers.d.ts",
|
|
15
|
+
"default": "./dist/providers.js"
|
|
16
16
|
},
|
|
17
|
-
"./
|
|
18
|
-
"types": "./dist/
|
|
19
|
-
"default": "./dist/
|
|
20
|
-
},
|
|
21
|
-
"./openai": {
|
|
22
|
-
"types": "./dist/providers/openai/index.d.ts",
|
|
23
|
-
"default": "./dist/providers/openai/index.js"
|
|
24
|
-
},
|
|
25
|
-
"./elevenlabs": {
|
|
26
|
-
"types": "./dist/providers/elevenlabs/index.d.ts",
|
|
27
|
-
"default": "./dist/providers/elevenlabs/index.js"
|
|
28
|
-
},
|
|
29
|
-
"./deepgram": {
|
|
30
|
-
"types": "./dist/providers/deepgram/index.d.ts",
|
|
31
|
-
"default": "./dist/providers/deepgram/index.js"
|
|
32
|
-
},
|
|
33
|
-
"./cartesia": {
|
|
34
|
-
"types": "./dist/providers/cartesia/index.d.ts",
|
|
35
|
-
"default": "./dist/providers/cartesia/index.js"
|
|
36
|
-
},
|
|
37
|
-
"./hume": {
|
|
38
|
-
"types": "./dist/providers/hume/index.d.ts",
|
|
39
|
-
"default": "./dist/providers/hume/index.js"
|
|
40
|
-
},
|
|
41
|
-
"./inworld": {
|
|
42
|
-
"types": "./dist/providers/inworld/index.d.ts",
|
|
43
|
-
"default": "./dist/providers/inworld/index.js"
|
|
44
|
-
},
|
|
45
|
-
"./google": {
|
|
46
|
-
"types": "./dist/providers/google/index.d.ts",
|
|
47
|
-
"default": "./dist/providers/google/index.js"
|
|
48
|
-
},
|
|
49
|
-
"./fish-audio": {
|
|
50
|
-
"types": "./dist/providers/fish-audio/index.d.ts",
|
|
51
|
-
"default": "./dist/providers/fish-audio/index.js"
|
|
52
|
-
},
|
|
53
|
-
"./murf": {
|
|
54
|
-
"types": "./dist/providers/murf/index.d.ts",
|
|
55
|
-
"default": "./dist/providers/murf/index.js"
|
|
56
|
-
},
|
|
57
|
-
"./resemble": {
|
|
58
|
-
"types": "./dist/providers/resemble/index.d.ts",
|
|
59
|
-
"default": "./dist/providers/resemble/index.js"
|
|
60
|
-
},
|
|
61
|
-
"./fal-ai": {
|
|
62
|
-
"types": "./dist/providers/fal/index.d.ts",
|
|
63
|
-
"default": "./dist/providers/fal/index.js"
|
|
64
|
-
},
|
|
65
|
-
"./mistral": {
|
|
66
|
-
"types": "./dist/providers/mistral/index.d.ts",
|
|
67
|
-
"default": "./dist/providers/mistral/index.js"
|
|
68
|
-
},
|
|
69
|
-
"./xai": {
|
|
70
|
-
"types": "./dist/providers/xai/index.d.ts",
|
|
71
|
-
"default": "./dist/providers/xai/index.js"
|
|
17
|
+
"./types": {
|
|
18
|
+
"types": "./dist/types.d.ts",
|
|
19
|
+
"default": "./dist/types.js"
|
|
72
20
|
}
|
|
73
21
|
},
|
|
74
22
|
"files": [
|
|
@@ -84,21 +32,22 @@
|
|
|
84
32
|
"inworld",
|
|
85
33
|
"ai"
|
|
86
34
|
],
|
|
87
|
-
"license": "
|
|
35
|
+
"license": "Apache-2.0",
|
|
88
36
|
"repository": {
|
|
89
37
|
"type": "git",
|
|
90
38
|
"url": "https://github.com/Jellypod-Inc/speech-sdk"
|
|
91
39
|
},
|
|
92
40
|
"dependencies": {
|
|
93
41
|
"mediabunny": "^1.40.1",
|
|
94
|
-
"p-retry": "^8.0.0"
|
|
42
|
+
"p-retry": "^8.0.0",
|
|
43
|
+
"zod": "^4.3.6"
|
|
95
44
|
},
|
|
96
45
|
"devDependencies": {
|
|
97
|
-
"@biomejs/biome": "2.4.
|
|
46
|
+
"@biomejs/biome": "2.4.13",
|
|
98
47
|
"@types/node": "^25.5.0",
|
|
99
48
|
"dotenv": "^17.3.1",
|
|
100
49
|
"typescript": "^5.8.0",
|
|
101
|
-
"ultracite": "7.
|
|
50
|
+
"ultracite": "7.6.2",
|
|
102
51
|
"vite": "^7.3.2",
|
|
103
52
|
"vitest": "^4.1.3"
|
|
104
53
|
},
|