@juspay/neurolink 9.66.0 → 9.67.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +12 -12
- package/dist/avatar/index.d.ts +13 -0
- package/dist/avatar/index.js +72 -0
- package/dist/browser/neurolink.min.js +389 -383
- package/dist/core/baseProvider.js +49 -8
- package/dist/factories/providerRegistry.js +23 -0
- package/dist/index.d.ts +10 -1
- package/dist/index.js +36 -1
- package/dist/lib/avatar/index.d.ts +13 -0
- package/dist/lib/avatar/index.js +72 -0
- package/dist/lib/core/baseProvider.js +49 -8
- package/dist/lib/factories/providerRegistry.js +23 -0
- package/dist/lib/files/fileTools.d.ts +1 -1
- package/dist/lib/index.d.ts +10 -1
- package/dist/lib/index.js +36 -1
- package/dist/lib/music/index.d.ts +14 -0
- package/dist/lib/music/index.js +80 -0
- package/dist/lib/providers/openaiCompatible.d.ts +46 -19
- package/dist/lib/providers/openaiCompatible.js +1069 -171
- package/dist/lib/types/avatar.d.ts +8 -1
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/middleware.d.ts +1 -1
- package/dist/lib/types/multimodal.d.ts +20 -7
- package/dist/lib/types/music.d.ts +8 -1
- package/dist/lib/types/openaiCompatible.d.ts +250 -0
- package/dist/lib/types/openaiCompatible.js +2 -0
- package/dist/lib/types/tts.d.ts +9 -1
- package/dist/lib/utils/avatarProcessor.d.ts +7 -1
- package/dist/lib/utils/avatarProcessor.js +6 -0
- package/dist/lib/utils/musicProcessor.d.ts +7 -1
- package/dist/lib/utils/musicProcessor.js +6 -0
- package/dist/lib/utils/parameterValidation.js +5 -1
- package/dist/lib/utils/sttProcessor.d.ts +5 -3
- package/dist/lib/utils/sttProcessor.js +4 -2
- package/dist/lib/utils/ttsProcessor.d.ts +6 -3
- package/dist/lib/utils/ttsProcessor.js +5 -2
- package/dist/lib/voice/RealtimeVoiceAPI.d.ts +5 -2
- package/dist/lib/voice/RealtimeVoiceAPI.js +4 -1
- package/dist/lib/voice/index.d.ts +23 -0
- package/dist/lib/voice/index.js +124 -2
- package/dist/lib/voice/providers/CartesiaTTS.d.ts +31 -0
- package/dist/lib/voice/providers/CartesiaTTS.js +189 -0
- package/dist/lib/workflow/config.d.ts +3 -3
- package/dist/music/index.d.ts +14 -0
- package/dist/music/index.js +80 -0
- package/dist/providers/openaiCompatible.d.ts +46 -19
- package/dist/providers/openaiCompatible.js +1069 -171
- package/dist/types/avatar.d.ts +8 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/middleware.d.ts +1 -1
- package/dist/types/multimodal.d.ts +20 -7
- package/dist/types/music.d.ts +8 -1
- package/dist/types/openaiCompatible.d.ts +250 -0
- package/dist/types/openaiCompatible.js +1 -0
- package/dist/types/tts.d.ts +9 -1
- package/dist/utils/avatarProcessor.d.ts +7 -1
- package/dist/utils/avatarProcessor.js +6 -0
- package/dist/utils/musicProcessor.d.ts +7 -1
- package/dist/utils/musicProcessor.js +6 -0
- package/dist/utils/parameterValidation.js +5 -1
- package/dist/utils/sttProcessor.d.ts +5 -3
- package/dist/utils/sttProcessor.js +4 -2
- package/dist/utils/ttsProcessor.d.ts +6 -3
- package/dist/utils/ttsProcessor.js +5 -2
- package/dist/voice/RealtimeVoiceAPI.d.ts +5 -2
- package/dist/voice/RealtimeVoiceAPI.js +4 -1
- package/dist/voice/index.d.ts +23 -0
- package/dist/voice/index.js +124 -2
- package/dist/voice/providers/CartesiaTTS.d.ts +31 -0
- package/dist/voice/providers/CartesiaTTS.js +188 -0
- package/package.json +66 -2
package/dist/lib/voice/index.js
CHANGED
|
@@ -8,8 +8,17 @@
|
|
|
8
8
|
* Use STTProcessor (src/lib/utils/sttProcessor.ts) for STT.
|
|
9
9
|
* Use RealtimeProcessor for realtime voice sessions.
|
|
10
10
|
*
|
|
11
|
+
* Importing this module also auto-registers every shipped TTS / STT /
|
|
12
|
+
* Realtime handler whose backing API key is present in `process.env`.
|
|
13
|
+
* Registration is idempotent and silently skipped on failure.
|
|
14
|
+
*
|
|
11
15
|
* @module voice
|
|
12
16
|
*/
|
|
17
|
+
import { logger } from "../utils/logger.js";
|
|
18
|
+
import { STTProcessor } from "../utils/sttProcessor.js";
|
|
19
|
+
import { TTSProcessor } from "../utils/ttsProcessor.js";
|
|
20
|
+
import { GoogleTTSHandler } from "../adapters/tts/googleTTSHandler.js";
|
|
21
|
+
import { RealtimeProcessor } from "./RealtimeVoiceAPI.js";
|
|
13
22
|
// ============================================================================
|
|
14
23
|
// ERROR CODES AND CONSTANTS
|
|
15
24
|
// ============================================================================
|
|
@@ -35,21 +44,134 @@ export { asyncIterableToStream, ChunkedAudioStream, StreamHandler, StreamMerger,
|
|
|
35
44
|
// ============================================================================
|
|
36
45
|
// TTS PROVIDERS
|
|
37
46
|
// ============================================================================
|
|
47
|
+
export { GoogleTTSHandler } from "../adapters/tts/googleTTSHandler.js";
|
|
38
48
|
export { AzureTTS, AzureTTS as AzureTTSHandler } from "./providers/AzureTTS.js";
|
|
49
|
+
export { CartesiaTTS, CartesiaTTS as CartesiaTTSHandler, } from "./providers/CartesiaTTS.js";
|
|
39
50
|
export { ElevenLabsTTS, ElevenLabsTTS as ElevenLabsTTSHandler, } from "./providers/ElevenLabsTTS.js";
|
|
51
|
+
export { FishAudioTTS, FishAudioTTS as FishAudioTTSHandler, } from "./providers/FishAudioTTS.js";
|
|
40
52
|
export { OpenAITTS, OpenAITTS as OpenAITTSHandler, } from "./providers/OpenAITTS.js";
|
|
41
53
|
// ============================================================================
|
|
42
54
|
// STT PROVIDERS
|
|
43
55
|
// ============================================================================
|
|
44
56
|
export { AzureSTT, AzureSTT as AzureSTTHandler } from "./providers/AzureSTT.js";
|
|
45
57
|
export { DeepgramSTT, DeepgramSTT as DeepgramSTTHandler, } from "./providers/DeepgramSTT.js";
|
|
46
|
-
// Export STT provider classes for direct use
|
|
47
58
|
export { GoogleSTT, GoogleSTT as GoogleSTTHandler, } from "./providers/GoogleSTT.js";
|
|
48
59
|
export { OpenAISTT, OpenAISTTHandler, WhisperSTT, WhisperSTTHandler, } from "./providers/OpenAISTT.js";
|
|
49
60
|
// ============================================================================
|
|
50
61
|
// REALTIME PROVIDERS
|
|
51
62
|
// ============================================================================
|
|
52
63
|
export { GeminiLive, GeminiLive as GeminiLiveHandler, } from "./providers/GeminiLive.js";
|
|
53
|
-
// Export Realtime provider classes for direct use
|
|
54
64
|
export { OpenAIRealtime, OpenAIRealtime as OpenAIRealtimeHandler, } from "./providers/OpenAIRealtime.js";
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// AUTO-REGISTRATION
|
|
67
|
+
// ============================================================================
|
|
68
|
+
import { AzureTTS } from "./providers/AzureTTS.js";
|
|
69
|
+
import { CartesiaTTS } from "./providers/CartesiaTTS.js";
|
|
70
|
+
import { ElevenLabsTTS } from "./providers/ElevenLabsTTS.js";
|
|
71
|
+
import { FishAudioTTS } from "./providers/FishAudioTTS.js";
|
|
72
|
+
import { OpenAITTS } from "./providers/OpenAITTS.js";
|
|
73
|
+
import { AzureSTT } from "./providers/AzureSTT.js";
|
|
74
|
+
import { DeepgramSTT } from "./providers/DeepgramSTT.js";
|
|
75
|
+
import { GoogleSTT } from "./providers/GoogleSTT.js";
|
|
76
|
+
import { OpenAISTT } from "./providers/OpenAISTT.js";
|
|
77
|
+
import { GeminiLive } from "./providers/GeminiLive.js";
|
|
78
|
+
import { OpenAIRealtime } from "./providers/OpenAIRealtime.js";
|
|
79
|
+
const TTS_HANDLER_CANDIDATES = [
|
|
80
|
+
{
|
|
81
|
+
// Google TTS doubles as both the AI Studio and Vertex TTS handler.
|
|
82
|
+
name: "google-ai",
|
|
83
|
+
aliases: ["vertex"],
|
|
84
|
+
factory: () => new GoogleTTSHandler(),
|
|
85
|
+
},
|
|
86
|
+
{ name: "openai-tts", factory: () => new OpenAITTS() },
|
|
87
|
+
{
|
|
88
|
+
name: "elevenlabs",
|
|
89
|
+
aliases: ["elevenlabs-tts"],
|
|
90
|
+
factory: () => new ElevenLabsTTS(),
|
|
91
|
+
},
|
|
92
|
+
{ name: "azure-tts", factory: () => new AzureTTS() },
|
|
93
|
+
{ name: "fish-audio", factory: () => new FishAudioTTS() },
|
|
94
|
+
{ name: "cartesia", factory: () => new CartesiaTTS() },
|
|
95
|
+
];
|
|
96
|
+
const STT_HANDLER_CANDIDATES = [
|
|
97
|
+
{
|
|
98
|
+
name: "whisper",
|
|
99
|
+
aliases: ["openai-stt"],
|
|
100
|
+
factory: () => new OpenAISTT(),
|
|
101
|
+
},
|
|
102
|
+
{ name: "deepgram", factory: () => new DeepgramSTT() },
|
|
103
|
+
{ name: "google-stt", factory: () => new GoogleSTT() },
|
|
104
|
+
{ name: "azure-stt", factory: () => new AzureSTT() },
|
|
105
|
+
];
|
|
106
|
+
const REALTIME_HANDLER_CANDIDATES = [
|
|
107
|
+
{ name: "openai-realtime", factory: () => new OpenAIRealtime() },
|
|
108
|
+
{ name: "gemini-live", factory: () => new GeminiLive() },
|
|
109
|
+
];
|
|
110
|
+
function registerCandidates(candidates, supports, getRegistered, register, scope, requireConfigured) {
|
|
111
|
+
for (const { name, aliases, factory } of candidates) {
|
|
112
|
+
// Compute missingName / missingAliases separately so a manually-
|
|
113
|
+
// registered primary name doesn't block alias backfill. Important for
|
|
114
|
+
// BC: existing callers that register e.g. "elevenlabs" should still
|
|
115
|
+
// see "elevenlabs-tts" wired up by this loop.
|
|
116
|
+
const missingName = !supports(name);
|
|
117
|
+
const missingAliases = (aliases ?? []).filter((alias) => !supports(alias));
|
|
118
|
+
if (!missingName && missingAliases.length === 0) {
|
|
119
|
+
continue;
|
|
120
|
+
}
|
|
121
|
+
try {
|
|
122
|
+
// If the primary is already registered, reuse that exact handler
|
|
123
|
+
// instance for any alias backfill — wiring an alias to a *different*
|
|
124
|
+
// factory-fresh instance would silently diverge from the canonical
|
|
125
|
+
// primary's behavior (different config, different credentials).
|
|
126
|
+
// Only call factory() when we actually need to register the primary.
|
|
127
|
+
let handler;
|
|
128
|
+
if (!missingName) {
|
|
129
|
+
handler = getRegistered(name);
|
|
130
|
+
}
|
|
131
|
+
if (!handler) {
|
|
132
|
+
handler = factory();
|
|
133
|
+
if (requireConfigured && !handler.isConfigured()) {
|
|
134
|
+
continue;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
if (missingName) {
|
|
138
|
+
register(name, handler);
|
|
139
|
+
}
|
|
140
|
+
for (const alias of missingAliases) {
|
|
141
|
+
register(alias, handler);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
catch (err) {
|
|
145
|
+
logger.debug(`[${scope}] ${name} auto-registration skipped: ${err instanceof Error ? err.message : String(err)}`);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
/**
|
|
150
|
+
* Register every shipped TTS handler whose backing credentials are
|
|
151
|
+
* present in the environment. Safe to call multiple times.
|
|
152
|
+
*/
|
|
153
|
+
export function registerDefaultTTSHandlers() {
|
|
154
|
+
registerCandidates(TTS_HANDLER_CANDIDATES, (name) => TTSProcessor.supports(name), (name) => TTSProcessor.getHandler(name), (name, handler) => TTSProcessor.registerHandler(name, handler), "voice/tts", true);
|
|
155
|
+
}
|
|
156
|
+
/**
|
|
157
|
+
* Register every shipped STT handler whose backing credentials are
|
|
158
|
+
* present in the environment. Safe to call multiple times.
|
|
159
|
+
*/
|
|
160
|
+
export function registerDefaultSTTHandlers() {
|
|
161
|
+
registerCandidates(STT_HANDLER_CANDIDATES, (name) => STTProcessor.supports(name), (name) => STTProcessor.getHandler(name), (name, handler) => STTProcessor.registerHandler(name, handler), "voice/stt", true);
|
|
162
|
+
}
|
|
163
|
+
/**
|
|
164
|
+
* Register every shipped Realtime handler. Realtime handlers don't gate
|
|
165
|
+
* registration on isConfigured() because session-time API keys can be
|
|
166
|
+
* supplied per-call; missing creds surface when `connect()` is invoked.
|
|
167
|
+
*/
|
|
168
|
+
export function registerDefaultRealtimeHandlers() {
|
|
169
|
+
registerCandidates(REALTIME_HANDLER_CANDIDATES, (name) => RealtimeProcessor.supports(name), (name) => RealtimeProcessor.getHandler(name), (name, handler) => RealtimeProcessor.registerHandler(name, handler), "voice/realtime", false);
|
|
170
|
+
}
|
|
171
|
+
// Run once at module import so consumers who follow the documented
|
|
172
|
+
// `nl.generate(...)` flow get every configured handler without manually
|
|
173
|
+
// calling `registerHandler`.
|
|
174
|
+
registerDefaultTTSHandlers();
|
|
175
|
+
registerDefaultSTTHandlers();
|
|
176
|
+
registerDefaultRealtimeHandlers();
|
|
55
177
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cartesia TTS Handler (synchronous /tts/bytes endpoint)
|
|
3
|
+
*
|
|
4
|
+
* Implements the standard `TTSHandler` synchronous-request contract on top
|
|
5
|
+
* of Cartesia's REST `/tts/bytes` endpoint. The pre-existing
|
|
6
|
+
* `adapters/tts/cartesiaHandler.ts` (`CartesiaStream`) targets the
|
|
7
|
+
* realtime WebSocket flow used by the voice server and does NOT implement
|
|
8
|
+
* `TTSHandler`; this file fills the gap so `nl.generate({ tts: { provider:
|
|
9
|
+
* "cartesia" } })` works through the same `TTSProcessor` dispatch as
|
|
10
|
+
* every other shipped TTS provider.
|
|
11
|
+
*
|
|
12
|
+
* @module voice/providers/CartesiaTTS
|
|
13
|
+
* @see https://docs.cartesia.ai/api-reference/tts/bytes
|
|
14
|
+
*/
|
|
15
|
+
import type { TTSHandler, TTSOptions, TTSResult } from "../../types/index.js";
|
|
16
|
+
/**
|
|
17
|
+
* Cartesia synchronous TTS handler.
|
|
18
|
+
*
|
|
19
|
+
* Auth: `X-API-Key: ${CARTESIA_API_KEY}` + `Cartesia-Version` header.
|
|
20
|
+
*/
|
|
21
|
+
export declare class CartesiaTTS implements TTSHandler {
|
|
22
|
+
readonly maxTextLength = 5000;
|
|
23
|
+
private readonly apiKey;
|
|
24
|
+
private readonly baseUrl;
|
|
25
|
+
private readonly apiVersion;
|
|
26
|
+
constructor(apiKey?: string);
|
|
27
|
+
isConfigured(): boolean;
|
|
28
|
+
synthesize(text: string, options?: TTSOptions): Promise<TTSResult>;
|
|
29
|
+
private mapOutputFormat;
|
|
30
|
+
private effectiveFormat;
|
|
31
|
+
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cartesia TTS Handler (synchronous /tts/bytes endpoint)
|
|
3
|
+
*
|
|
4
|
+
* Implements the standard `TTSHandler` synchronous-request contract on top
|
|
5
|
+
* of Cartesia's REST `/tts/bytes` endpoint. The pre-existing
|
|
6
|
+
* `adapters/tts/cartesiaHandler.ts` (`CartesiaStream`) targets the
|
|
7
|
+
* realtime WebSocket flow used by the voice server and does NOT implement
|
|
8
|
+
* `TTSHandler`; this file fills the gap so `nl.generate({ tts: { provider:
|
|
9
|
+
* "cartesia" } })` works through the same `TTSProcessor` dispatch as
|
|
10
|
+
* every other shipped TTS provider.
|
|
11
|
+
*
|
|
12
|
+
* @module voice/providers/CartesiaTTS
|
|
13
|
+
* @see https://docs.cartesia.ai/api-reference/tts/bytes
|
|
14
|
+
*/
|
|
15
|
+
import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
|
|
16
|
+
import { withTimeout, TimeoutError } from "../../utils/async/withTimeout.js";
|
|
17
|
+
import { logger } from "../../utils/logger.js";
|
|
18
|
+
import { TTS_ERROR_CODES, TTSError } from "../../utils/ttsProcessor.js";
|
|
19
|
+
const DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
20
|
+
const DEFAULT_API_VERSION = "2025-04-16";
|
|
21
|
+
const DEFAULT_MODEL = "sonic-2";
|
|
22
|
+
// Same default voice as the streaming handler — a publicly available
|
|
23
|
+
// Cartesia voice id ("Bright Female"). Override per-call via TTSOptions.voice.
|
|
24
|
+
const DEFAULT_VOICE_ID = "694f9389-aac1-45b6-b726-9d9369183238";
|
|
25
|
+
const REQUEST_TIMEOUT_MS = 30_000;
|
|
26
|
+
/**
|
|
27
|
+
* Cartesia synchronous TTS handler.
|
|
28
|
+
*
|
|
29
|
+
* Auth: `X-API-Key: ${CARTESIA_API_KEY}` + `Cartesia-Version` header.
|
|
30
|
+
*/
|
|
31
|
+
export class CartesiaTTS {
|
|
32
|
+
maxTextLength = 5000;
|
|
33
|
+
apiKey;
|
|
34
|
+
baseUrl;
|
|
35
|
+
apiVersion;
|
|
36
|
+
constructor(apiKey) {
|
|
37
|
+
const resolved = (apiKey ?? process.env.CARTESIA_API_KEY ?? "").trim();
|
|
38
|
+
this.apiKey = resolved.length > 0 ? resolved : null;
|
|
39
|
+
this.baseUrl = (process.env.CARTESIA_BASE_URL ?? DEFAULT_BASE_URL).replace(/\/$/, "");
|
|
40
|
+
this.apiVersion = process.env.CARTESIA_API_VERSION ?? DEFAULT_API_VERSION;
|
|
41
|
+
}
|
|
42
|
+
isConfigured() {
|
|
43
|
+
return this.apiKey !== null;
|
|
44
|
+
}
|
|
45
|
+
async synthesize(text, options = {}) {
|
|
46
|
+
if (!this.apiKey) {
|
|
47
|
+
throw new TTSError({
|
|
48
|
+
code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
|
|
49
|
+
message: "CARTESIA_API_KEY not configured",
|
|
50
|
+
category: ErrorCategory.CONFIGURATION,
|
|
51
|
+
severity: ErrorSeverity.HIGH,
|
|
52
|
+
retriable: false,
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
const startTime = Date.now();
|
|
56
|
+
const voiceId = options.voice ?? process.env.CARTESIA_VOICE_ID ?? DEFAULT_VOICE_ID;
|
|
57
|
+
const requestedFormat = options.format ?? "mp3";
|
|
58
|
+
const { container, encoding, sampleRate } = this.mapOutputFormat(requestedFormat);
|
|
59
|
+
const cartesiaOpts = options;
|
|
60
|
+
const model = cartesiaOpts.model ?? process.env.CARTESIA_MODEL ?? DEFAULT_MODEL;
|
|
61
|
+
const body = {
|
|
62
|
+
model_id: model,
|
|
63
|
+
transcript: text,
|
|
64
|
+
voice: { mode: "id", id: voiceId },
|
|
65
|
+
output_format: {
|
|
66
|
+
container,
|
|
67
|
+
encoding,
|
|
68
|
+
sample_rate: sampleRate,
|
|
69
|
+
},
|
|
70
|
+
language: cartesiaOpts.language ?? "en",
|
|
71
|
+
};
|
|
72
|
+
let response;
|
|
73
|
+
try {
|
|
74
|
+
response = await withTimeout(fetch(`${this.baseUrl}/tts/bytes`, {
|
|
75
|
+
method: "POST",
|
|
76
|
+
headers: {
|
|
77
|
+
"X-API-Key": this.apiKey,
|
|
78
|
+
"Cartesia-Version": this.apiVersion,
|
|
79
|
+
"Content-Type": "application/json",
|
|
80
|
+
},
|
|
81
|
+
body: JSON.stringify(body),
|
|
82
|
+
}), REQUEST_TIMEOUT_MS, `Cartesia request timed out after ${REQUEST_TIMEOUT_MS / 1000}s`);
|
|
83
|
+
}
|
|
84
|
+
catch (err) {
|
|
85
|
+
if (err instanceof TimeoutError) {
|
|
86
|
+
throw new TTSError({
|
|
87
|
+
code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
|
|
88
|
+
message: err.message,
|
|
89
|
+
category: ErrorCategory.NETWORK,
|
|
90
|
+
severity: ErrorSeverity.HIGH,
|
|
91
|
+
retriable: true,
|
|
92
|
+
originalError: err,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
throw new TTSError({
|
|
96
|
+
code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
|
|
97
|
+
message: `Cartesia network error: ${err instanceof Error ? err.message : String(err)}`,
|
|
98
|
+
category: ErrorCategory.NETWORK,
|
|
99
|
+
severity: ErrorSeverity.HIGH,
|
|
100
|
+
retriable: true,
|
|
101
|
+
originalError: err instanceof Error ? err : undefined,
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
if (!response.ok) {
|
|
105
|
+
const text = await response.text();
|
|
106
|
+
const retriable = response.status === 408 ||
|
|
107
|
+
response.status === 429 ||
|
|
108
|
+
response.status >= 500;
|
|
109
|
+
throw new TTSError({
|
|
110
|
+
code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
|
|
111
|
+
message: `Cartesia synthesis failed: ${response.status} — ${text}`,
|
|
112
|
+
category: retriable ? ErrorCategory.NETWORK : ErrorCategory.EXECUTION,
|
|
113
|
+
severity: ErrorSeverity.HIGH,
|
|
114
|
+
retriable,
|
|
115
|
+
context: { status: response.status, voiceId, container, encoding },
|
|
116
|
+
});
|
|
117
|
+
}
|
|
118
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
119
|
+
const audioBuffer = Buffer.from(arrayBuffer);
|
|
120
|
+
const latency = Date.now() - startTime;
|
|
121
|
+
const effectiveFormat = this.effectiveFormat(container, encoding);
|
|
122
|
+
const result = {
|
|
123
|
+
buffer: audioBuffer,
|
|
124
|
+
format: effectiveFormat,
|
|
125
|
+
size: audioBuffer.length,
|
|
126
|
+
voice: voiceId,
|
|
127
|
+
sampleRate,
|
|
128
|
+
metadata: {
|
|
129
|
+
latency,
|
|
130
|
+
provider: "cartesia",
|
|
131
|
+
model,
|
|
132
|
+
requestedFormat: options.format,
|
|
133
|
+
container,
|
|
134
|
+
encoding,
|
|
135
|
+
},
|
|
136
|
+
};
|
|
137
|
+
logger.info(`[CartesiaTTS] Synthesized ${audioBuffer.length} bytes in ${latency}ms`);
|
|
138
|
+
return result;
|
|
139
|
+
}
|
|
140
|
+
mapOutputFormat(format) {
|
|
141
|
+
switch (format) {
|
|
142
|
+
case "mp3":
|
|
143
|
+
return { container: "mp3", encoding: "mp3", sampleRate: 44_100 };
|
|
144
|
+
case "wav":
|
|
145
|
+
return { container: "wav", encoding: "pcm_s16le", sampleRate: 44_100 };
|
|
146
|
+
case "pcm16":
|
|
147
|
+
return { container: "raw", encoding: "pcm_s16le", sampleRate: 24_000 };
|
|
148
|
+
default:
|
|
149
|
+
// Cartesia only supports mp3 / wav / pcm16 today. Fail fast instead
|
|
150
|
+
// of silently downgrading so callers passing ogg / flac / m4a /
|
|
151
|
+
// opus / webm see a clear error rather than mislabeled MP3 bytes.
|
|
152
|
+
throw new TTSError({
|
|
153
|
+
code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
|
|
154
|
+
message: `Cartesia does not support output format "${format}". Supported: mp3, wav, pcm16.`,
|
|
155
|
+
category: ErrorCategory.VALIDATION,
|
|
156
|
+
severity: ErrorSeverity.MEDIUM,
|
|
157
|
+
retriable: false,
|
|
158
|
+
context: {
|
|
159
|
+
format,
|
|
160
|
+
supported: ["mp3", "wav", "pcm16"],
|
|
161
|
+
},
|
|
162
|
+
});
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
effectiveFormat(container, encoding) {
|
|
166
|
+
if (container === "mp3") {
|
|
167
|
+
return "mp3";
|
|
168
|
+
}
|
|
169
|
+
if (container === "wav") {
|
|
170
|
+
return "wav";
|
|
171
|
+
}
|
|
172
|
+
if (encoding === "pcm_s16le") {
|
|
173
|
+
return "pcm16";
|
|
174
|
+
}
|
|
175
|
+
// In practice mapOutputFormat() throws before we reach this branch
|
|
176
|
+
// (it only emits mp3/wav/raw containers from a validated input).
|
|
177
|
+
// Throwing here too means a future container/encoding combination
|
|
178
|
+
// surfaces clearly instead of returning mislabeled bytes as "mp3".
|
|
179
|
+
throw new TTSError({
|
|
180
|
+
code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
|
|
181
|
+
message: `Unsupported Cartesia output combination: container=${container}, encoding=${encoding}`,
|
|
182
|
+
category: ErrorCategory.EXECUTION,
|
|
183
|
+
severity: ErrorSeverity.HIGH,
|
|
184
|
+
retriable: false,
|
|
185
|
+
context: { container, encoding },
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
//# sourceMappingURL=CartesiaTTS.js.map
|
|
@@ -41,9 +41,9 @@ export declare const JudgeConfigSchema: z.ZodObject<{
|
|
|
41
41
|
criteria: z.ZodArray<z.ZodString>;
|
|
42
42
|
outputFormat: z.ZodEnum<{
|
|
43
43
|
scores: "scores";
|
|
44
|
+
detailed: "detailed";
|
|
44
45
|
ranking: "ranking";
|
|
45
46
|
best: "best";
|
|
46
|
-
detailed: "detailed";
|
|
47
47
|
}>;
|
|
48
48
|
customPrompt: z.ZodOptional<z.ZodString>;
|
|
49
49
|
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
@@ -195,9 +195,9 @@ export declare const WorkflowConfigSchema: z.ZodObject<{
|
|
|
195
195
|
criteria: z.ZodArray<z.ZodString>;
|
|
196
196
|
outputFormat: z.ZodEnum<{
|
|
197
197
|
scores: "scores";
|
|
198
|
+
detailed: "detailed";
|
|
198
199
|
ranking: "ranking";
|
|
199
200
|
best: "best";
|
|
200
|
-
detailed: "detailed";
|
|
201
201
|
}>;
|
|
202
202
|
customPrompt: z.ZodOptional<z.ZodString>;
|
|
203
203
|
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
@@ -219,9 +219,9 @@ export declare const WorkflowConfigSchema: z.ZodObject<{
|
|
|
219
219
|
criteria: z.ZodArray<z.ZodString>;
|
|
220
220
|
outputFormat: z.ZodEnum<{
|
|
221
221
|
scores: "scores";
|
|
222
|
+
detailed: "detailed";
|
|
222
223
|
ranking: "ranking";
|
|
223
224
|
best: "best";
|
|
224
|
-
detailed: "detailed";
|
|
225
225
|
}>;
|
|
226
226
|
customPrompt: z.ZodOptional<z.ZodString>;
|
|
227
227
|
systemPrompt: z.ZodOptional<z.ZodString>;
|
package/dist/music/index.d.ts
CHANGED
|
@@ -7,7 +7,21 @@
|
|
|
7
7
|
* Use `MusicProcessor.generate(provider, options)` to dispatch to the
|
|
8
8
|
* registered handler for `provider`.
|
|
9
9
|
*
|
|
10
|
+
* Importing this module also auto-registers every shipped music handler
|
|
11
|
+
* whose backing API key is present in `process.env`. Registration is
|
|
12
|
+
* idempotent and silently skipped if a provider is already registered or
|
|
13
|
+
* its constructor throws (e.g. missing optional native dependency).
|
|
14
|
+
*
|
|
10
15
|
* @module music
|
|
11
16
|
*/
|
|
12
17
|
export { MUSIC_ERROR_CODES, MusicError, MusicProcessor, } from "../utils/musicProcessor.js";
|
|
13
18
|
export { BeatovenMusic, BeatovenMusic as BeatovenMusicHandler, } from "./providers/BeatovenMusic.js";
|
|
19
|
+
export { ElevenLabsMusic, ElevenLabsMusic as ElevenLabsMusicHandler, } from "./providers/ElevenLabsMusic.js";
|
|
20
|
+
export { LyriaMusic, LyriaMusic as LyriaMusicHandler, } from "./providers/LyriaMusic.js";
|
|
21
|
+
export { ReplicateMusic, ReplicateMusic as ReplicateMusicHandler, } from "./providers/ReplicateMusic.js";
|
|
22
|
+
/**
|
|
23
|
+
* Register every shipped music handler whose backing credentials are
|
|
24
|
+
* present in the environment. Safe to call multiple times — existing
|
|
25
|
+
* registrations are preserved.
|
|
26
|
+
*/
|
|
27
|
+
export declare function registerDefaultMusicHandlers(): void;
|
package/dist/music/index.js
CHANGED
|
@@ -7,7 +7,87 @@
|
|
|
7
7
|
* Use `MusicProcessor.generate(provider, options)` to dispatch to the
|
|
8
8
|
* registered handler for `provider`.
|
|
9
9
|
*
|
|
10
|
+
* Importing this module also auto-registers every shipped music handler
|
|
11
|
+
* whose backing API key is present in `process.env`. Registration is
|
|
12
|
+
* idempotent and silently skipped if a provider is already registered or
|
|
13
|
+
* its constructor throws (e.g. missing optional native dependency).
|
|
14
|
+
*
|
|
10
15
|
* @module music
|
|
11
16
|
*/
|
|
17
|
+
import { logger } from "../utils/logger.js";
|
|
18
|
+
import { MusicProcessor } from "../utils/musicProcessor.js";
|
|
12
19
|
export { MUSIC_ERROR_CODES, MusicError, MusicProcessor, } from "../utils/musicProcessor.js";
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// HANDLER CLASSES
|
|
22
|
+
// ============================================================================
|
|
13
23
|
export { BeatovenMusic, BeatovenMusic as BeatovenMusicHandler, } from "./providers/BeatovenMusic.js";
|
|
24
|
+
export { ElevenLabsMusic, ElevenLabsMusic as ElevenLabsMusicHandler, } from "./providers/ElevenLabsMusic.js";
|
|
25
|
+
export { LyriaMusic, LyriaMusic as LyriaMusicHandler, } from "./providers/LyriaMusic.js";
|
|
26
|
+
export { ReplicateMusic, ReplicateMusic as ReplicateMusicHandler, } from "./providers/ReplicateMusic.js";
|
|
27
|
+
// ============================================================================
|
|
28
|
+
// AUTO-REGISTRATION
|
|
29
|
+
// ============================================================================
|
|
30
|
+
import { BeatovenMusic } from "./providers/BeatovenMusic.js";
|
|
31
|
+
import { ElevenLabsMusic } from "./providers/ElevenLabsMusic.js";
|
|
32
|
+
import { LyriaMusic } from "./providers/LyriaMusic.js";
|
|
33
|
+
import { ReplicateMusic } from "./providers/ReplicateMusic.js";
|
|
34
|
+
const MUSIC_HANDLER_CANDIDATES = [
|
|
35
|
+
{ name: "beatoven", factory: () => new BeatovenMusic() },
|
|
36
|
+
{
|
|
37
|
+
name: "elevenlabs-music",
|
|
38
|
+
aliases: ["elevenlabs-sound"],
|
|
39
|
+
factory: () => new ElevenLabsMusic(),
|
|
40
|
+
},
|
|
41
|
+
{ name: "lyria", factory: () => new LyriaMusic() },
|
|
42
|
+
{
|
|
43
|
+
name: "replicate",
|
|
44
|
+
aliases: ["musicgen"],
|
|
45
|
+
factory: () => new ReplicateMusic(),
|
|
46
|
+
},
|
|
47
|
+
];
|
|
48
|
+
/**
|
|
49
|
+
* Register every shipped music handler whose backing credentials are
|
|
50
|
+
* present in the environment. Safe to call multiple times — existing
|
|
51
|
+
* registrations are preserved.
|
|
52
|
+
*/
|
|
53
|
+
export function registerDefaultMusicHandlers() {
|
|
54
|
+
for (const { name, aliases, factory } of MUSIC_HANDLER_CANDIDATES) {
|
|
55
|
+
// Compute missingName / missingAliases separately so a pre-registered
|
|
56
|
+
// primary doesn't block alias backfill — keeps "musicgen" reachable
|
|
57
|
+
// when only "replicate" was wired up via another path (and likewise
|
|
58
|
+
// "elevenlabs-sound" vs "elevenlabs-music").
|
|
59
|
+
const missingName = !MusicProcessor.supports(name);
|
|
60
|
+
const missingAliases = (aliases ?? []).filter((alias) => !MusicProcessor.supports(alias));
|
|
61
|
+
if (!missingName && missingAliases.length === 0) {
|
|
62
|
+
continue;
|
|
63
|
+
}
|
|
64
|
+
try {
|
|
65
|
+
// Reuse the already-registered primary's handler for alias backfill
|
|
66
|
+
// when one exists — wiring an alias to a factory-fresh instance
|
|
67
|
+
// would silently diverge from the canonical primary's config.
|
|
68
|
+
let handler;
|
|
69
|
+
if (!missingName) {
|
|
70
|
+
handler = MusicProcessor.getHandler(name);
|
|
71
|
+
}
|
|
72
|
+
if (!handler) {
|
|
73
|
+
handler = factory();
|
|
74
|
+
if (!handler.isConfigured()) {
|
|
75
|
+
continue;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
if (missingName) {
|
|
79
|
+
MusicProcessor.registerHandler(name, handler);
|
|
80
|
+
}
|
|
81
|
+
for (const alias of missingAliases) {
|
|
82
|
+
MusicProcessor.registerHandler(alias, handler);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
catch (err) {
|
|
86
|
+
logger.debug(`[music] ${name} auto-registration skipped: ${err instanceof Error ? err.message : String(err)}`);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
// Run once at module import so consumers who follow the documented
|
|
91
|
+
// `nl.generate(...)` flow get every configured handler without manually
|
|
92
|
+
// calling `registerHandler`.
|
|
93
|
+
registerDefaultMusicHandlers();
|
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
import type { AIProviderName } from "../constants/enums.js";
|
|
2
2
|
import { BaseProvider } from "../core/baseProvider.js";
|
|
3
|
-
import type { StreamOptions, StreamResult, ZodUnknownSchema } from "../types/index.js";
|
|
4
|
-
import type { LanguageModel, Schema } from "../types/index.js";
|
|
3
|
+
import type { LanguageModel, Schema, StreamOptions, StreamResult, ZodUnknownSchema } from "../types/index.js";
|
|
5
4
|
/**
|
|
6
|
-
* OpenAI Compatible Provider
|
|
7
|
-
*
|
|
5
|
+
* OpenAI Compatible Provider — direct HTTP, no AI SDK.
|
|
6
|
+
*
|
|
7
|
+
* Talks to any OpenAI chat-completions-shaped endpoint (LiteLLM, vLLM,
|
|
8
|
+
* OpenRouter, etc.). The entire request/stream/tool-loop is inline above;
|
|
9
|
+
* no `streamText`, no `LanguageModelV3`, no `@ai-sdk/openai`.
|
|
8
10
|
*/
|
|
9
11
|
export declare class OpenAICompatibleProvider extends BaseProvider {
|
|
10
|
-
private model?;
|
|
11
12
|
private config;
|
|
13
|
+
private resolvedModel?;
|
|
12
14
|
private discoveredModel?;
|
|
13
|
-
private customOpenAI;
|
|
14
15
|
constructor(modelName?: string, sdk?: unknown, _region?: string, credentials?: {
|
|
15
16
|
apiKey?: string;
|
|
16
17
|
baseURL?: string;
|
|
@@ -18,33 +19,59 @@ export declare class OpenAICompatibleProvider extends BaseProvider {
|
|
|
18
19
|
protected getProviderName(): AIProviderName;
|
|
19
20
|
protected getDefaultModel(): string;
|
|
20
21
|
/**
|
|
21
|
-
*
|
|
22
|
-
*
|
|
22
|
+
* Abstract from BaseProvider — used by the parent's generate() path which
|
|
23
|
+
* still goes through `generateText`. Returns a thin LanguageModelV3-shaped
|
|
24
|
+
* object that delegates to the same HTTP helpers used by executeStream.
|
|
25
|
+
* Stays inside this file so no AI-SDK-named import is needed here.
|
|
23
26
|
*/
|
|
24
27
|
protected getAISDKModel(): Promise<LanguageModel>;
|
|
25
|
-
|
|
28
|
+
private resolveModelName;
|
|
26
29
|
/**
|
|
27
|
-
*
|
|
30
|
+
* Returns a minimal V3-shaped model. Only used by BaseProvider's
|
|
31
|
+
* `generate()` non-streaming path which still relies on the parent's
|
|
32
|
+
* `generateText`. The streaming path bypasses this entirely.
|
|
28
33
|
*/
|
|
34
|
+
private buildDelegatingModel;
|
|
35
|
+
protected formatProviderError(error: unknown): Error;
|
|
29
36
|
supportsTools(): boolean;
|
|
30
37
|
/**
|
|
31
|
-
*
|
|
32
|
-
*
|
|
38
|
+
* Streaming path — drives the OpenAI endpoint directly. No streamText,
|
|
39
|
+
* no AI SDK orchestrator. Tool calls, multi-step loops, telemetry,
|
|
40
|
+
* abort handling all inline.
|
|
33
41
|
*/
|
|
34
42
|
protected executeStream(options: StreamOptions, _analysisSchema?: ZodUnknownSchema | Schema<unknown>): Promise<StreamResult>;
|
|
35
43
|
/**
|
|
36
|
-
*
|
|
44
|
+
* Multi-step streaming orchestrator. One iteration per model turn:
|
|
37
45
|
*
|
|
38
|
-
*
|
|
39
|
-
*
|
|
46
|
+
* 1. POST /chat/completions with stream:true
|
|
47
|
+
* 2. Parse SSE; push text deltas to the consumer queue
|
|
48
|
+
* 3. If the step emitted tool_calls → execute each, append to
|
|
49
|
+
* conversation, loop again
|
|
50
|
+
* 4. Otherwise resolve the deferred analytics promises and exit
|
|
51
|
+
*
|
|
52
|
+
* Bounded by `args.maxSteps`. Any thrown error rejects loopPromise and
|
|
53
|
+
* is surfaced to the consumer via `await loopPromise` in the stream
|
|
54
|
+
* generator.
|
|
40
55
|
*/
|
|
41
|
-
|
|
56
|
+
private runStreamLoop;
|
|
42
57
|
/**
|
|
43
|
-
*
|
|
58
|
+
* One streaming round-trip: POST chat-completions, parse SSE, push text
|
|
59
|
+
* deltas to the consumer queue. Returns the accumulated SSE result so
|
|
60
|
+
* the caller can decide whether to run tools and re-stream.
|
|
44
61
|
*/
|
|
45
|
-
|
|
62
|
+
private streamOneStep;
|
|
46
63
|
/**
|
|
47
|
-
*
|
|
64
|
+
* Execute every tool_call collected from one streaming step:
|
|
65
|
+
*
|
|
66
|
+
* - append an `assistant` turn carrying the tool_calls
|
|
67
|
+
* - resolve each tool from the local registry and run it
|
|
68
|
+
* - emit tool:start/tool:end events
|
|
69
|
+
* - push per-execution summaries
|
|
70
|
+
* - append a `tool` turn per result so the next step can see them
|
|
71
|
+
* - mirror BaseProvider's tool-events + storage hooks
|
|
48
72
|
*/
|
|
73
|
+
private executeToolBatch;
|
|
74
|
+
getAvailableModels(): Promise<string[]>;
|
|
75
|
+
getFirstAvailableModel(): Promise<string>;
|
|
49
76
|
private getFallbackModels;
|
|
50
77
|
}
|