@dtelecom/agents-js 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +14 -2
- package/dist/index.d.ts +14 -2
- package/dist/index.js +79 -53
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +79 -53
- package/dist/index.mjs.map +1 -1
- package/dist/memory/index.d.mts +1 -1
- package/dist/memory/index.d.ts +1 -1
- package/dist/providers/index.d.mts +85 -2
- package/dist/providers/index.d.ts +85 -2
- package/dist/providers/index.js +469 -29
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/index.mjs +467 -29
- package/dist/providers/index.mjs.map +1 -1
- package/dist/{types-BVMiP1bW.d.mts → types-MPHcuMhp.d.mts} +4 -0
- package/dist/{types-BVMiP1bW.d.ts → types-MPHcuMhp.d.ts} +4 -0
- package/package.json +1 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { h as STTPlugin, i as STTStreamOptions, S as STTStream, L as LLMPlugin, M as Message, e as LLMChunk, j as TTSPlugin } from '../types-BVMiP1bW.mjs';
|
|
1
|
+
import { h as STTPlugin, i as STTStreamOptions, S as STTStream, L as LLMPlugin, M as Message, e as LLMChunk, j as TTSPlugin } from '../types-MPHcuMhp.mjs';
|
|
2
2
|
import '@dtelecom/server-sdk-node';
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -200,4 +200,87 @@ declare class DeepgramTTS implements TTSPlugin {
|
|
|
200
200
|
private ensureConnection;
|
|
201
201
|
}
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
/**
|
|
204
|
+
* DtelecomSTT — real-time streaming STT via dTelecom STT server (realtime-stt-m2).
|
|
205
|
+
*
|
|
206
|
+
* Protocol:
|
|
207
|
+
* - Connect to ws://<server>:<port> (address from options, no API key)
|
|
208
|
+
* - Send config: {"type":"config","language":"en"} (or "auto" for Parakeet auto-detect)
|
|
209
|
+
* - Wait for ready: {"type":"ready","client_id":"...","language":"en"}
|
|
210
|
+
* - Send audio as binary PCM16 16kHz mono frames
|
|
211
|
+
* - Receive transcriptions: {"type":"transcription","text":"...","is_final":true,"latency_ms":N}
|
|
212
|
+
* - Receive VAD events: {"type":"vad_event","event":"speech_start"|"speech_end"}
|
|
213
|
+
* - Keepalive via {"type":"ping"} / {"type":"pong"}
|
|
214
|
+
* - Mid-session reconfigure: send {"type":"config","language":"es","model":"whisper"} at any time
|
|
215
|
+
*/
|
|
216
|
+
|
|
217
|
+
interface DtelecomSTTOptions {
|
|
218
|
+
/** WebSocket server URL, e.g. "ws://192.168.1.100:8765" */
|
|
219
|
+
serverUrl: string;
|
|
220
|
+
/** Initial language (default: "auto" for Parakeet auto-detect) */
|
|
221
|
+
language?: string;
|
|
222
|
+
/** Force Whisper model even if Parakeet supports the language */
|
|
223
|
+
forceWhisper?: boolean;
|
|
224
|
+
}
|
|
225
|
+
declare class DtelecomSTT implements STTPlugin {
|
|
226
|
+
private readonly options;
|
|
227
|
+
constructor(options: DtelecomSTTOptions);
|
|
228
|
+
createStream(options?: STTStreamOptions): STTStream;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* DtelecomTTS — real-time streaming TTS via dTelecom TTS server (realtime-tts-m2).
|
|
233
|
+
*
|
|
234
|
+
* Protocol:
|
|
235
|
+
* - Connect to ws://<server>:<port> (address from options, no API key)
|
|
236
|
+
* - Send config: {"config":{"voice":"af_heart","lang_code":"a","speed":1.0}}
|
|
237
|
+
* - Send text: {"text":"Hello world"} — uses config defaults
|
|
238
|
+
* - Send text with per-message override: {"text":"Hola","voice":"ef_dora","lang_code":"e","speed":1.0}
|
|
239
|
+
* - Receive: {"type":"generating","text":"..."} then binary PCM16 48kHz chunks, then {"type":"done"}
|
|
240
|
+
* - Cancel: {"type":"clear"} → {"type":"cleared"}
|
|
241
|
+
*
|
|
242
|
+
* Key differences from DeepgramTTS:
|
|
243
|
+
* - Single WebSocket connection (not per-language pool)
|
|
244
|
+
* - Per-message voice/language switching instead of separate connections
|
|
245
|
+
* - Server outputs 48kHz PCM16 (resampled from Kokoro's native 24kHz)
|
|
246
|
+
* - Uses SSML <lang> tags to route text segments to correct voice (same as DeepgramTTS)
|
|
247
|
+
*/
|
|
248
|
+
|
|
249
|
+
interface VoiceConfig {
|
|
250
|
+
voice: string;
|
|
251
|
+
langCode: string;
|
|
252
|
+
}
|
|
253
|
+
interface DtelecomTTSOptions {
|
|
254
|
+
/** WebSocket server URL, e.g. "ws://192.168.1.100:8766" */
|
|
255
|
+
serverUrl: string;
|
|
256
|
+
/** Voice config per language: { en: { voice: "af_heart", langCode: "a" }, es: { voice: "ef_dora", langCode: "e" } } */
|
|
257
|
+
voices: Record<string, VoiceConfig>;
|
|
258
|
+
/** Default language code (default: "en") */
|
|
259
|
+
defaultLanguage?: string;
|
|
260
|
+
/** Speech speed multiplier (default: 1.0) */
|
|
261
|
+
speed?: number;
|
|
262
|
+
}
|
|
263
|
+
declare class DtelecomTTS implements TTSPlugin {
|
|
264
|
+
private readonly serverUrl;
|
|
265
|
+
private readonly voices;
|
|
266
|
+
private readonly defaultLang;
|
|
267
|
+
private readonly speed;
|
|
268
|
+
private ws;
|
|
269
|
+
private connectPromise;
|
|
270
|
+
private flushState;
|
|
271
|
+
/** Default language code for untagged text (e.g. 'en'). */
|
|
272
|
+
get defaultLanguage(): string;
|
|
273
|
+
constructor(options: DtelecomTTSOptions);
|
|
274
|
+
/** Pre-connect WebSocket to TTS server. */
|
|
275
|
+
warmup(): Promise<void>;
|
|
276
|
+
/** Close WebSocket connection. */
|
|
277
|
+
close(): void;
|
|
278
|
+
/** Strip SSML lang tags from text for display/events. */
|
|
279
|
+
cleanText(text: string): string;
|
|
280
|
+
synthesize(text: string, signal?: AbortSignal): AsyncGenerator<Buffer>;
|
|
281
|
+
private synthesizeSegment;
|
|
282
|
+
/** Ensure a WebSocket connection exists and is open. */
|
|
283
|
+
private ensureConnection;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
export { CartesiaTTS, type CartesiaTTSOptions, DeepgramSTT, type DeepgramSTTOptions, DeepgramTTS, type DeepgramTTSOptions, DtelecomSTT, type DtelecomSTTOptions, DtelecomTTS, type DtelecomTTSOptions, type VoiceConfig as DtelecomVoiceConfig, OpenRouterLLM, type OpenRouterLLMOptions };
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { h as STTPlugin, i as STTStreamOptions, S as STTStream, L as LLMPlugin, M as Message, e as LLMChunk, j as TTSPlugin } from '../types-BVMiP1bW.js';
|
|
1
|
+
import { h as STTPlugin, i as STTStreamOptions, S as STTStream, L as LLMPlugin, M as Message, e as LLMChunk, j as TTSPlugin } from '../types-MPHcuMhp.js';
|
|
2
2
|
import '@dtelecom/server-sdk-node';
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -200,4 +200,87 @@ declare class DeepgramTTS implements TTSPlugin {
|
|
|
200
200
|
private ensureConnection;
|
|
201
201
|
}
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
/**
|
|
204
|
+
* DtelecomSTT — real-time streaming STT via dTelecom STT server (realtime-stt-m2).
|
|
205
|
+
*
|
|
206
|
+
* Protocol:
|
|
207
|
+
* - Connect to ws://<server>:<port> (address from options, no API key)
|
|
208
|
+
* - Send config: {"type":"config","language":"en"} (or "auto" for Parakeet auto-detect)
|
|
209
|
+
* - Wait for ready: {"type":"ready","client_id":"...","language":"en"}
|
|
210
|
+
* - Send audio as binary PCM16 16kHz mono frames
|
|
211
|
+
* - Receive transcriptions: {"type":"transcription","text":"...","is_final":true,"latency_ms":N}
|
|
212
|
+
* - Receive VAD events: {"type":"vad_event","event":"speech_start"|"speech_end"}
|
|
213
|
+
* - Keepalive via {"type":"ping"} / {"type":"pong"}
|
|
214
|
+
* - Mid-session reconfigure: send {"type":"config","language":"es","model":"whisper"} at any time
|
|
215
|
+
*/
|
|
216
|
+
|
|
217
|
+
interface DtelecomSTTOptions {
|
|
218
|
+
/** WebSocket server URL, e.g. "ws://192.168.1.100:8765" */
|
|
219
|
+
serverUrl: string;
|
|
220
|
+
/** Initial language (default: "auto" for Parakeet auto-detect) */
|
|
221
|
+
language?: string;
|
|
222
|
+
/** Force Whisper model even if Parakeet supports the language */
|
|
223
|
+
forceWhisper?: boolean;
|
|
224
|
+
}
|
|
225
|
+
declare class DtelecomSTT implements STTPlugin {
|
|
226
|
+
private readonly options;
|
|
227
|
+
constructor(options: DtelecomSTTOptions);
|
|
228
|
+
createStream(options?: STTStreamOptions): STTStream;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* DtelecomTTS — real-time streaming TTS via dTelecom TTS server (realtime-tts-m2).
|
|
233
|
+
*
|
|
234
|
+
* Protocol:
|
|
235
|
+
* - Connect to ws://<server>:<port> (address from options, no API key)
|
|
236
|
+
* - Send config: {"config":{"voice":"af_heart","lang_code":"a","speed":1.0}}
|
|
237
|
+
* - Send text: {"text":"Hello world"} — uses config defaults
|
|
238
|
+
* - Send text with per-message override: {"text":"Hola","voice":"ef_dora","lang_code":"e","speed":1.0}
|
|
239
|
+
* - Receive: {"type":"generating","text":"..."} then binary PCM16 48kHz chunks, then {"type":"done"}
|
|
240
|
+
* - Cancel: {"type":"clear"} → {"type":"cleared"}
|
|
241
|
+
*
|
|
242
|
+
* Key differences from DeepgramTTS:
|
|
243
|
+
* - Single WebSocket connection (not per-language pool)
|
|
244
|
+
* - Per-message voice/language switching instead of separate connections
|
|
245
|
+
* - Server outputs 48kHz PCM16 (resampled from Kokoro's native 24kHz)
|
|
246
|
+
* - Uses SSML <lang> tags to route text segments to correct voice (same as DeepgramTTS)
|
|
247
|
+
*/
|
|
248
|
+
|
|
249
|
+
interface VoiceConfig {
|
|
250
|
+
voice: string;
|
|
251
|
+
langCode: string;
|
|
252
|
+
}
|
|
253
|
+
interface DtelecomTTSOptions {
|
|
254
|
+
/** WebSocket server URL, e.g. "ws://192.168.1.100:8766" */
|
|
255
|
+
serverUrl: string;
|
|
256
|
+
/** Voice config per language: { en: { voice: "af_heart", langCode: "a" }, es: { voice: "ef_dora", langCode: "e" } } */
|
|
257
|
+
voices: Record<string, VoiceConfig>;
|
|
258
|
+
/** Default language code (default: "en") */
|
|
259
|
+
defaultLanguage?: string;
|
|
260
|
+
/** Speech speed multiplier (default: 1.0) */
|
|
261
|
+
speed?: number;
|
|
262
|
+
}
|
|
263
|
+
declare class DtelecomTTS implements TTSPlugin {
|
|
264
|
+
private readonly serverUrl;
|
|
265
|
+
private readonly voices;
|
|
266
|
+
private readonly defaultLang;
|
|
267
|
+
private readonly speed;
|
|
268
|
+
private ws;
|
|
269
|
+
private connectPromise;
|
|
270
|
+
private flushState;
|
|
271
|
+
/** Default language code for untagged text (e.g. 'en'). */
|
|
272
|
+
get defaultLanguage(): string;
|
|
273
|
+
constructor(options: DtelecomTTSOptions);
|
|
274
|
+
/** Pre-connect WebSocket to TTS server. */
|
|
275
|
+
warmup(): Promise<void>;
|
|
276
|
+
/** Close WebSocket connection. */
|
|
277
|
+
close(): void;
|
|
278
|
+
/** Strip SSML lang tags from text for display/events. */
|
|
279
|
+
cleanText(text: string): string;
|
|
280
|
+
synthesize(text: string, signal?: AbortSignal): AsyncGenerator<Buffer>;
|
|
281
|
+
private synthesizeSegment;
|
|
282
|
+
/** Ensure a WebSocket connection exists and is open. */
|
|
283
|
+
private ensureConnection;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
export { CartesiaTTS, type CartesiaTTSOptions, DeepgramSTT, type DeepgramSTTOptions, DeepgramTTS, type DeepgramTTSOptions, DtelecomSTT, type DtelecomSTTOptions, DtelecomTTS, type DtelecomTTSOptions, type VoiceConfig as DtelecomVoiceConfig, OpenRouterLLM, type OpenRouterLLMOptions };
|