@dtelecom/agents-js 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +6 -5
- package/dist/index.d.ts +6 -5
- package/dist/index.js +100 -34
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +100 -34
- package/dist/index.mjs.map +1 -1
- package/dist/memory/index.d.mts +1 -1
- package/dist/memory/index.d.ts +1 -1
- package/dist/providers/index.d.mts +1 -1
- package/dist/providers/index.d.ts +1 -1
- package/dist/providers/index.js +42 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/index.mjs +42 -0
- package/dist/providers/index.mjs.map +1 -1
- package/dist/{types-BBKtiPvm.d.mts → types-Di_jxIgs.d.mts} +25 -5
- package/dist/{types-BBKtiPvm.d.ts → types-Di_jxIgs.d.ts} +25 -5
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
|
|
2
2
|
import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
|
|
3
3
|
import { EventEmitter } from 'events';
|
|
4
|
-
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.mjs';
|
|
5
|
-
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.mjs';
|
|
4
|
+
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Di_jxIgs.mjs';
|
|
5
|
+
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-Di_jxIgs.mjs';
|
|
6
6
|
|
|
7
7
|
declare class VoiceAgent extends EventEmitter {
|
|
8
8
|
private readonly config;
|
|
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
|
|
|
111
111
|
private readonly nameVariants;
|
|
112
112
|
private readonly beforeRespond?;
|
|
113
113
|
private readonly memory?;
|
|
114
|
+
private readonly tools?;
|
|
114
115
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
115
116
|
private cleanText;
|
|
116
117
|
/** Active STT streams, keyed by participant identity */
|
|
@@ -121,9 +122,9 @@ declare class Pipeline extends EventEmitter {
|
|
|
121
122
|
/** Queued turn while current one is still processing */
|
|
122
123
|
private pendingTurn;
|
|
123
124
|
constructor(options: PipelineOptions);
|
|
124
|
-
|
|
125
|
-
private
|
|
126
|
-
private
|
|
125
|
+
private readonly _warmupPromise;
|
|
126
|
+
private readonly _ttsWarmupPromise;
|
|
127
|
+
private readonly _llmWarmupPromise;
|
|
127
128
|
get processing(): boolean;
|
|
128
129
|
get running(): boolean;
|
|
129
130
|
get agentState(): AgentState;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
|
|
2
2
|
import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
|
|
3
3
|
import { EventEmitter } from 'events';
|
|
4
|
-
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.js';
|
|
5
|
-
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.js';
|
|
4
|
+
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Di_jxIgs.js';
|
|
5
|
+
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-Di_jxIgs.js';
|
|
6
6
|
|
|
7
7
|
declare class VoiceAgent extends EventEmitter {
|
|
8
8
|
private readonly config;
|
|
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
|
|
|
111
111
|
private readonly nameVariants;
|
|
112
112
|
private readonly beforeRespond?;
|
|
113
113
|
private readonly memory?;
|
|
114
|
+
private readonly tools?;
|
|
114
115
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
115
116
|
private cleanText;
|
|
116
117
|
/** Active STT streams, keyed by participant identity */
|
|
@@ -121,9 +122,9 @@ declare class Pipeline extends EventEmitter {
|
|
|
121
122
|
/** Queued turn while current one is still processing */
|
|
122
123
|
private pendingTurn;
|
|
123
124
|
constructor(options: PipelineOptions);
|
|
124
|
-
|
|
125
|
-
private
|
|
126
|
-
private
|
|
125
|
+
private readonly _warmupPromise;
|
|
126
|
+
private readonly _ttsWarmupPromise;
|
|
127
|
+
private readonly _llmWarmupPromise;
|
|
127
128
|
get processing(): boolean;
|
|
128
129
|
get running(): boolean;
|
|
129
130
|
get agentState(): AgentState;
|
package/dist/index.js
CHANGED
|
@@ -1151,6 +1151,49 @@ var AUDIO_DRAIN_MS = 800;
|
|
|
1151
1151
|
function sleep2(ms) {
|
|
1152
1152
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1153
1153
|
}
|
|
1154
|
+
function prefetchTTS(tts, text, signal) {
|
|
1155
|
+
const buffer = [];
|
|
1156
|
+
let done = false;
|
|
1157
|
+
let error = null;
|
|
1158
|
+
let wake = null;
|
|
1159
|
+
const notify = () => {
|
|
1160
|
+
if (wake) {
|
|
1161
|
+
const w = wake;
|
|
1162
|
+
wake = null;
|
|
1163
|
+
w();
|
|
1164
|
+
}
|
|
1165
|
+
};
|
|
1166
|
+
void (async () => {
|
|
1167
|
+
try {
|
|
1168
|
+
const stream = tts.synthesize(text, signal);
|
|
1169
|
+
for await (const chunk of stream) {
|
|
1170
|
+
if (signal?.aborted) break;
|
|
1171
|
+
buffer.push(chunk);
|
|
1172
|
+
notify();
|
|
1173
|
+
}
|
|
1174
|
+
} catch (e) {
|
|
1175
|
+
if (!(e instanceof Error && e.name === "AbortError")) error = e;
|
|
1176
|
+
} finally {
|
|
1177
|
+
done = true;
|
|
1178
|
+
notify();
|
|
1179
|
+
}
|
|
1180
|
+
})();
|
|
1181
|
+
return async function* () {
|
|
1182
|
+
let index = 0;
|
|
1183
|
+
while (true) {
|
|
1184
|
+
if (signal?.aborted) return;
|
|
1185
|
+
if (error) throw error;
|
|
1186
|
+
if (index < buffer.length) {
|
|
1187
|
+
yield buffer[index++];
|
|
1188
|
+
continue;
|
|
1189
|
+
}
|
|
1190
|
+
if (done) return;
|
|
1191
|
+
await new Promise((r) => {
|
|
1192
|
+
wake = r;
|
|
1193
|
+
});
|
|
1194
|
+
}
|
|
1195
|
+
};
|
|
1196
|
+
}
|
|
1154
1197
|
var Pipeline = class extends import_events.EventEmitter {
|
|
1155
1198
|
stt;
|
|
1156
1199
|
llm;
|
|
@@ -1165,6 +1208,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1165
1208
|
nameVariants;
|
|
1166
1209
|
beforeRespond;
|
|
1167
1210
|
memory;
|
|
1211
|
+
tools;
|
|
1168
1212
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
1169
1213
|
cleanText(text) {
|
|
1170
1214
|
return this.tts?.cleanText ? this.tts.cleanText(text) : text;
|
|
@@ -1187,6 +1231,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1187
1231
|
this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
|
|
1188
1232
|
this.beforeRespond = options.beforeRespond;
|
|
1189
1233
|
this.memory = options.memory;
|
|
1234
|
+
this.tools = options.tools;
|
|
1190
1235
|
this.context = new ContextManager({
|
|
1191
1236
|
instructions: options.instructions,
|
|
1192
1237
|
maxContextTokens: options.maxContextTokens
|
|
@@ -1203,28 +1248,18 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1203
1248
|
this.splitter.reset();
|
|
1204
1249
|
this.setAgentState("idle");
|
|
1205
1250
|
};
|
|
1206
|
-
this.
|
|
1251
|
+
this._ttsWarmupPromise = this.tts?.warmup ? this.tts.warmup().catch((err) => {
|
|
1252
|
+
log7.warn("TTS warmup failed (non-fatal):", err);
|
|
1253
|
+
}) : Promise.resolve();
|
|
1254
|
+
this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
|
|
1255
|
+
log7.warn("LLM warmup failed (non-fatal):", err);
|
|
1256
|
+
}) : Promise.resolve();
|
|
1257
|
+
this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
|
|
1258
|
+
});
|
|
1207
1259
|
}
|
|
1208
|
-
/** One-shot warmup — safe to call from constructor, resolves when both LLM and TTS are ready. */
|
|
1209
1260
|
_warmupPromise;
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
if (this.llm.warmup) {
|
|
1213
|
-
tasks.push(
|
|
1214
|
-
this.llm.warmup(instructions).catch((err) => {
|
|
1215
|
-
log7.warn("LLM warmup failed:", err);
|
|
1216
|
-
})
|
|
1217
|
-
);
|
|
1218
|
-
}
|
|
1219
|
-
if (this.tts?.warmup) {
|
|
1220
|
-
tasks.push(
|
|
1221
|
-
this.tts.warmup().catch((err) => {
|
|
1222
|
-
log7.warn("TTS warmup failed:", err);
|
|
1223
|
-
})
|
|
1224
|
-
);
|
|
1225
|
-
}
|
|
1226
|
-
await Promise.all(tasks);
|
|
1227
|
-
}
|
|
1261
|
+
_ttsWarmupPromise;
|
|
1262
|
+
_llmWarmupPromise;
|
|
1228
1263
|
get processing() {
|
|
1229
1264
|
return this._processing;
|
|
1230
1265
|
}
|
|
@@ -1400,7 +1435,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1400
1435
|
segBuf.length = 0;
|
|
1401
1436
|
pushSentence(combined);
|
|
1402
1437
|
};
|
|
1403
|
-
const llmStream = this.llm.chat(messages, signal);
|
|
1438
|
+
const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
|
|
1404
1439
|
try {
|
|
1405
1440
|
while (!signal.aborted) {
|
|
1406
1441
|
const { value: chunk, done } = await llmStream.next();
|
|
@@ -1429,6 +1464,9 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1429
1464
|
for (const sentence of sentences) {
|
|
1430
1465
|
pushSentence(sentence);
|
|
1431
1466
|
}
|
|
1467
|
+
} else if (chunk.type === "tool_call" && chunk.toolCall) {
|
|
1468
|
+
log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
|
|
1469
|
+
this.emit("toolCall", chunk.toolCall);
|
|
1432
1470
|
}
|
|
1433
1471
|
}
|
|
1434
1472
|
} finally {
|
|
@@ -1451,29 +1489,55 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1451
1489
|
};
|
|
1452
1490
|
const consumer = async () => {
|
|
1453
1491
|
this.audioOutput.beginResponse();
|
|
1492
|
+
const state = { prefetched: null };
|
|
1454
1493
|
try {
|
|
1455
1494
|
while (true) {
|
|
1456
1495
|
if (signal.aborted) break;
|
|
1457
|
-
|
|
1458
|
-
|
|
1496
|
+
let sentence;
|
|
1497
|
+
let existingStream;
|
|
1498
|
+
if (state.prefetched) {
|
|
1499
|
+
sentence = state.prefetched.sentence;
|
|
1500
|
+
existingStream = state.prefetched.streamFn();
|
|
1501
|
+
state.prefetched = null;
|
|
1502
|
+
} else if (sentenceQueue.length > 0) {
|
|
1503
|
+
sentence = sentenceQueue.shift();
|
|
1459
1504
|
if (!/\w/.test(sentence)) {
|
|
1460
1505
|
log7.debug(`Skipping non-word sentence: "${sentence}"`);
|
|
1461
1506
|
continue;
|
|
1462
1507
|
}
|
|
1508
|
+
existingStream = void 0;
|
|
1509
|
+
} else if (producerDone) {
|
|
1510
|
+
break;
|
|
1511
|
+
} else {
|
|
1512
|
+
await new Promise((resolve) => {
|
|
1513
|
+
wakeConsumer = resolve;
|
|
1514
|
+
});
|
|
1515
|
+
wakeConsumer = null;
|
|
1516
|
+
continue;
|
|
1517
|
+
}
|
|
1518
|
+
const tryPrefetch = () => {
|
|
1519
|
+
if (state.prefetched || !this.tts) return;
|
|
1520
|
+
if (sentenceQueue.length > 0) {
|
|
1521
|
+
const next = sentenceQueue.shift();
|
|
1522
|
+
if (/\w/.test(next)) {
|
|
1523
|
+
state.prefetched = { sentence: next, streamFn: prefetchTTS(this.tts, next, signal) };
|
|
1524
|
+
}
|
|
1525
|
+
}
|
|
1526
|
+
};
|
|
1527
|
+
tryPrefetch();
|
|
1528
|
+
try {
|
|
1463
1529
|
await this.synthesizeAndPlay(sentence, signal, (t) => {
|
|
1464
1530
|
if (!tFirstAudioPlayed) {
|
|
1465
1531
|
tFirstAudioPlayed = t;
|
|
1466
1532
|
this.setAgentState("speaking");
|
|
1467
1533
|
}
|
|
1468
1534
|
this.emit("sentence", this.cleanText(sentence), sentence);
|
|
1469
|
-
|
|
1470
|
-
|
|
1535
|
+
tryPrefetch();
|
|
1536
|
+
}, existingStream);
|
|
1537
|
+
} catch (ttsErr) {
|
|
1538
|
+
if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
|
|
1539
|
+
log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
|
|
1471
1540
|
}
|
|
1472
|
-
if (producerDone) break;
|
|
1473
|
-
await new Promise((resolve) => {
|
|
1474
|
-
wakeConsumer = resolve;
|
|
1475
|
-
});
|
|
1476
|
-
wakeConsumer = null;
|
|
1477
1541
|
}
|
|
1478
1542
|
} finally {
|
|
1479
1543
|
if (!signal.aborted) {
|
|
@@ -1526,7 +1590,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1526
1590
|
return;
|
|
1527
1591
|
}
|
|
1528
1592
|
this._processing = true;
|
|
1529
|
-
await this.
|
|
1593
|
+
await this._ttsWarmupPromise;
|
|
1530
1594
|
log7.info(`say(): "${text.slice(0, 60)}"`);
|
|
1531
1595
|
try {
|
|
1532
1596
|
const signal = this.bargeIn.startCycle();
|
|
@@ -1563,7 +1627,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1563
1627
|
}
|
|
1564
1628
|
}
|
|
1565
1629
|
}
|
|
1566
|
-
async synthesizeAndPlay(text, signal, onFirstAudio) {
|
|
1630
|
+
async synthesizeAndPlay(text, signal, onFirstAudio, existingStream) {
|
|
1567
1631
|
if (!this.tts || signal.aborted) {
|
|
1568
1632
|
log7.info(`[Agent says]: ${text}`);
|
|
1569
1633
|
return;
|
|
@@ -1572,7 +1636,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1572
1636
|
const ttsStart = performance.now();
|
|
1573
1637
|
let firstChunk = true;
|
|
1574
1638
|
let ttsChunkCount = 0;
|
|
1575
|
-
const ttsStream = this.tts.synthesize(text, signal);
|
|
1639
|
+
const ttsStream = existingStream ?? this.tts.synthesize(text, signal);
|
|
1576
1640
|
const measuredStream = async function* () {
|
|
1577
1641
|
for await (const chunk of ttsStream) {
|
|
1578
1642
|
ttsChunkCount++;
|
|
@@ -1681,12 +1745,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
|
|
|
1681
1745
|
agentName: this.config.agentName,
|
|
1682
1746
|
nameVariants: this.config.nameVariants,
|
|
1683
1747
|
memory: this.memory ?? void 0,
|
|
1684
|
-
maxContextTokens: this.config.maxContextTokens
|
|
1748
|
+
maxContextTokens: this.config.maxContextTokens,
|
|
1749
|
+
tools: this.config.tools
|
|
1685
1750
|
});
|
|
1686
1751
|
this.pipeline.on("transcription", (result) => this.emit("transcription", result));
|
|
1687
1752
|
this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
|
|
1688
1753
|
this.pipeline.on("response", (text) => this.emit("response", text));
|
|
1689
1754
|
this.pipeline.on("agentState", (state) => this.emit("agentState", state));
|
|
1755
|
+
this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
|
|
1690
1756
|
this.pipeline.on("error", (error) => this.emit("error", error));
|
|
1691
1757
|
for (const participant of this.connection.room.remoteParticipants.values()) {
|
|
1692
1758
|
for (const [, pub] of participant.trackPublications) {
|