@dtelecom/agents-js 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +44 -15
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +44 -15
- package/dist/index.mjs.map +1 -1
- package/dist/memory/index.d.mts +1 -1
- package/dist/memory/index.d.ts +1 -1
- package/dist/providers/index.d.mts +1 -1
- package/dist/providers/index.d.ts +1 -1
- package/dist/providers/index.js +42 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/index.mjs +42 -0
- package/dist/providers/index.mjs.map +1 -1
- package/dist/{types-BBKtiPvm.d.mts → types-BJylZd8Q.d.mts} +28 -5
- package/dist/{types-BBKtiPvm.d.ts → types-BJylZd8Q.d.ts} +28 -5
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
|
|
2
2
|
import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
|
|
3
3
|
import { EventEmitter } from 'events';
|
|
4
|
-
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.mjs';
|
|
5
|
-
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.mjs';
|
|
4
|
+
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BJylZd8Q.mjs';
|
|
5
|
+
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-BJylZd8Q.mjs';
|
|
6
6
|
|
|
7
7
|
declare class VoiceAgent extends EventEmitter {
|
|
8
8
|
private readonly config;
|
|
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
|
|
|
111
111
|
private readonly nameVariants;
|
|
112
112
|
private readonly beforeRespond?;
|
|
113
113
|
private readonly memory?;
|
|
114
|
+
private readonly tools?;
|
|
114
115
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
115
116
|
private cleanText;
|
|
116
117
|
/** Active STT streams, keyed by participant identity */
|
|
@@ -124,6 +125,7 @@ declare class Pipeline extends EventEmitter {
|
|
|
124
125
|
private readonly _warmupPromise;
|
|
125
126
|
private readonly _ttsWarmupPromise;
|
|
126
127
|
private readonly _llmWarmupPromise;
|
|
128
|
+
private readonly _audioReadyPromise;
|
|
127
129
|
get processing(): boolean;
|
|
128
130
|
get running(): boolean;
|
|
129
131
|
get agentState(): AgentState;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
|
|
2
2
|
import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
|
|
3
3
|
import { EventEmitter } from 'events';
|
|
4
|
-
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.js';
|
|
5
|
-
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.js';
|
|
4
|
+
import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BJylZd8Q.js';
|
|
5
|
+
export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-BJylZd8Q.js';
|
|
6
6
|
|
|
7
7
|
declare class VoiceAgent extends EventEmitter {
|
|
8
8
|
private readonly config;
|
|
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
|
|
|
111
111
|
private readonly nameVariants;
|
|
112
112
|
private readonly beforeRespond?;
|
|
113
113
|
private readonly memory?;
|
|
114
|
+
private readonly tools?;
|
|
114
115
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
115
116
|
private cleanText;
|
|
116
117
|
/** Active STT streams, keyed by participant identity */
|
|
@@ -124,6 +125,7 @@ declare class Pipeline extends EventEmitter {
|
|
|
124
125
|
private readonly _warmupPromise;
|
|
125
126
|
private readonly _ttsWarmupPromise;
|
|
126
127
|
private readonly _llmWarmupPromise;
|
|
128
|
+
private readonly _audioReadyPromise;
|
|
127
129
|
get processing(): boolean;
|
|
128
130
|
get running(): boolean;
|
|
129
131
|
get agentState(): AgentState;
|
package/dist/index.js
CHANGED
|
@@ -682,11 +682,21 @@ var AudioOutput = class {
|
|
|
682
682
|
_responding = false;
|
|
683
683
|
_stopped = false;
|
|
684
684
|
silenceInterval = null;
|
|
685
|
+
/** Resolves when the RTP transport is ready and initial silence has been sent. */
|
|
686
|
+
whenReady;
|
|
687
|
+
_resolveReady;
|
|
685
688
|
/** When set, raw PCM from TTS is saved to this directory as WAV files for debugging. */
|
|
686
689
|
dumpDir = null;
|
|
687
690
|
dumpCounter = 0;
|
|
688
691
|
constructor(source) {
|
|
689
692
|
this.source = source;
|
|
693
|
+
if (source.ready) {
|
|
694
|
+
this.whenReady = Promise.resolve();
|
|
695
|
+
} else {
|
|
696
|
+
this.whenReady = new Promise((resolve) => {
|
|
697
|
+
this._resolveReady = resolve;
|
|
698
|
+
});
|
|
699
|
+
}
|
|
690
700
|
}
|
|
691
701
|
get playing() {
|
|
692
702
|
return this._playing;
|
|
@@ -714,8 +724,11 @@ var AudioOutput = class {
|
|
|
714
724
|
startSilence() {
|
|
715
725
|
if (this.silenceInterval) return;
|
|
716
726
|
const startKeepalive = () => {
|
|
717
|
-
log3.debug("Transport ready \u2014 sending initial silence + starting 3s keepalive");
|
|
718
|
-
|
|
727
|
+
log3.debug("Transport ready \u2014 sending initial silence burst + starting 3s keepalive");
|
|
728
|
+
for (let i = 0; i < 15; i++) {
|
|
729
|
+
this.sendSilenceFrame();
|
|
730
|
+
}
|
|
731
|
+
this._resolveReady?.();
|
|
719
732
|
this.silenceInterval = setInterval(() => {
|
|
720
733
|
if (!this._playing && !this._responding && !this._stopped) {
|
|
721
734
|
this.sendSilenceFrame();
|
|
@@ -1208,6 +1221,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1208
1221
|
nameVariants;
|
|
1209
1222
|
beforeRespond;
|
|
1210
1223
|
memory;
|
|
1224
|
+
tools;
|
|
1211
1225
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
1212
1226
|
cleanText(text) {
|
|
1213
1227
|
return this.tts?.cleanText ? this.tts.cleanText(text) : text;
|
|
@@ -1230,6 +1244,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1230
1244
|
this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
|
|
1231
1245
|
this.beforeRespond = options.beforeRespond;
|
|
1232
1246
|
this.memory = options.memory;
|
|
1247
|
+
this.tools = options.tools;
|
|
1233
1248
|
this.context = new ContextManager({
|
|
1234
1249
|
instructions: options.instructions,
|
|
1235
1250
|
maxContextTokens: options.maxContextTokens
|
|
@@ -1252,12 +1267,14 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1252
1267
|
this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
|
|
1253
1268
|
log7.warn("LLM warmup failed (non-fatal):", err);
|
|
1254
1269
|
}) : Promise.resolve();
|
|
1255
|
-
this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
|
|
1270
|
+
this._audioReadyPromise = this.audioOutput.whenReady;
|
|
1271
|
+
this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise, this._audioReadyPromise]).then(() => {
|
|
1256
1272
|
});
|
|
1257
1273
|
}
|
|
1258
1274
|
_warmupPromise;
|
|
1259
1275
|
_ttsWarmupPromise;
|
|
1260
1276
|
_llmWarmupPromise;
|
|
1277
|
+
_audioReadyPromise;
|
|
1261
1278
|
get processing() {
|
|
1262
1279
|
return this._processing;
|
|
1263
1280
|
}
|
|
@@ -1415,6 +1432,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1415
1432
|
wake();
|
|
1416
1433
|
};
|
|
1417
1434
|
const MAX_LLM_RETRIES = 2;
|
|
1435
|
+
let toolCallEmitted = false;
|
|
1418
1436
|
const producer = async () => {
|
|
1419
1437
|
const defaultLang = this.tts?.defaultLanguage;
|
|
1420
1438
|
for (let attempt = 0; attempt <= MAX_LLM_RETRIES; attempt++) {
|
|
@@ -1433,7 +1451,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1433
1451
|
segBuf.length = 0;
|
|
1434
1452
|
pushSentence(combined);
|
|
1435
1453
|
};
|
|
1436
|
-
const llmStream = this.llm.chat(messages, signal);
|
|
1454
|
+
const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
|
|
1437
1455
|
try {
|
|
1438
1456
|
while (!signal.aborted) {
|
|
1439
1457
|
const { value: chunk, done } = await llmStream.next();
|
|
@@ -1462,6 +1480,10 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1462
1480
|
for (const sentence of sentences) {
|
|
1463
1481
|
pushSentence(sentence);
|
|
1464
1482
|
}
|
|
1483
|
+
} else if (chunk.type === "tool_call" && chunk.toolCall) {
|
|
1484
|
+
log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
|
|
1485
|
+
toolCallEmitted = true;
|
|
1486
|
+
this.emit("toolCall", chunk.toolCall);
|
|
1465
1487
|
}
|
|
1466
1488
|
}
|
|
1467
1489
|
} finally {
|
|
@@ -1473,7 +1495,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1473
1495
|
if (remaining) {
|
|
1474
1496
|
pushSentence(remaining);
|
|
1475
1497
|
}
|
|
1476
|
-
if (fullResponse.trim()) {
|
|
1498
|
+
if (fullResponse.trim() || toolCallEmitted) {
|
|
1477
1499
|
break;
|
|
1478
1500
|
}
|
|
1479
1501
|
log7.warn(`LLM produced no output (attempt ${attempt + 1}/${MAX_LLM_RETRIES + 1})`);
|
|
@@ -1520,14 +1542,19 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1520
1542
|
}
|
|
1521
1543
|
};
|
|
1522
1544
|
tryPrefetch();
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
tFirstAudioPlayed
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1545
|
+
try {
|
|
1546
|
+
await this.synthesizeAndPlay(sentence, signal, (t) => {
|
|
1547
|
+
if (!tFirstAudioPlayed) {
|
|
1548
|
+
tFirstAudioPlayed = t;
|
|
1549
|
+
this.setAgentState("speaking");
|
|
1550
|
+
}
|
|
1551
|
+
this.emit("sentence", this.cleanText(sentence), sentence);
|
|
1552
|
+
tryPrefetch();
|
|
1553
|
+
}, existingStream);
|
|
1554
|
+
} catch (ttsErr) {
|
|
1555
|
+
if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
|
|
1556
|
+
log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
|
|
1557
|
+
}
|
|
1531
1558
|
}
|
|
1532
1559
|
} finally {
|
|
1533
1560
|
if (!signal.aborted) {
|
|
@@ -1580,7 +1607,7 @@ var Pipeline = class extends import_events.EventEmitter {
|
|
|
1580
1607
|
return;
|
|
1581
1608
|
}
|
|
1582
1609
|
this._processing = true;
|
|
1583
|
-
await this._ttsWarmupPromise;
|
|
1610
|
+
await Promise.all([this._ttsWarmupPromise, this._audioReadyPromise]);
|
|
1584
1611
|
log7.info(`say(): "${text.slice(0, 60)}"`);
|
|
1585
1612
|
try {
|
|
1586
1613
|
const signal = this.bargeIn.startCycle();
|
|
@@ -1735,12 +1762,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
|
|
|
1735
1762
|
agentName: this.config.agentName,
|
|
1736
1763
|
nameVariants: this.config.nameVariants,
|
|
1737
1764
|
memory: this.memory ?? void 0,
|
|
1738
|
-
maxContextTokens: this.config.maxContextTokens
|
|
1765
|
+
maxContextTokens: this.config.maxContextTokens,
|
|
1766
|
+
tools: this.config.tools
|
|
1739
1767
|
});
|
|
1740
1768
|
this.pipeline.on("transcription", (result) => this.emit("transcription", result));
|
|
1741
1769
|
this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
|
|
1742
1770
|
this.pipeline.on("response", (text) => this.emit("response", text));
|
|
1743
1771
|
this.pipeline.on("agentState", (state) => this.emit("agentState", state));
|
|
1772
|
+
this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
|
|
1744
1773
|
this.pipeline.on("error", (error) => this.emit("error", error));
|
|
1745
1774
|
for (const participant of this.connection.room.remoteParticipants.values()) {
|
|
1746
1775
|
for (const [, pub] of participant.trackPublications) {
|