@dtelecom/agents-js 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
2
  import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
3
  import { EventEmitter } from 'events';
4
- import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.mjs';
5
- export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.mjs';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BJylZd8Q.mjs';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-BJylZd8Q.mjs';
6
6
 
7
7
  declare class VoiceAgent extends EventEmitter {
8
8
  private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
111
111
  private readonly nameVariants;
112
112
  private readonly beforeRespond?;
113
113
  private readonly memory?;
114
+ private readonly tools?;
114
115
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
115
116
  private cleanText;
116
117
  /** Active STT streams, keyed by participant identity */
@@ -124,6 +125,7 @@ declare class Pipeline extends EventEmitter {
124
125
  private readonly _warmupPromise;
125
126
  private readonly _ttsWarmupPromise;
126
127
  private readonly _llmWarmupPromise;
128
+ private readonly _audioReadyPromise;
127
129
  get processing(): boolean;
128
130
  get running(): boolean;
129
131
  get agentState(): AgentState;
package/dist/index.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
2
  import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
3
  import { EventEmitter } from 'events';
4
- import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.js';
5
- export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.js';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BJylZd8Q.js';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-BJylZd8Q.js';
6
6
 
7
7
  declare class VoiceAgent extends EventEmitter {
8
8
  private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
111
111
  private readonly nameVariants;
112
112
  private readonly beforeRespond?;
113
113
  private readonly memory?;
114
+ private readonly tools?;
114
115
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
115
116
  private cleanText;
116
117
  /** Active STT streams, keyed by participant identity */
@@ -124,6 +125,7 @@ declare class Pipeline extends EventEmitter {
124
125
  private readonly _warmupPromise;
125
126
  private readonly _ttsWarmupPromise;
126
127
  private readonly _llmWarmupPromise;
128
+ private readonly _audioReadyPromise;
127
129
  get processing(): boolean;
128
130
  get running(): boolean;
129
131
  get agentState(): AgentState;
package/dist/index.js CHANGED
@@ -682,11 +682,21 @@ var AudioOutput = class {
682
682
  _responding = false;
683
683
  _stopped = false;
684
684
  silenceInterval = null;
685
+ /** Resolves when the RTP transport is ready and initial silence has been sent. */
686
+ whenReady;
687
+ _resolveReady;
685
688
  /** When set, raw PCM from TTS is saved to this directory as WAV files for debugging. */
686
689
  dumpDir = null;
687
690
  dumpCounter = 0;
688
691
  constructor(source) {
689
692
  this.source = source;
693
+ if (source.ready) {
694
+ this.whenReady = Promise.resolve();
695
+ } else {
696
+ this.whenReady = new Promise((resolve) => {
697
+ this._resolveReady = resolve;
698
+ });
699
+ }
690
700
  }
691
701
  get playing() {
692
702
  return this._playing;
@@ -714,8 +724,11 @@ var AudioOutput = class {
714
724
  startSilence() {
715
725
  if (this.silenceInterval) return;
716
726
  const startKeepalive = () => {
717
- log3.debug("Transport ready \u2014 sending initial silence + starting 3s keepalive");
718
- this.sendSilenceFrame();
727
+ log3.debug("Transport ready \u2014 sending initial silence burst + starting 3s keepalive");
728
+ for (let i = 0; i < 15; i++) {
729
+ this.sendSilenceFrame();
730
+ }
731
+ this._resolveReady?.();
719
732
  this.silenceInterval = setInterval(() => {
720
733
  if (!this._playing && !this._responding && !this._stopped) {
721
734
  this.sendSilenceFrame();
@@ -1208,6 +1221,7 @@ var Pipeline = class extends import_events.EventEmitter {
1208
1221
  nameVariants;
1209
1222
  beforeRespond;
1210
1223
  memory;
1224
+ tools;
1211
1225
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
1212
1226
  cleanText(text) {
1213
1227
  return this.tts?.cleanText ? this.tts.cleanText(text) : text;
@@ -1230,6 +1244,7 @@ var Pipeline = class extends import_events.EventEmitter {
1230
1244
  this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
1231
1245
  this.beforeRespond = options.beforeRespond;
1232
1246
  this.memory = options.memory;
1247
+ this.tools = options.tools;
1233
1248
  this.context = new ContextManager({
1234
1249
  instructions: options.instructions,
1235
1250
  maxContextTokens: options.maxContextTokens
@@ -1252,12 +1267,14 @@ var Pipeline = class extends import_events.EventEmitter {
1252
1267
  this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
1253
1268
  log7.warn("LLM warmup failed (non-fatal):", err);
1254
1269
  }) : Promise.resolve();
1255
- this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
1270
+ this._audioReadyPromise = this.audioOutput.whenReady;
1271
+ this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise, this._audioReadyPromise]).then(() => {
1256
1272
  });
1257
1273
  }
1258
1274
  _warmupPromise;
1259
1275
  _ttsWarmupPromise;
1260
1276
  _llmWarmupPromise;
1277
+ _audioReadyPromise;
1261
1278
  get processing() {
1262
1279
  return this._processing;
1263
1280
  }
@@ -1415,6 +1432,7 @@ var Pipeline = class extends import_events.EventEmitter {
1415
1432
  wake();
1416
1433
  };
1417
1434
  const MAX_LLM_RETRIES = 2;
1435
+ let toolCallEmitted = false;
1418
1436
  const producer = async () => {
1419
1437
  const defaultLang = this.tts?.defaultLanguage;
1420
1438
  for (let attempt = 0; attempt <= MAX_LLM_RETRIES; attempt++) {
@@ -1433,7 +1451,7 @@ var Pipeline = class extends import_events.EventEmitter {
1433
1451
  segBuf.length = 0;
1434
1452
  pushSentence(combined);
1435
1453
  };
1436
- const llmStream = this.llm.chat(messages, signal);
1454
+ const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
1437
1455
  try {
1438
1456
  while (!signal.aborted) {
1439
1457
  const { value: chunk, done } = await llmStream.next();
@@ -1462,6 +1480,10 @@ var Pipeline = class extends import_events.EventEmitter {
1462
1480
  for (const sentence of sentences) {
1463
1481
  pushSentence(sentence);
1464
1482
  }
1483
+ } else if (chunk.type === "tool_call" && chunk.toolCall) {
1484
+ log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
1485
+ toolCallEmitted = true;
1486
+ this.emit("toolCall", chunk.toolCall);
1465
1487
  }
1466
1488
  }
1467
1489
  } finally {
@@ -1473,7 +1495,7 @@ var Pipeline = class extends import_events.EventEmitter {
1473
1495
  if (remaining) {
1474
1496
  pushSentence(remaining);
1475
1497
  }
1476
- if (fullResponse.trim()) {
1498
+ if (fullResponse.trim() || toolCallEmitted) {
1477
1499
  break;
1478
1500
  }
1479
1501
  log7.warn(`LLM produced no output (attempt ${attempt + 1}/${MAX_LLM_RETRIES + 1})`);
@@ -1520,14 +1542,19 @@ var Pipeline = class extends import_events.EventEmitter {
1520
1542
  }
1521
1543
  };
1522
1544
  tryPrefetch();
1523
- await this.synthesizeAndPlay(sentence, signal, (t) => {
1524
- if (!tFirstAudioPlayed) {
1525
- tFirstAudioPlayed = t;
1526
- this.setAgentState("speaking");
1527
- }
1528
- this.emit("sentence", this.cleanText(sentence), sentence);
1529
- tryPrefetch();
1530
- }, existingStream);
1545
+ try {
1546
+ await this.synthesizeAndPlay(sentence, signal, (t) => {
1547
+ if (!tFirstAudioPlayed) {
1548
+ tFirstAudioPlayed = t;
1549
+ this.setAgentState("speaking");
1550
+ }
1551
+ this.emit("sentence", this.cleanText(sentence), sentence);
1552
+ tryPrefetch();
1553
+ }, existingStream);
1554
+ } catch (ttsErr) {
1555
+ if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
1556
+ log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
1557
+ }
1531
1558
  }
1532
1559
  } finally {
1533
1560
  if (!signal.aborted) {
@@ -1580,7 +1607,7 @@ var Pipeline = class extends import_events.EventEmitter {
1580
1607
  return;
1581
1608
  }
1582
1609
  this._processing = true;
1583
- await this._ttsWarmupPromise;
1610
+ await Promise.all([this._ttsWarmupPromise, this._audioReadyPromise]);
1584
1611
  log7.info(`say(): "${text.slice(0, 60)}"`);
1585
1612
  try {
1586
1613
  const signal = this.bargeIn.startCycle();
@@ -1735,12 +1762,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
1735
1762
  agentName: this.config.agentName,
1736
1763
  nameVariants: this.config.nameVariants,
1737
1764
  memory: this.memory ?? void 0,
1738
- maxContextTokens: this.config.maxContextTokens
1765
+ maxContextTokens: this.config.maxContextTokens,
1766
+ tools: this.config.tools
1739
1767
  });
1740
1768
  this.pipeline.on("transcription", (result) => this.emit("transcription", result));
1741
1769
  this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
1742
1770
  this.pipeline.on("response", (text) => this.emit("response", text));
1743
1771
  this.pipeline.on("agentState", (state) => this.emit("agentState", state));
1772
+ this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
1744
1773
  this.pipeline.on("error", (error) => this.emit("error", error));
1745
1774
  for (const participant of this.connection.room.remoteParticipants.values()) {
1746
1775
  for (const [, pub] of participant.trackPublications) {