@dtelecom/agents-js 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
2
  import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
3
  import { EventEmitter } from 'events';
4
- import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.mjs';
5
- export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.mjs';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Di_jxIgs.mjs';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-Di_jxIgs.mjs';
6
6
 
7
7
  declare class VoiceAgent extends EventEmitter {
8
8
  private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
111
111
  private readonly nameVariants;
112
112
  private readonly beforeRespond?;
113
113
  private readonly memory?;
114
+ private readonly tools?;
114
115
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
115
116
  private cleanText;
116
117
  /** Active STT streams, keyed by participant identity */
package/dist/index.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
2
  import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
3
  import { EventEmitter } from 'events';
4
- import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.js';
5
- export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.js';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Di_jxIgs.js';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-Di_jxIgs.js';
6
6
 
7
7
  declare class VoiceAgent extends EventEmitter {
8
8
  private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
111
111
  private readonly nameVariants;
112
112
  private readonly beforeRespond?;
113
113
  private readonly memory?;
114
+ private readonly tools?;
114
115
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
115
116
  private cleanText;
116
117
  /** Active STT streams, keyed by participant identity */
package/dist/index.js CHANGED
@@ -1208,6 +1208,7 @@ var Pipeline = class extends import_events.EventEmitter {
1208
1208
  nameVariants;
1209
1209
  beforeRespond;
1210
1210
  memory;
1211
+ tools;
1211
1212
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
1212
1213
  cleanText(text) {
1213
1214
  return this.tts?.cleanText ? this.tts.cleanText(text) : text;
@@ -1230,6 +1231,7 @@ var Pipeline = class extends import_events.EventEmitter {
1230
1231
  this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
1231
1232
  this.beforeRespond = options.beforeRespond;
1232
1233
  this.memory = options.memory;
1234
+ this.tools = options.tools;
1233
1235
  this.context = new ContextManager({
1234
1236
  instructions: options.instructions,
1235
1237
  maxContextTokens: options.maxContextTokens
@@ -1433,7 +1435,7 @@ var Pipeline = class extends import_events.EventEmitter {
1433
1435
  segBuf.length = 0;
1434
1436
  pushSentence(combined);
1435
1437
  };
1436
- const llmStream = this.llm.chat(messages, signal);
1438
+ const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
1437
1439
  try {
1438
1440
  while (!signal.aborted) {
1439
1441
  const { value: chunk, done } = await llmStream.next();
@@ -1462,6 +1464,9 @@ var Pipeline = class extends import_events.EventEmitter {
1462
1464
  for (const sentence of sentences) {
1463
1465
  pushSentence(sentence);
1464
1466
  }
1467
+ } else if (chunk.type === "tool_call" && chunk.toolCall) {
1468
+ log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
1469
+ this.emit("toolCall", chunk.toolCall);
1465
1470
  }
1466
1471
  }
1467
1472
  } finally {
@@ -1520,14 +1525,19 @@ var Pipeline = class extends import_events.EventEmitter {
1520
1525
  }
1521
1526
  };
1522
1527
  tryPrefetch();
1523
- await this.synthesizeAndPlay(sentence, signal, (t) => {
1524
- if (!tFirstAudioPlayed) {
1525
- tFirstAudioPlayed = t;
1526
- this.setAgentState("speaking");
1527
- }
1528
- this.emit("sentence", this.cleanText(sentence), sentence);
1529
- tryPrefetch();
1530
- }, existingStream);
1528
+ try {
1529
+ await this.synthesizeAndPlay(sentence, signal, (t) => {
1530
+ if (!tFirstAudioPlayed) {
1531
+ tFirstAudioPlayed = t;
1532
+ this.setAgentState("speaking");
1533
+ }
1534
+ this.emit("sentence", this.cleanText(sentence), sentence);
1535
+ tryPrefetch();
1536
+ }, existingStream);
1537
+ } catch (ttsErr) {
1538
+ if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
1539
+ log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
1540
+ }
1531
1541
  }
1532
1542
  } finally {
1533
1543
  if (!signal.aborted) {
@@ -1735,12 +1745,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
1735
1745
  agentName: this.config.agentName,
1736
1746
  nameVariants: this.config.nameVariants,
1737
1747
  memory: this.memory ?? void 0,
1738
- maxContextTokens: this.config.maxContextTokens
1748
+ maxContextTokens: this.config.maxContextTokens,
1749
+ tools: this.config.tools
1739
1750
  });
1740
1751
  this.pipeline.on("transcription", (result) => this.emit("transcription", result));
1741
1752
  this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
1742
1753
  this.pipeline.on("response", (text) => this.emit("response", text));
1743
1754
  this.pipeline.on("agentState", (state) => this.emit("agentState", state));
1755
+ this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
1744
1756
  this.pipeline.on("error", (error) => this.emit("error", error));
1745
1757
  for (const participant of this.connection.room.remoteParticipants.values()) {
1746
1758
  for (const [, pub] of participant.trackPublications) {