@dtelecom/agents-js 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
2
  import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
3
  import { EventEmitter } from 'events';
4
- import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-EvtHMokR.mjs';
5
- export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChunk, f as MemoryConfig, g as PipelineEvents, R as RespondMode, h as STTPlugin, i as STTStreamOptions, j as TTSPlugin } from './types-EvtHMokR.mjs';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Bfz5YQwd.mjs';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChunk, f as MemoryConfig, g as PipelineEvents, R as RespondMode, h as STTPlugin, i as STTStreamOptions, j as TTSPlugin } from './types-Bfz5YQwd.mjs';
6
6
 
7
7
  declare class VoiceAgent extends EventEmitter {
8
8
  private readonly config;
@@ -103,6 +103,8 @@ declare class Pipeline extends EventEmitter {
103
103
  private readonly nameVariants;
104
104
  private readonly beforeRespond?;
105
105
  private readonly memory?;
106
+ /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
107
+ private cleanText;
106
108
  /** Active STT streams, keyed by participant identity */
107
109
  private sttStreams;
108
110
  private _processing;
package/dist/index.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
2
2
  import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
3
3
  import { EventEmitter } from 'events';
4
- import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-EvtHMokR.js';
5
- export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChunk, f as MemoryConfig, g as PipelineEvents, R as RespondMode, h as STTPlugin, i as STTStreamOptions, j as TTSPlugin } from './types-EvtHMokR.js';
4
+ import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Bfz5YQwd.js';
5
+ export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChunk, f as MemoryConfig, g as PipelineEvents, R as RespondMode, h as STTPlugin, i as STTStreamOptions, j as TTSPlugin } from './types-Bfz5YQwd.js';
6
6
 
7
7
  declare class VoiceAgent extends EventEmitter {
8
8
  private readonly config;
@@ -103,6 +103,8 @@ declare class Pipeline extends EventEmitter {
103
103
  private readonly nameVariants;
104
104
  private readonly beforeRespond?;
105
105
  private readonly memory?;
106
+ /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
107
+ private cleanText;
106
108
  /** Active STT streams, keyed by participant identity */
107
109
  private sttStreams;
108
110
  private _processing;
package/dist/index.js CHANGED
@@ -698,20 +698,33 @@ var AudioOutput = class {
698
698
  * Start sparse silence keepalive to prevent the SFU from dropping the track.
699
699
  * With Opus DTX enabled, the encoder handles silence natively — we only need
700
700
  * an occasional packet to keep the SSRC alive.
701
+ *
702
+ * Waits for the RTP transport to be ready before sending — no frames are
703
+ * wasted before DTLS is connected.
701
704
  */
702
705
  startSilence() {
703
706
  if (this.silenceInterval) return;
704
- log3.debug("Starting silence keepalive (sparse, 3s interval)");
705
- const immediate = new import_server_sdk_node2.AudioFrame(SILENCE, SAMPLE_RATE, CHANNELS, SAMPLES_PER_FRAME);
706
- this.source.captureFrame(immediate).catch(() => {
707
+ const startKeepalive = () => {
708
+ log3.debug("Transport ready \u2014 sending initial silence + starting 3s keepalive");
709
+ this.sendSilenceFrame();
710
+ this.silenceInterval = setInterval(() => {
711
+ if (!this._playing && !this._responding && !this._stopped) {
712
+ this.sendSilenceFrame();
713
+ }
714
+ }, 3e3);
715
+ };
716
+ if (this.source.ready) {
717
+ startKeepalive();
718
+ } else {
719
+ log3.debug("Waiting for transport before starting silence keepalive...");
720
+ this.source.onReady = () => startKeepalive();
721
+ }
722
+ }
723
+ sendSilenceFrame() {
724
+ const frame = new import_server_sdk_node2.AudioFrame(SILENCE, SAMPLE_RATE, CHANNELS, SAMPLES_PER_FRAME);
725
+ this.source.captureFrame(frame).catch((err) => {
726
+ log3.warn("Failed to send silence frame:", err);
707
727
  });
708
- this.silenceInterval = setInterval(() => {
709
- if (!this._playing && !this._responding && !this._stopped) {
710
- const f = new import_server_sdk_node2.AudioFrame(SILENCE, SAMPLE_RATE, CHANNELS, SAMPLES_PER_FRAME);
711
- this.source.captureFrame(f).catch(() => {
712
- });
713
- }
714
- }, 3e3);
715
728
  }
716
729
  /**
717
730
  * Write a PCM16 buffer to the audio output.
@@ -1143,6 +1156,10 @@ var Pipeline = class extends import_events.EventEmitter {
1143
1156
  nameVariants;
1144
1157
  beforeRespond;
1145
1158
  memory;
1159
+ /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
1160
+ cleanText(text) {
1161
+ return this.tts?.cleanText ? this.tts.cleanText(text) : text;
1162
+ }
1146
1163
  /** Active STT streams, keyed by participant identity */
1147
1164
  sttStreams = /* @__PURE__ */ new Map();
1148
1165
  _processing = false;
@@ -1396,7 +1413,7 @@ var Pipeline = class extends import_events.EventEmitter {
1396
1413
  tFirstAudioPlayed = t;
1397
1414
  this.setAgentState("speaking");
1398
1415
  }
1399
- this.emit("sentence", sentence);
1416
+ this.emit("sentence", this.cleanText(sentence));
1400
1417
  });
1401
1418
  continue;
1402
1419
  }
@@ -1424,7 +1441,7 @@ var Pipeline = class extends import_events.EventEmitter {
1424
1441
  if (fullResponse.trim()) {
1425
1442
  this.context.addAgentTurn(fullResponse.trim());
1426
1443
  this.memory?.storeTurn("assistant", fullResponse.trim(), true);
1427
- this.emit("response", fullResponse.trim());
1444
+ this.emit("response", this.cleanText(fullResponse.trim()));
1428
1445
  }
1429
1446
  await sleep2(AUDIO_DRAIN_MS);
1430
1447
  this.setAgentState("idle");
@@ -1465,13 +1482,13 @@ var Pipeline = class extends import_events.EventEmitter {
1465
1482
  this.setAgentState("thinking");
1466
1483
  await this.synthesizeAndPlay(text, signal, () => {
1467
1484
  this.setAgentState("speaking");
1468
- this.emit("sentence", text);
1485
+ this.emit("sentence", this.cleanText(text));
1469
1486
  });
1470
1487
  if (!signal.aborted) {
1471
1488
  await this.audioOutput.writeSilence(40);
1472
1489
  this.context.addAgentTurn(text);
1473
1490
  this.memory?.storeTurn("assistant", text, true);
1474
- this.emit("response", text);
1491
+ this.emit("response", this.cleanText(text));
1475
1492
  }
1476
1493
  await sleep2(AUDIO_DRAIN_MS);
1477
1494
  this.setAgentState("idle");