@dtelecom/agents-js 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +6 -5
- package/dist/index.d.ts +6 -5
- package/dist/index.js +100 -34
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +100 -34
- package/dist/index.mjs.map +1 -1
- package/dist/memory/index.d.mts +1 -1
- package/dist/memory/index.d.ts +1 -1
- package/dist/providers/index.d.mts +1 -1
- package/dist/providers/index.d.ts +1 -1
- package/dist/providers/index.js +42 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/index.mjs +42 -0
- package/dist/providers/index.mjs.map +1 -1
- package/dist/{types-BBKtiPvm.d.mts → types-Di_jxIgs.d.mts} +25 -5
- package/dist/{types-BBKtiPvm.d.ts → types-Di_jxIgs.d.ts} +25 -5
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -621,6 +621,49 @@ var AUDIO_DRAIN_MS = 800;
|
|
|
621
621
|
/**
 * Return a promise that resolves after `ms` milliseconds.
 * The delay is scheduled with `setTimeout` and the promise resolves with no value.
 */
function sleep2(ms) {
|
|
622
622
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
623
623
|
}
|
|
624
|
+
/**
 * Start synthesizing `text` right away and buffer the resulting chunks so a
 * later consumer can read them without waiting for synthesis to begin.
 *
 * Synthesis is kicked off immediately in a detached async task; every chunk
 * produced by `tts.synthesize(text, signal)` is appended to an internal buffer.
 *
 * @param {{ synthesize: Function }} tts - Object whose `synthesize(text, signal)`
 *   returns an async iterable of chunks.
 * @param {string} text - Text handed to `tts.synthesize`.
 * @param {AbortSignal} [signal] - Optional abort signal. Once aborted, the
 *   producer stops buffering and the consumer finishes without yielding more.
 * @returns {() => AsyncGenerator} Factory for an async generator that yields the
 *   buffered chunks in order, waits for more while synthesis is running, and
 *   rethrows any non-abort synthesis error. Only a single pending waiter is
 *   supported (one `wake` slot), so it is meant to be consumed once.
 */
function prefetchTTS(tts, text, signal) {
  const buffer = [];
  let done = false;
  let error = null;
  let wake = null;

  // Release the consumer if it is currently parked waiting for data.
  const notify = () => {
    if (wake) {
      const w = wake;
      wake = null;
      w();
    }
  };

  // Without this, a consumer parked on `wake` would only be released by the
  // producer loop. If the TTS stream stalls and ignores the abort, the consumer
  // would hang forever even though the cycle was cancelled.
  signal?.addEventListener("abort", notify, { once: true });

  // Detached producer: fills `buffer` and records the terminal state.
  void (async () => {
    try {
      const stream = tts.synthesize(text, signal);
      for await (const chunk of stream) {
        if (signal?.aborted) break;
        buffer.push(chunk);
        notify();
      }
    } catch (e) {
      // An abort is an expected way to end; anything else is surfaced to the consumer.
      if (!(e instanceof Error && e.name === "AbortError")) error = e;
    } finally {
      done = true;
      signal?.removeEventListener("abort", notify);
      notify();
    }
  })();

  return async function* () {
    let index = 0;
    while (true) {
      if (signal?.aborted) return;
      if (error) throw error;
      if (index < buffer.length) {
        yield buffer[index++];
        continue;
      }
      if (done) return;
      // Park until the producer pushes a chunk, finishes, or the signal aborts.
      await new Promise((r) => {
        wake = r;
      });
    }
  };
}
|
|
624
667
|
var Pipeline = class extends EventEmitter {
|
|
625
668
|
stt;
|
|
626
669
|
llm;
|
|
@@ -635,6 +678,7 @@ var Pipeline = class extends EventEmitter {
|
|
|
635
678
|
nameVariants;
|
|
636
679
|
beforeRespond;
|
|
637
680
|
memory;
|
|
681
|
+
tools;
|
|
638
682
|
/** Strip provider-specific markup (e.g. SSML lang tags) for display. */
|
|
639
683
|
cleanText(text) {
|
|
640
684
|
return this.tts?.cleanText ? this.tts.cleanText(text) : text;
|
|
@@ -657,6 +701,7 @@ var Pipeline = class extends EventEmitter {
|
|
|
657
701
|
this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
|
|
658
702
|
this.beforeRespond = options.beforeRespond;
|
|
659
703
|
this.memory = options.memory;
|
|
704
|
+
this.tools = options.tools;
|
|
660
705
|
this.context = new ContextManager({
|
|
661
706
|
instructions: options.instructions,
|
|
662
707
|
maxContextTokens: options.maxContextTokens
|
|
@@ -673,28 +718,18 @@ var Pipeline = class extends EventEmitter {
|
|
|
673
718
|
this.splitter.reset();
|
|
674
719
|
this.setAgentState("idle");
|
|
675
720
|
};
|
|
676
|
-
this.
|
|
721
|
+
this._ttsWarmupPromise = this.tts?.warmup ? this.tts.warmup().catch((err) => {
|
|
722
|
+
log7.warn("TTS warmup failed (non-fatal):", err);
|
|
723
|
+
}) : Promise.resolve();
|
|
724
|
+
this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
|
|
725
|
+
log7.warn("LLM warmup failed (non-fatal):", err);
|
|
726
|
+
}) : Promise.resolve();
|
|
727
|
+
this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
|
|
728
|
+
});
|
|
677
729
|
}
|
|
678
|
-
/** One-shot warmup — safe to call from constructor, resolves when both LLM and TTS are ready. */
|
|
679
730
|
_warmupPromise;
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
if (this.llm.warmup) {
|
|
683
|
-
tasks.push(
|
|
684
|
-
this.llm.warmup(instructions).catch((err) => {
|
|
685
|
-
log7.warn("LLM warmup failed:", err);
|
|
686
|
-
})
|
|
687
|
-
);
|
|
688
|
-
}
|
|
689
|
-
if (this.tts?.warmup) {
|
|
690
|
-
tasks.push(
|
|
691
|
-
this.tts.warmup().catch((err) => {
|
|
692
|
-
log7.warn("TTS warmup failed:", err);
|
|
693
|
-
})
|
|
694
|
-
);
|
|
695
|
-
}
|
|
696
|
-
await Promise.all(tasks);
|
|
697
|
-
}
|
|
731
|
+
_ttsWarmupPromise;
|
|
732
|
+
_llmWarmupPromise;
|
|
698
733
|
get processing() {
|
|
699
734
|
return this._processing;
|
|
700
735
|
}
|
|
@@ -870,7 +905,7 @@ var Pipeline = class extends EventEmitter {
|
|
|
870
905
|
segBuf.length = 0;
|
|
871
906
|
pushSentence(combined);
|
|
872
907
|
};
|
|
873
|
-
const llmStream = this.llm.chat(messages, signal);
|
|
908
|
+
const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
|
|
874
909
|
try {
|
|
875
910
|
while (!signal.aborted) {
|
|
876
911
|
const { value: chunk, done } = await llmStream.next();
|
|
@@ -899,6 +934,9 @@ var Pipeline = class extends EventEmitter {
|
|
|
899
934
|
for (const sentence of sentences) {
|
|
900
935
|
pushSentence(sentence);
|
|
901
936
|
}
|
|
937
|
+
} else if (chunk.type === "tool_call" && chunk.toolCall) {
|
|
938
|
+
log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
|
|
939
|
+
this.emit("toolCall", chunk.toolCall);
|
|
902
940
|
}
|
|
903
941
|
}
|
|
904
942
|
} finally {
|
|
@@ -921,29 +959,55 @@ var Pipeline = class extends EventEmitter {
|
|
|
921
959
|
};
|
|
922
960
|
const consumer = async () => {
|
|
923
961
|
this.audioOutput.beginResponse();
|
|
962
|
+
const state = { prefetched: null };
|
|
924
963
|
try {
|
|
925
964
|
while (true) {
|
|
926
965
|
if (signal.aborted) break;
|
|
927
|
-
|
|
928
|
-
|
|
966
|
+
let sentence;
|
|
967
|
+
let existingStream;
|
|
968
|
+
if (state.prefetched) {
|
|
969
|
+
sentence = state.prefetched.sentence;
|
|
970
|
+
existingStream = state.prefetched.streamFn();
|
|
971
|
+
state.prefetched = null;
|
|
972
|
+
} else if (sentenceQueue.length > 0) {
|
|
973
|
+
sentence = sentenceQueue.shift();
|
|
929
974
|
if (!/\w/.test(sentence)) {
|
|
930
975
|
log7.debug(`Skipping non-word sentence: "${sentence}"`);
|
|
931
976
|
continue;
|
|
932
977
|
}
|
|
978
|
+
existingStream = void 0;
|
|
979
|
+
} else if (producerDone) {
|
|
980
|
+
break;
|
|
981
|
+
} else {
|
|
982
|
+
await new Promise((resolve) => {
|
|
983
|
+
wakeConsumer = resolve;
|
|
984
|
+
});
|
|
985
|
+
wakeConsumer = null;
|
|
986
|
+
continue;
|
|
987
|
+
}
|
|
988
|
+
const tryPrefetch = () => {
|
|
989
|
+
if (state.prefetched || !this.tts) return;
|
|
990
|
+
if (sentenceQueue.length > 0) {
|
|
991
|
+
const next = sentenceQueue.shift();
|
|
992
|
+
if (/\w/.test(next)) {
|
|
993
|
+
state.prefetched = { sentence: next, streamFn: prefetchTTS(this.tts, next, signal) };
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
};
|
|
997
|
+
tryPrefetch();
|
|
998
|
+
try {
|
|
933
999
|
await this.synthesizeAndPlay(sentence, signal, (t) => {
|
|
934
1000
|
if (!tFirstAudioPlayed) {
|
|
935
1001
|
tFirstAudioPlayed = t;
|
|
936
1002
|
this.setAgentState("speaking");
|
|
937
1003
|
}
|
|
938
1004
|
this.emit("sentence", this.cleanText(sentence), sentence);
|
|
939
|
-
|
|
940
|
-
|
|
1005
|
+
tryPrefetch();
|
|
1006
|
+
}, existingStream);
|
|
1007
|
+
} catch (ttsErr) {
|
|
1008
|
+
if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
|
|
1009
|
+
log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
|
|
941
1010
|
}
|
|
942
|
-
if (producerDone) break;
|
|
943
|
-
await new Promise((resolve) => {
|
|
944
|
-
wakeConsumer = resolve;
|
|
945
|
-
});
|
|
946
|
-
wakeConsumer = null;
|
|
947
1011
|
}
|
|
948
1012
|
} finally {
|
|
949
1013
|
if (!signal.aborted) {
|
|
@@ -996,7 +1060,7 @@ var Pipeline = class extends EventEmitter {
|
|
|
996
1060
|
return;
|
|
997
1061
|
}
|
|
998
1062
|
this._processing = true;
|
|
999
|
-
await this.
|
|
1063
|
+
await this._ttsWarmupPromise;
|
|
1000
1064
|
log7.info(`say(): "${text.slice(0, 60)}"`);
|
|
1001
1065
|
try {
|
|
1002
1066
|
const signal = this.bargeIn.startCycle();
|
|
@@ -1033,7 +1097,7 @@ var Pipeline = class extends EventEmitter {
|
|
|
1033
1097
|
}
|
|
1034
1098
|
}
|
|
1035
1099
|
}
|
|
1036
|
-
async synthesizeAndPlay(text, signal, onFirstAudio) {
|
|
1100
|
+
async synthesizeAndPlay(text, signal, onFirstAudio, existingStream) {
|
|
1037
1101
|
if (!this.tts || signal.aborted) {
|
|
1038
1102
|
log7.info(`[Agent says]: ${text}`);
|
|
1039
1103
|
return;
|
|
@@ -1042,7 +1106,7 @@ var Pipeline = class extends EventEmitter {
|
|
|
1042
1106
|
const ttsStart = performance.now();
|
|
1043
1107
|
let firstChunk = true;
|
|
1044
1108
|
let ttsChunkCount = 0;
|
|
1045
|
-
const ttsStream = this.tts.synthesize(text, signal);
|
|
1109
|
+
const ttsStream = existingStream ?? this.tts.synthesize(text, signal);
|
|
1046
1110
|
const measuredStream = async function* () {
|
|
1047
1111
|
for await (const chunk of ttsStream) {
|
|
1048
1112
|
ttsChunkCount++;
|
|
@@ -1150,12 +1214,14 @@ var VoiceAgent = class extends EventEmitter2 {
|
|
|
1150
1214
|
agentName: this.config.agentName,
|
|
1151
1215
|
nameVariants: this.config.nameVariants,
|
|
1152
1216
|
memory: this.memory ?? void 0,
|
|
1153
|
-
maxContextTokens: this.config.maxContextTokens
|
|
1217
|
+
maxContextTokens: this.config.maxContextTokens,
|
|
1218
|
+
tools: this.config.tools
|
|
1154
1219
|
});
|
|
1155
1220
|
this.pipeline.on("transcription", (result) => this.emit("transcription", result));
|
|
1156
1221
|
this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
|
|
1157
1222
|
this.pipeline.on("response", (text) => this.emit("response", text));
|
|
1158
1223
|
this.pipeline.on("agentState", (state) => this.emit("agentState", state));
|
|
1224
|
+
this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
|
|
1159
1225
|
this.pipeline.on("error", (error) => this.emit("error", error));
|
|
1160
1226
|
for (const participant of this.connection.room.remoteParticipants.values()) {
|
|
1161
1227
|
for (const [, pub] of participant.trackPublications) {
|