open-agents-ai 0.187.255 → 0.187.257
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +297 -93
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -327046,32 +327046,52 @@ __export(voicechat_exports, {
|
|
|
327046
327046
|
VoiceChatSession: () => VoiceChatSession
|
|
327047
327047
|
});
|
|
327048
327048
|
import { EventEmitter as EventEmitter10 } from "node:events";
|
|
327049
|
-
var VoiceChatSession;
|
|
327049
|
+
var VAD_SILENCE_MS, MAX_SEGMENT_MS, SUMMARY_INJECTION_INTERVAL, MAX_CONTEXT_TURNS, SYSTEM_PROMPT2, VoiceChatSession;
|
|
327050
327050
|
var init_voicechat = __esm({
|
|
327051
327051
|
"packages/cli/src/tui/voicechat.ts"() {
|
|
327052
327052
|
"use strict";
|
|
327053
|
+
VAD_SILENCE_MS = 1100;
|
|
327054
|
+
MAX_SEGMENT_MS = 6500;
|
|
327055
|
+
SUMMARY_INJECTION_INTERVAL = 4;
|
|
327056
|
+
MAX_CONTEXT_TURNS = 20;
|
|
327057
|
+
SYSTEM_PROMPT2 = `You are a voice assistant having a live spoken conversation. Keep responses extremely brief — 1-2 sentences max. You're speaking aloud, not writing. Be conversational, direct, and helpful. Don't use markdown, bullet points, or formatting — just natural speech. If you don't know something, say so briefly. Do not over-think — respond quickly and concisely.`;
|
|
327053
327058
|
VoiceChatSession = class extends EventEmitter10 {
|
|
327054
327059
|
voice;
|
|
327055
327060
|
listen;
|
|
327061
|
+
backendUrl;
|
|
327062
|
+
model;
|
|
327063
|
+
apiKey;
|
|
327056
327064
|
runner;
|
|
327065
|
+
// State machine
|
|
327066
|
+
_state = "IDLE";
|
|
327057
327067
|
active = false;
|
|
327058
|
-
|
|
327068
|
+
// Conversation context — own turns, separate from main agent
|
|
327069
|
+
context = [];
|
|
327070
|
+
turnCount = 0;
|
|
327071
|
+
// VAD segment capture
|
|
327072
|
+
captureBuffer = "";
|
|
327073
|
+
captureStartTime = 0;
|
|
327074
|
+
silenceTimer = null;
|
|
327075
|
+
maxSegmentTimer = null;
|
|
327076
|
+
// Abort control for inference
|
|
327077
|
+
abortController = null;
|
|
327078
|
+
// Callbacks
|
|
327059
327079
|
onStatus;
|
|
327060
327080
|
onUserSpeech;
|
|
327061
327081
|
onPartialTranscript;
|
|
327062
327082
|
onAgentSpeech;
|
|
327063
|
-
|
|
327064
|
-
|
|
327065
|
-
|
|
327066
|
-
|
|
327067
|
-
isSpeaking = false;
|
|
327068
|
-
lastSpokenText = "";
|
|
327083
|
+
onStateChange;
|
|
327084
|
+
// Bound handlers for cleanup
|
|
327085
|
+
_onTranscript = null;
|
|
327086
|
+
_onError = null;
|
|
327069
327087
|
constructor(opts) {
|
|
327070
327088
|
super();
|
|
327071
327089
|
this.voice = opts.voice;
|
|
327072
327090
|
this.listen = opts.listen;
|
|
327073
|
-
this.
|
|
327074
|
-
this.
|
|
327091
|
+
this.backendUrl = opts.backendUrl.replace(/\/+$/, "");
|
|
327092
|
+
this.model = opts.model;
|
|
327093
|
+
this.apiKey = opts.apiKey ?? "";
|
|
327094
|
+
this.runner = opts.runner ?? null;
|
|
327075
327095
|
this.onStatus = opts.onStatus ?? (() => {
|
|
327076
327096
|
});
|
|
327077
327097
|
this.onUserSpeech = opts.onUserSpeech ?? (() => {
|
|
@@ -327080,11 +327100,28 @@ var init_voicechat = __esm({
|
|
|
327080
327100
|
});
|
|
327081
327101
|
this.onAgentSpeech = opts.onAgentSpeech ?? (() => {
|
|
327082
327102
|
});
|
|
327103
|
+
this.onStateChange = opts.onStateChange ?? (() => {
|
|
327104
|
+
});
|
|
327105
|
+
}
|
|
327106
|
+
get state() {
|
|
327107
|
+
return this._state;
|
|
327083
327108
|
}
|
|
327084
327109
|
get isActive() {
|
|
327085
327110
|
return this.active;
|
|
327086
327111
|
}
|
|
327087
|
-
|
|
327112
|
+
// ---------------------------------------------------------------------------
|
|
327113
|
+
// State transitions
|
|
327114
|
+
// ---------------------------------------------------------------------------
|
|
327115
|
+
setState(next) {
|
|
327116
|
+
if (this._state === next) return;
|
|
327117
|
+
const prev = this._state;
|
|
327118
|
+
this._state = next;
|
|
327119
|
+
this.onStateChange(next);
|
|
327120
|
+
this.emit("stateChange", { from: prev, to: next });
|
|
327121
|
+
}
|
|
327122
|
+
// ---------------------------------------------------------------------------
|
|
327123
|
+
// Start / Stop
|
|
327124
|
+
// ---------------------------------------------------------------------------
|
|
327088
327125
|
async start() {
|
|
327089
327126
|
if (this.active) return;
|
|
327090
327127
|
if (!this.voice.enabled || !this.voice.ready) {
|
|
@@ -327092,98 +327129,278 @@ var init_voicechat = __esm({
|
|
|
327092
327129
|
await this.voice.toggle();
|
|
327093
327130
|
}
|
|
327094
327131
|
this.active = true;
|
|
327095
|
-
this.
|
|
327096
|
-
this.
|
|
327097
|
-
|
|
327098
|
-
|
|
327099
|
-
|
|
327100
|
-
|
|
327101
|
-
if (
|
|
327102
|
-
|
|
327103
|
-
|
|
327132
|
+
this.context = [{ role: "system", content: SYSTEM_PROMPT2 }];
|
|
327133
|
+
this.turnCount = 0;
|
|
327134
|
+
this.onStatus("VoiceChat v2 active — state machine: LISTENING");
|
|
327135
|
+
this._onTranscript = (...args) => {
|
|
327136
|
+
let text;
|
|
327137
|
+
let isFinal;
|
|
327138
|
+
if (typeof args[0] === "object" && args[0] !== null) {
|
|
327139
|
+
const evt = args[0];
|
|
327140
|
+
text = evt.text ?? "";
|
|
327141
|
+
isFinal = evt.isFinal ?? false;
|
|
327104
327142
|
} else {
|
|
327105
|
-
|
|
327106
|
-
|
|
327107
|
-
}, this.silenceTimeout * 1e3);
|
|
327143
|
+
text = String(args[0] ?? "");
|
|
327144
|
+
isFinal = Boolean(args[1]);
|
|
327108
327145
|
}
|
|
327109
|
-
|
|
327110
|
-
|
|
327111
|
-
|
|
327112
|
-
|
|
327113
|
-
const text = event.content.trim();
|
|
327114
|
-
if (!text || text.length < 3) return;
|
|
327115
|
-
if (text === this.lastSpokenText) return;
|
|
327116
|
-
this.lastSpokenText = text;
|
|
327117
|
-
this.onAgentSpeech(text);
|
|
327118
|
-
this.queueSpeak(text);
|
|
327119
|
-
}
|
|
327120
|
-
});
|
|
327121
|
-
this.listen.on("error", (err) => {
|
|
327146
|
+
if (!text.trim()) return;
|
|
327147
|
+
this.handleTranscript(text.trim(), isFinal);
|
|
327148
|
+
};
|
|
327149
|
+
this._onError = (err) => {
|
|
327122
327150
|
const msg = err instanceof Error ? err.message : String(err);
|
|
327123
|
-
this.onStatus(`ASR error (
|
|
327124
|
-
}
|
|
327151
|
+
this.onStatus(`ASR error (voicechat continues without mic): ${msg.slice(0, 80)}`);
|
|
327152
|
+
};
|
|
327153
|
+
this.listen.on("transcript", this._onTranscript);
|
|
327154
|
+
this.listen.on("error", this._onError);
|
|
327125
327155
|
try {
|
|
327126
327156
|
await this.listen.start();
|
|
327127
|
-
this.
|
|
327157
|
+
this.setState("LISTENING");
|
|
327158
|
+
this.onStatus("Mic active — LISTENING for speech...");
|
|
327128
327159
|
} catch (err) {
|
|
327129
|
-
this.onStatus(
|
|
327160
|
+
this.onStatus(
|
|
327161
|
+
`Mic failed: ${err instanceof Error ? err.message : String(err)}. VoiceChat active without mic.`
|
|
327162
|
+
);
|
|
327163
|
+
this.setState("LISTENING");
|
|
327130
327164
|
}
|
|
327131
327165
|
}
|
|
327132
|
-
/** Stop the voice chat session */
|
|
327133
327166
|
async stop() {
|
|
327134
327167
|
if (!this.active) return;
|
|
327135
327168
|
this.active = false;
|
|
327169
|
+
if (this.abortController) {
|
|
327170
|
+
this.abortController.abort();
|
|
327171
|
+
this.abortController = null;
|
|
327172
|
+
}
|
|
327136
327173
|
if (this.silenceTimer) {
|
|
327137
327174
|
clearTimeout(this.silenceTimer);
|
|
327138
327175
|
this.silenceTimer = null;
|
|
327139
327176
|
}
|
|
327140
|
-
if (this.
|
|
327141
|
-
this.
|
|
327177
|
+
if (this.maxSegmentTimer) {
|
|
327178
|
+
clearTimeout(this.maxSegmentTimer);
|
|
327179
|
+
this.maxSegmentTimer = null;
|
|
327180
|
+
}
|
|
327181
|
+
if (this.captureBuffer.trim() && (this._state === "CAPTURING" || this._state === "TRANSCRIBING")) {
|
|
327182
|
+
this.finalizeSegment();
|
|
327183
|
+
}
|
|
327184
|
+
if (this._onTranscript) {
|
|
327185
|
+
this.listen.removeAllListeners("transcript");
|
|
327186
|
+
this._onTranscript = null;
|
|
327187
|
+
}
|
|
327188
|
+
if (this._onError) {
|
|
327189
|
+
this.listen.removeAllListeners("error");
|
|
327190
|
+
this._onError = null;
|
|
327142
327191
|
}
|
|
327143
327192
|
try {
|
|
327144
327193
|
await this.listen.stop();
|
|
327145
327194
|
} catch {
|
|
327146
327195
|
}
|
|
327147
|
-
this.
|
|
327148
|
-
this.
|
|
327149
|
-
this.onStatus("Voice chat ended");
|
|
327196
|
+
this.setState("IDLE");
|
|
327197
|
+
this.onStatus("VoiceChat ended");
|
|
327150
327198
|
this.emit("stopped");
|
|
327151
327199
|
}
|
|
327152
|
-
|
|
327153
|
-
|
|
327154
|
-
|
|
327155
|
-
|
|
327156
|
-
this.
|
|
327200
|
+
// ---------------------------------------------------------------------------
|
|
327201
|
+
// Transcript handling — VAD-style segment capture (Voryn pattern)
|
|
327202
|
+
// ---------------------------------------------------------------------------
|
|
327203
|
+
handleTranscript(text, isFinal) {
|
|
327204
|
+
if (!this.active) return;
|
|
327205
|
+
if (this._state !== "LISTENING" && this._state !== "CAPTURING") {
|
|
327206
|
+
return;
|
|
327207
|
+
}
|
|
327208
|
+
if (this._state === "LISTENING") {
|
|
327209
|
+
this.setState("CAPTURING");
|
|
327210
|
+
this.captureBuffer = "";
|
|
327211
|
+
this.captureStartTime = Date.now();
|
|
327212
|
+
this.maxSegmentTimer = setTimeout(() => {
|
|
327213
|
+
if (this._state === "CAPTURING") {
|
|
327214
|
+
this.finalizeSegment();
|
|
327215
|
+
}
|
|
327216
|
+
}, MAX_SEGMENT_MS);
|
|
327217
|
+
}
|
|
327218
|
+
this.captureBuffer = text;
|
|
327219
|
+
this.onPartialTranscript(text);
|
|
327220
|
+
if (this.silenceTimer) clearTimeout(this.silenceTimer);
|
|
327221
|
+
if (isFinal) {
|
|
327222
|
+
this.finalizeSegment();
|
|
327223
|
+
} else {
|
|
327224
|
+
this.silenceTimer = setTimeout(() => {
|
|
327225
|
+
if (this._state === "CAPTURING") {
|
|
327226
|
+
this.finalizeSegment();
|
|
327227
|
+
}
|
|
327228
|
+
}, VAD_SILENCE_MS);
|
|
327229
|
+
}
|
|
327230
|
+
}
|
|
327231
|
+
// ---------------------------------------------------------------------------
|
|
327232
|
+
// Segment finalization → Transcribing → Thinking → Speaking
|
|
327233
|
+
// ---------------------------------------------------------------------------
|
|
327234
|
+
finalizeSegment() {
|
|
327235
|
+
const text = this.captureBuffer.trim();
|
|
327157
327236
|
if (this.silenceTimer) {
|
|
327158
327237
|
clearTimeout(this.silenceTimer);
|
|
327159
327238
|
this.silenceTimer = null;
|
|
327160
327239
|
}
|
|
327240
|
+
if (this.maxSegmentTimer) {
|
|
327241
|
+
clearTimeout(this.maxSegmentTimer);
|
|
327242
|
+
this.maxSegmentTimer = null;
|
|
327243
|
+
}
|
|
327244
|
+
this.captureBuffer = "";
|
|
327245
|
+
if (!text) {
|
|
327246
|
+
this.setState("LISTENING");
|
|
327247
|
+
return;
|
|
327248
|
+
}
|
|
327249
|
+
this.setState("TRANSCRIBING");
|
|
327161
327250
|
this.onUserSpeech(text);
|
|
327162
|
-
this.
|
|
327163
|
-
|
|
327164
|
-
|
|
327165
|
-
|
|
327251
|
+
this.context.push({ role: "user", content: text });
|
|
327252
|
+
this.turnCount++;
|
|
327253
|
+
while (this.context.length > MAX_CONTEXT_TURNS + 1) {
|
|
327254
|
+
this.context.splice(1, 1);
|
|
327255
|
+
}
|
|
327256
|
+
this.think();
|
|
327166
327257
|
}
|
|
327167
|
-
|
|
327168
|
-
|
|
327169
|
-
|
|
327170
|
-
|
|
327171
|
-
|
|
327258
|
+
// ---------------------------------------------------------------------------
|
|
327259
|
+
// Direct Ollama inference (not through main agent runner)
|
|
327260
|
+
// ---------------------------------------------------------------------------
|
|
327261
|
+
async think() {
|
|
327262
|
+
if (!this.active) return;
|
|
327263
|
+
this.setState("THINKING");
|
|
327264
|
+
this.onStatus("Thinking...");
|
|
327265
|
+
this.abortController = new AbortController();
|
|
327266
|
+
try {
|
|
327267
|
+
const response = await this.streamOllamaInference(this.abortController.signal);
|
|
327268
|
+
if (!this.active) return;
|
|
327269
|
+
if (response.trim()) {
|
|
327270
|
+
this.context.push({ role: "assistant", content: response.trim() });
|
|
327271
|
+
this.setState("SPEAKING");
|
|
327272
|
+
this.onAgentSpeech(response.trim());
|
|
327273
|
+
this.voice.speak(response.trim());
|
|
327274
|
+
if (this.runner && this.turnCount % SUMMARY_INJECTION_INTERVAL === 0) {
|
|
327275
|
+
this.injectSummary();
|
|
327276
|
+
}
|
|
327277
|
+
const estimatedMs = Math.max(1500, response.length / 5 * (6e4 / 150));
|
|
327278
|
+
await new Promise((r2) => setTimeout(r2, estimatedMs));
|
|
327279
|
+
}
|
|
327280
|
+
} catch (err) {
|
|
327281
|
+
if (!this.active) return;
|
|
327282
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
327283
|
+
if (!msg.includes("abort")) {
|
|
327284
|
+
this.onStatus(`Inference error: ${msg.slice(0, 100)}`);
|
|
327285
|
+
}
|
|
327286
|
+
} finally {
|
|
327287
|
+
this.abortController = null;
|
|
327288
|
+
}
|
|
327289
|
+
if (this.active) {
|
|
327290
|
+
this.setState("LISTENING");
|
|
327291
|
+
this.onStatus("LISTENING...");
|
|
327172
327292
|
}
|
|
327173
327293
|
}
|
|
327174
|
-
/**
|
|
327175
|
-
|
|
327176
|
-
|
|
327177
|
-
|
|
327178
|
-
|
|
327179
|
-
|
|
327180
|
-
|
|
327181
|
-
|
|
327182
|
-
|
|
327183
|
-
|
|
327294
|
+
/**
|
|
327295
|
+
* Stream inference. Tries native Ollama /api/chat first (supports think:false
|
|
327296
|
+
* for reasoning models), falls back to OpenAI-compat /v1/chat/completions.
|
|
327297
|
+
*/
|
|
327298
|
+
async streamOllamaInference(signal) {
|
|
327299
|
+
const baseUrl = this.backendUrl.replace(/\/v1\/?$/, "");
|
|
327300
|
+
const headers = { "Content-Type": "application/json" };
|
|
327301
|
+
if (this.apiKey) headers["Authorization"] = `Bearer ${this.apiKey}`;
|
|
327302
|
+
try {
|
|
327303
|
+
const nativeBody = JSON.stringify({
|
|
327304
|
+
model: this.model,
|
|
327305
|
+
messages: this.context,
|
|
327306
|
+
stream: true,
|
|
327307
|
+
think: false,
|
|
327308
|
+
// Disable reasoning — voice chat needs fast, direct responses
|
|
327309
|
+
options: { temperature: 0.7, num_predict: 256 }
|
|
327310
|
+
});
|
|
327311
|
+
const res2 = await fetch(`${baseUrl}/api/chat`, {
|
|
327312
|
+
method: "POST",
|
|
327313
|
+
headers,
|
|
327314
|
+
body: nativeBody,
|
|
327315
|
+
signal
|
|
327316
|
+
});
|
|
327317
|
+
if (res2.ok) {
|
|
327318
|
+
return await this.parseOllamaNativeStream(res2, signal);
|
|
327184
327319
|
}
|
|
327320
|
+
} catch (err) {
|
|
327321
|
+
const msg = err instanceof Error ? err.message : "";
|
|
327322
|
+
if (msg.includes("abort")) throw err;
|
|
327185
327323
|
}
|
|
327186
|
-
|
|
327324
|
+
const openaiBody = JSON.stringify({
|
|
327325
|
+
model: this.model,
|
|
327326
|
+
messages: this.context,
|
|
327327
|
+
stream: true,
|
|
327328
|
+
temperature: 0.7,
|
|
327329
|
+
max_tokens: 1024
|
|
327330
|
+
});
|
|
327331
|
+
const endpoint = baseUrl.includes("/v1") ? `${baseUrl}/chat/completions` : `${baseUrl}/v1/chat/completions`;
|
|
327332
|
+
const res = await fetch(endpoint, { method: "POST", headers, body: openaiBody, signal });
|
|
327333
|
+
if (!res.ok) {
|
|
327334
|
+
const errText = await res.text().catch(() => "unknown");
|
|
327335
|
+
throw new Error(`Inference ${res.status}: ${errText.slice(0, 200)}`);
|
|
327336
|
+
}
|
|
327337
|
+
return await this.parseOpenAIStream(res);
|
|
327338
|
+
}
|
|
327339
|
+
/** Parse native Ollama /api/chat streaming response (NDJSON, not SSE) */
|
|
327340
|
+
async parseOllamaNativeStream(res, _signal) {
|
|
327341
|
+
const reader = res.body?.getReader();
|
|
327342
|
+
if (!reader) throw new Error("No response body");
|
|
327343
|
+
const decoder = new TextDecoder();
|
|
327344
|
+
let fullText = "";
|
|
327345
|
+
let buffer2 = "";
|
|
327346
|
+
while (true) {
|
|
327347
|
+
const { done, value: value2 } = await reader.read();
|
|
327348
|
+
if (done) break;
|
|
327349
|
+
buffer2 += decoder.decode(value2, { stream: true });
|
|
327350
|
+
const lines = buffer2.split("\n");
|
|
327351
|
+
buffer2 = lines.pop() ?? "";
|
|
327352
|
+
for (const line of lines) {
|
|
327353
|
+
if (!line.trim()) continue;
|
|
327354
|
+
try {
|
|
327355
|
+
const parsed = JSON.parse(line);
|
|
327356
|
+
const content = parsed.message?.content;
|
|
327357
|
+
if (content) fullText += content;
|
|
327358
|
+
if (parsed.done) return fullText;
|
|
327359
|
+
} catch {
|
|
327360
|
+
}
|
|
327361
|
+
}
|
|
327362
|
+
}
|
|
327363
|
+
return fullText;
|
|
327364
|
+
}
|
|
327365
|
+
/** Parse OpenAI-compat SSE streaming response */
|
|
327366
|
+
async parseOpenAIStream(res) {
|
|
327367
|
+
const reader = res.body?.getReader();
|
|
327368
|
+
if (!reader) throw new Error("No response body");
|
|
327369
|
+
const decoder = new TextDecoder();
|
|
327370
|
+
let fullText = "";
|
|
327371
|
+
let buffer2 = "";
|
|
327372
|
+
while (true) {
|
|
327373
|
+
const { done, value: value2 } = await reader.read();
|
|
327374
|
+
if (done) break;
|
|
327375
|
+
buffer2 += decoder.decode(value2, { stream: true });
|
|
327376
|
+
const lines = buffer2.split("\n");
|
|
327377
|
+
buffer2 = lines.pop() ?? "";
|
|
327378
|
+
for (const line of lines) {
|
|
327379
|
+
const trimmed = line.trim();
|
|
327380
|
+
if (!trimmed || !trimmed.startsWith("data: ")) continue;
|
|
327381
|
+
const data = trimmed.slice(6);
|
|
327382
|
+
if (data === "[DONE]") continue;
|
|
327383
|
+
try {
|
|
327384
|
+
const parsed = JSON.parse(data);
|
|
327385
|
+
const delta = parsed.choices?.[0]?.delta?.content;
|
|
327386
|
+
if (delta) fullText += delta;
|
|
327387
|
+
} catch {
|
|
327388
|
+
}
|
|
327389
|
+
}
|
|
327390
|
+
}
|
|
327391
|
+
return fullText;
|
|
327392
|
+
}
|
|
327393
|
+
// ---------------------------------------------------------------------------
|
|
327394
|
+
// Summary injection to main agent
|
|
327395
|
+
// ---------------------------------------------------------------------------
|
|
327396
|
+
injectSummary() {
|
|
327397
|
+
if (!this.runner) return;
|
|
327398
|
+
const recentTurns = this.context.filter((t2) => t2.role !== "system").slice(-6).map((t2) => `${t2.role === "user" ? "User" : "Assistant"}: ${t2.content}`).join("\n");
|
|
327399
|
+
this.runner.injectUserMessage(
|
|
327400
|
+
`[VOICECHAT SUMMARY] The following is a summary of the recent voice conversation happening in parallel. You don't need to respond to this directly — it's for your awareness. Continue your current task.
|
|
327401
|
+
|
|
327402
|
+
${recentTurns}`
|
|
327403
|
+
);
|
|
327187
327404
|
}
|
|
327188
327405
|
};
|
|
327189
327406
|
}
|
|
@@ -331399,7 +331616,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
|
|
|
331399
331616
|
getCallUrl() {
|
|
331400
331617
|
return voiceSession?.tunnelUrl ?? null;
|
|
331401
331618
|
},
|
|
331402
|
-
// --- /voicechat:
|
|
331619
|
+
// --- /voicechat: Voryn-style state machine voice conversation ---
|
|
331403
331620
|
async voiceChatStart() {
|
|
331404
331621
|
if (_voiceChatSession?.isActive) return;
|
|
331405
331622
|
if (!voiceEngine.enabled || !voiceEngine.ready) {
|
|
@@ -331411,36 +331628,20 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
|
|
|
331411
331628
|
const { VoiceChatSession: VoiceChatSession2 } = await Promise.resolve().then(() => (init_voicechat(), voicechat_exports));
|
|
331412
331629
|
const { ListenEngine: ListenEngine2 } = await Promise.resolve().then(() => (init_listen(), listen_exports));
|
|
331413
331630
|
const listenEng = new ListenEngine2();
|
|
331414
|
-
const
|
|
331631
|
+
const summaryRunner = {
|
|
331415
331632
|
injectUserMessage(content) {
|
|
331416
331633
|
if (activeTask?.runner) {
|
|
331417
331634
|
activeTask.runner.injectUserMessage(content);
|
|
331418
|
-
} else {
|
|
331419
|
-
const match = content.match(/:\s*"([^"]+)"/);
|
|
331420
|
-
const rawText = match ? match[1] : content;
|
|
331421
|
-
if (rl && rawText.trim()) {
|
|
331422
|
-
rl.setLine(rawText.trim());
|
|
331423
|
-
rl.emit("line", rawText.trim());
|
|
331424
|
-
}
|
|
331425
|
-
}
|
|
331426
|
-
},
|
|
331427
|
-
onEvent(handler) {
|
|
331428
|
-
const checkInterval = setInterval(() => {
|
|
331429
|
-
if (activeTask?.runner) {
|
|
331430
|
-
activeTask.runner.onEvent(handler);
|
|
331431
|
-
clearInterval(checkInterval);
|
|
331432
|
-
}
|
|
331433
|
-
}, 500);
|
|
331434
|
-
if (activeTask?.runner) {
|
|
331435
|
-
activeTask.runner.onEvent(handler);
|
|
331436
|
-
clearInterval(checkInterval);
|
|
331437
331635
|
}
|
|
331438
331636
|
}
|
|
331439
331637
|
};
|
|
331440
331638
|
_voiceChatSession = new VoiceChatSession2({
|
|
331441
331639
|
voice: voiceEngine,
|
|
331442
331640
|
listen: listenEng,
|
|
331443
|
-
|
|
331641
|
+
backendUrl: currentConfig.backendUrl,
|
|
331642
|
+
model: currentConfig.model,
|
|
331643
|
+
apiKey: currentConfig.apiKey,
|
|
331644
|
+
runner: summaryRunner,
|
|
331444
331645
|
onStatus(msg) {
|
|
331445
331646
|
writeContent(() => renderInfo(`[voicechat] ${msg}`));
|
|
331446
331647
|
},
|
|
@@ -331454,6 +331655,9 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
|
|
|
331454
331655
|
},
|
|
331455
331656
|
onAgentSpeech(text) {
|
|
331456
331657
|
writeContent(() => renderInfo(`\x1B[38;5;178m[agent]\x1B[0m ${text.slice(0, 120)}`));
|
|
331658
|
+
},
|
|
331659
|
+
onStateChange(state) {
|
|
331660
|
+
writeContent(() => renderInfo(`\x1B[38;5;243m[voicechat] ${state}\x1B[0m`));
|
|
331457
331661
|
}
|
|
331458
331662
|
});
|
|
331459
331663
|
await _voiceChatSession.start();
|
package/package.json
CHANGED