agent-relay-server 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/public/index.html +226 -17
- package/src/routes.ts +1 -1
package/package.json
CHANGED
package/public/index.html
CHANGED
|
@@ -10300,6 +10300,21 @@ async function apiPostAudio(path, blob) {
|
|
|
10300
10300
|
}
|
|
10301
10301
|
return response.json();
|
|
10302
10302
|
}
|
|
10303
|
+
/** POST JSON and get an audio (or other) Blob back — used for server-side TTS. */
|
|
10304
|
+
async function apiPostJsonForBlob(path, body) {
|
|
10305
|
+
const headers = { "Content-Type": "application/json" };
|
|
10306
|
+
if (authToken) headers["X-Agent-Relay-Token"] = authToken;
|
|
10307
|
+
const response = await fetch(new URL("api" + path, baseUrl()), {
|
|
10308
|
+
method: "POST",
|
|
10309
|
+
headers,
|
|
10310
|
+
body: JSON.stringify(body)
|
|
10311
|
+
});
|
|
10312
|
+
if (!response.ok) {
|
|
10313
|
+
if (response.status === 401) throw makeError(401, "Authentication required");
|
|
10314
|
+
throw makeError(response.status, await responseErrorMessage(response));
|
|
10315
|
+
}
|
|
10316
|
+
return response.blob();
|
|
10317
|
+
}
|
|
10303
10318
|
async function apiBlob(path) {
|
|
10304
10319
|
const opts = {
|
|
10305
10320
|
method: "GET",
|
|
@@ -10377,13 +10392,18 @@ function chunkForSpeech(text) {
|
|
|
10377
10392
|
var synthAvailable = typeof window !== "undefined" && "speechSynthesis" in window;
|
|
10378
10393
|
var VoiceTts = class {
|
|
10379
10394
|
enabled = false;
|
|
10395
|
+
lang = "en-US";
|
|
10396
|
+
mode = "kokoro";
|
|
10397
|
+
kokoroVoice = "am_michael";
|
|
10380
10398
|
active = null;
|
|
10381
10399
|
queue = [];
|
|
10382
10400
|
currentChat = null;
|
|
10383
10401
|
speaking = false;
|
|
10384
10402
|
gen = 0;
|
|
10403
|
+
audioEl = null;
|
|
10404
|
+
audioUrl = null;
|
|
10385
10405
|
get available() {
|
|
10386
|
-
return synthAvailable;
|
|
10406
|
+
return synthAvailable || typeof Audio !== "undefined";
|
|
10387
10407
|
}
|
|
10388
10408
|
isEnabled() {
|
|
10389
10409
|
return this.enabled;
|
|
@@ -10393,6 +10413,18 @@ var VoiceTts = class {
|
|
|
10393
10413
|
this.enabled = on;
|
|
10394
10414
|
if (!on) this.reset();
|
|
10395
10415
|
}
|
|
10416
|
+
/** Set the spoken-voice language (BCP-47, e.g. "en-US"). Empty = browser locale. */
|
|
10417
|
+
setLang(lang) {
|
|
10418
|
+
this.lang = lang;
|
|
10419
|
+
}
|
|
10420
|
+
setMode(mode) {
|
|
10421
|
+
if (mode === this.mode) return;
|
|
10422
|
+
this.mode = mode;
|
|
10423
|
+
this.reset();
|
|
10424
|
+
}
|
|
10425
|
+
setKokoroVoice(voice) {
|
|
10426
|
+
this.kokoroVoice = voice;
|
|
10427
|
+
}
|
|
10396
10428
|
setActiveChat(chatId) {
|
|
10397
10429
|
if (chatId === this.active) return;
|
|
10398
10430
|
this.active = chatId;
|
|
@@ -10400,7 +10432,7 @@ var VoiceTts = class {
|
|
|
10400
10432
|
}
|
|
10401
10433
|
/** A captured agent response turn arrived for `chatId`. */
|
|
10402
10434
|
onResponse(chatId, rawText) {
|
|
10403
|
-
if (!this.enabled || !
|
|
10435
|
+
if (!this.enabled || !this.available || !chatId || chatId !== this.active) return;
|
|
10404
10436
|
const text = speechify(rawText);
|
|
10405
10437
|
if (!text) return;
|
|
10406
10438
|
if (this.speaking && this.currentChat && this.currentChat !== chatId) {
|
|
@@ -10428,6 +10460,19 @@ var VoiceTts = class {
|
|
|
10428
10460
|
try {
|
|
10429
10461
|
window.speechSynthesis.cancel();
|
|
10430
10462
|
} catch {}
|
|
10463
|
+
this.stopAudio();
|
|
10464
|
+
}
|
|
10465
|
+
stopAudio() {
|
|
10466
|
+
if (this.audioEl) try {
|
|
10467
|
+
this.audioEl.pause();
|
|
10468
|
+
this.audioEl.src = "";
|
|
10469
|
+
} catch {}
|
|
10470
|
+
if (this.audioUrl) {
|
|
10471
|
+
try {
|
|
10472
|
+
URL.revokeObjectURL(this.audioUrl);
|
|
10473
|
+
} catch {}
|
|
10474
|
+
this.audioUrl = null;
|
|
10475
|
+
}
|
|
10431
10476
|
}
|
|
10432
10477
|
pump() {
|
|
10433
10478
|
if (this.speaking) return;
|
|
@@ -10437,24 +10482,78 @@ var VoiceTts = class {
|
|
|
10437
10482
|
this.currentChat = item.chatId;
|
|
10438
10483
|
const gen = ++this.gen;
|
|
10439
10484
|
const chunks = chunkForSpeech(item.text);
|
|
10440
|
-
const
|
|
10485
|
+
const done = () => {
|
|
10441
10486
|
if (gen !== this.gen) return;
|
|
10442
|
-
|
|
10443
|
-
|
|
10444
|
-
|
|
10445
|
-
|
|
10446
|
-
|
|
10447
|
-
|
|
10448
|
-
|
|
10449
|
-
|
|
10450
|
-
|
|
10451
|
-
|
|
10452
|
-
|
|
10487
|
+
this.speaking = false;
|
|
10488
|
+
this.currentChat = null;
|
|
10489
|
+
this.pump();
|
|
10490
|
+
};
|
|
10491
|
+
if (this.mode === "kokoro") this.speakKokoro(chunks, 0, gen, done);
|
|
10492
|
+
else this.speakBrowser(chunks, 0, gen, done);
|
|
10493
|
+
}
|
|
10494
|
+
speakBrowser(chunks, i, gen, done) {
|
|
10495
|
+
if (gen !== this.gen) return;
|
|
10496
|
+
if (!synthAvailable || i >= chunks.length) return done();
|
|
10497
|
+
const u = new SpeechSynthesisUtterance(chunks[i]);
|
|
10498
|
+
u.lang = this.lang || navigator.language || "en-US";
|
|
10499
|
+
u.onend = () => this.speakBrowser(chunks, i + 1, gen, done);
|
|
10500
|
+
u.onerror = () => this.speakBrowser(chunks, i + 1, gen, done);
|
|
10501
|
+
window.speechSynthesis.speak(u);
|
|
10502
|
+
}
|
|
10503
|
+
speakKokoro(chunks, i, gen, done, prefetched) {
|
|
10504
|
+
if (gen !== this.gen) return;
|
|
10505
|
+
const text = chunks[i];
|
|
10506
|
+
if (text === void 0) return done();
|
|
10507
|
+
const cur = prefetched ?? this.fetchSpeech(text);
|
|
10508
|
+
const nextText = chunks[i + 1];
|
|
10509
|
+
const next = nextText !== void 0 ? this.fetchSpeech(nextText) : void 0;
|
|
10510
|
+
cur.then((blob) => {
|
|
10511
|
+
if (gen !== this.gen) return;
|
|
10512
|
+
this.playBlob(blob, gen, () => this.speakKokoro(chunks, i + 1, gen, done, next), () => {
|
|
10513
|
+
this.speakBrowser(chunks, i, gen, done);
|
|
10514
|
+
});
|
|
10515
|
+
}).catch(() => {
|
|
10516
|
+
if (gen !== this.gen) return;
|
|
10517
|
+
this.speakBrowser(chunks, i, gen, done);
|
|
10518
|
+
});
|
|
10519
|
+
}
|
|
10520
|
+
fetchSpeech(text) {
|
|
10521
|
+
return apiPostJsonForBlob("/connectors/voice/call/speak", {
|
|
10522
|
+
text,
|
|
10523
|
+
voice: this.kokoroVoice
|
|
10524
|
+
});
|
|
10525
|
+
}
|
|
10526
|
+
playBlob(blob, gen, onend, onerror) {
|
|
10527
|
+
if (gen !== this.gen) return;
|
|
10528
|
+
if (typeof Audio === "undefined") return onerror();
|
|
10529
|
+
this.stopAudio();
|
|
10530
|
+
if (!this.audioEl) this.audioEl = new Audio();
|
|
10531
|
+
const url = URL.createObjectURL(blob);
|
|
10532
|
+
this.audioUrl = url;
|
|
10533
|
+
const el = this.audioEl;
|
|
10534
|
+
el.src = url;
|
|
10535
|
+
el.onended = () => {
|
|
10536
|
+
if (gen === this.gen) onend();
|
|
10537
|
+
};
|
|
10538
|
+
el.onerror = () => {
|
|
10539
|
+
if (gen === this.gen) onerror();
|
|
10453
10540
|
};
|
|
10454
|
-
|
|
10541
|
+
el.play().catch(() => {
|
|
10542
|
+
if (gen === this.gen) onerror();
|
|
10543
|
+
});
|
|
10455
10544
|
}
|
|
10456
10545
|
};
|
|
10457
10546
|
var voiceTts = new VoiceTts();
|
|
10547
|
+
/** Sorted unique BCP-47 languages the browser's speech engine can speak. May be empty
|
|
10548
|
+
* until the engine finishes loading voices (listen for `voiceschanged` and re-read). */
|
|
10549
|
+
function availableSpeechLangs() {
|
|
10550
|
+
if (!synthAvailable) return [];
|
|
10551
|
+
try {
|
|
10552
|
+
return [...new Set(window.speechSynthesis.getVoices().map((v) => v.lang).filter(Boolean))].sort();
|
|
10553
|
+
} catch {
|
|
10554
|
+
return [];
|
|
10555
|
+
}
|
|
10556
|
+
}
|
|
10458
10557
|
var micAvailable = typeof navigator !== "undefined" && !!navigator.mediaDevices?.getUserMedia && typeof window !== "undefined" && "MediaRecorder" in window;
|
|
10459
10558
|
function pickMimeType() {
|
|
10460
10559
|
for (const m of [
|
|
@@ -11861,6 +11960,9 @@ var useRelayStore = create$1()(persist((set, get) => ({
|
|
|
11861
11960
|
showBuiltIns: false,
|
|
11862
11961
|
autoRefresh: true,
|
|
11863
11962
|
voiceTtsEnabled: false,
|
|
11963
|
+
voiceTtsLang: "en-US",
|
|
11964
|
+
voiceTtsMode: "kokoro",
|
|
11965
|
+
voiceTtsKokoroVoice: "am_michael",
|
|
11864
11966
|
agentSort: "status",
|
|
11865
11967
|
agentSortDir: "asc",
|
|
11866
11968
|
agentPresetFilter: "",
|
|
@@ -12052,6 +12154,18 @@ var useRelayStore = create$1()(persist((set, get) => ({
|
|
|
12052
12154
|
voiceTts.setEnabled(on);
|
|
12053
12155
|
set({ voiceTtsEnabled: on });
|
|
12054
12156
|
},
|
|
12157
|
+
setVoiceTtsLang(lang) {
|
|
12158
|
+
voiceTts.setLang(lang);
|
|
12159
|
+
set({ voiceTtsLang: lang });
|
|
12160
|
+
},
|
|
12161
|
+
setVoiceTtsMode(mode) {
|
|
12162
|
+
voiceTts.setMode(mode);
|
|
12163
|
+
set({ voiceTtsMode: mode });
|
|
12164
|
+
},
|
|
12165
|
+
setVoiceTtsKokoroVoice(voice) {
|
|
12166
|
+
voiceTts.setKokoroVoice(voice);
|
|
12167
|
+
set({ voiceTtsKokoroVoice: voice });
|
|
12168
|
+
},
|
|
12055
12169
|
async init() {
|
|
12056
12170
|
if (!useRelayStore.persist.hasHydrated()) await new Promise((resolve) => {
|
|
12057
12171
|
const unsub = useRelayStore.persist.onFinishHydration(() => {
|
|
@@ -12062,6 +12176,9 @@ var useRelayStore = create$1()(persist((set, get) => ({
|
|
|
12062
12176
|
const token = get().authToken;
|
|
12063
12177
|
if (token) setAuthToken(token);
|
|
12064
12178
|
voiceTts.setEnabled(get().voiceTtsEnabled);
|
|
12179
|
+
voiceTts.setLang(get().voiceTtsLang);
|
|
12180
|
+
voiceTts.setMode(get().voiceTtsMode);
|
|
12181
|
+
voiceTts.setKokoroVoice(get().voiceTtsKokoroVoice);
|
|
12065
12182
|
syncVoiceActiveChat(get());
|
|
12066
12183
|
setUnauthorizedHandler(() => {
|
|
12067
12184
|
if (!get().authNeeded) set({
|
|
@@ -12576,9 +12693,9 @@ var useRelayStore = create$1()(persist((set, get) => ({
|
|
|
12576
12693
|
const msgs = [...s.messages, msg];
|
|
12577
12694
|
if (msgs.length > 500) msgs.splice(0, msgs.length - 500);
|
|
12578
12695
|
set({ messages: msgs });
|
|
12579
|
-
if (msg.kind === "session") {
|
|
12696
|
+
if (msg.kind === "session" && msg.from !== "user") {
|
|
12580
12697
|
const sess = msg.payload?.session;
|
|
12581
|
-
if (
|
|
12698
|
+
if (sess?.type === "response" && sess?.origin === "provider") voiceTts.onResponse(inboxPeer(msg), msg.body);
|
|
12582
12699
|
}
|
|
12583
12700
|
const peer = inboxPeer(msg);
|
|
12584
12701
|
if (isHumanInboundMessage(msg) && peer && s.view === "chat" && s.selectedInboxThread === peer && !isDashboardHidden()) get().markInboxThreadReadTo(peer, msg.id);
|
|
@@ -14150,6 +14267,9 @@ var useRelayStore = create$1()(persist((set, get) => ({
|
|
|
14150
14267
|
showBuiltIns: state.showBuiltIns,
|
|
14151
14268
|
autoRefresh: state.autoRefresh,
|
|
14152
14269
|
voiceTtsEnabled: state.voiceTtsEnabled,
|
|
14270
|
+
voiceTtsLang: state.voiceTtsLang,
|
|
14271
|
+
voiceTtsMode: state.voiceTtsMode,
|
|
14272
|
+
voiceTtsKokoroVoice: state.voiceTtsKokoroVoice,
|
|
14153
14273
|
agentSort: state.agentSort,
|
|
14154
14274
|
agentSortDir: state.agentSortDir,
|
|
14155
14275
|
agentPresetFilter: state.agentPresetFilter,
|
|
@@ -125564,6 +125684,40 @@ var TIMELINE_STATUS_LABELS = {
|
|
|
125564
125684
|
var TIMELINE_STATUSES = new Set(Object.keys(TIMELINE_STATUS_LABELS));
|
|
125565
125685
|
var STATUS_DEDUPE_WINDOW_MS = 3e3;
|
|
125566
125686
|
var CHAT_BOTTOM_THRESHOLD_PX = 96;
|
|
125687
|
+
var KOKORO_VOICES = [
|
|
125688
|
+
{
|
|
125689
|
+
id: "am_michael",
|
|
125690
|
+
label: "Michael (US ♂)"
|
|
125691
|
+
},
|
|
125692
|
+
{
|
|
125693
|
+
id: "am_adam",
|
|
125694
|
+
label: "Adam (US ♂)"
|
|
125695
|
+
},
|
|
125696
|
+
{
|
|
125697
|
+
id: "af_heart",
|
|
125698
|
+
label: "Heart (US ♀)"
|
|
125699
|
+
},
|
|
125700
|
+
{
|
|
125701
|
+
id: "af_bella",
|
|
125702
|
+
label: "Bella (US ♀)"
|
|
125703
|
+
},
|
|
125704
|
+
{
|
|
125705
|
+
id: "af_nicole",
|
|
125706
|
+
label: "Nicole (US ♀)"
|
|
125707
|
+
},
|
|
125708
|
+
{
|
|
125709
|
+
id: "af_sarah",
|
|
125710
|
+
label: "Sarah (US ♀)"
|
|
125711
|
+
},
|
|
125712
|
+
{
|
|
125713
|
+
id: "bm_george",
|
|
125714
|
+
label: "George (UK ♂)"
|
|
125715
|
+
},
|
|
125716
|
+
{
|
|
125717
|
+
id: "bf_emma",
|
|
125718
|
+
label: "Emma (UK ♀)"
|
|
125719
|
+
}
|
|
125720
|
+
];
|
|
125567
125721
|
function StatusMarker({ event }) {
|
|
125568
125722
|
return /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("div", {
|
|
125569
125723
|
className: "flex items-center justify-center gap-2 py-2 my-1",
|
|
@@ -126599,6 +126753,20 @@ function ChatPanel({ threads, onBack, showBackButton }) {
|
|
|
126599
126753
|
const fetchOrchestrators = useRelayStore((s) => s.fetchOrchestrators);
|
|
126600
126754
|
const voiceTtsEnabled = useRelayStore((s) => s.voiceTtsEnabled);
|
|
126601
126755
|
const setVoiceTtsEnabled = useRelayStore((s) => s.setVoiceTtsEnabled);
|
|
126756
|
+
const voiceTtsLang = useRelayStore((s) => s.voiceTtsLang);
|
|
126757
|
+
const setVoiceTtsLang = useRelayStore((s) => s.setVoiceTtsLang);
|
|
126758
|
+
const voiceTtsMode = useRelayStore((s) => s.voiceTtsMode);
|
|
126759
|
+
const setVoiceTtsMode = useRelayStore((s) => s.setVoiceTtsMode);
|
|
126760
|
+
const voiceTtsKokoroVoice = useRelayStore((s) => s.voiceTtsKokoroVoice);
|
|
126761
|
+
const setVoiceTtsKokoroVoice = useRelayStore((s) => s.setVoiceTtsKokoroVoice);
|
|
126762
|
+
const [speechLangs, setSpeechLangs] = (0, import_react.useState)(() => availableSpeechLangs());
|
|
126763
|
+
(0, import_react.useEffect)(() => {
|
|
126764
|
+
if (!voiceTts.available) return;
|
|
126765
|
+
const refresh = () => setSpeechLangs(availableSpeechLangs());
|
|
126766
|
+
refresh();
|
|
126767
|
+
window.speechSynthesis.addEventListener?.("voiceschanged", refresh);
|
|
126768
|
+
return () => window.speechSynthesis.removeEventListener?.("voiceschanged", refresh);
|
|
126769
|
+
}, []);
|
|
126602
126770
|
const fileInputRef = (0, import_react.useRef)(null);
|
|
126603
126771
|
const pttRecorderRef = (0, import_react.useRef)(null);
|
|
126604
126772
|
const [micState, setMicState] = (0, import_react.useState)("idle");
|
|
@@ -126996,6 +127164,47 @@ function ChatPanel({ threads, onBack, showBackButton }) {
|
|
|
126996
127164
|
onClick: () => setVoiceTtsEnabled(!voiceTtsEnabled),
|
|
126997
127165
|
children: voiceTtsEnabled ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Volume2, { className: "w-3.5 h-3.5" }) : /* @__PURE__ */ (0, import_jsx_runtime.jsx)(VolumeX, { className: "w-3.5 h-3.5" })
|
|
126998
127166
|
}),
|
|
127167
|
+
voiceTts.available && voiceTtsEnabled && /* @__PURE__ */ (0, import_jsx_runtime.jsxs)(import_jsx_runtime.Fragment, { children: [/* @__PURE__ */ (0, import_jsx_runtime.jsxs)("select", {
|
|
127168
|
+
value: voiceTtsMode,
|
|
127169
|
+
onChange: (e) => setVoiceTtsMode(e.target.value),
|
|
127170
|
+
title: "Voice engine — Kokoro (server, natural) falls back to browser automatically",
|
|
127171
|
+
className: "h-7 rounded border border-border bg-background px-1 text-xs",
|
|
127172
|
+
children: [/* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", {
|
|
127173
|
+
value: "kokoro",
|
|
127174
|
+
children: "Kokoro"
|
|
127175
|
+
}), /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", {
|
|
127176
|
+
value: "browser",
|
|
127177
|
+
children: "Browser"
|
|
127178
|
+
})]
|
|
127179
|
+
}), voiceTtsMode === "kokoro" ? /* @__PURE__ */ (0, import_jsx_runtime.jsx)("select", {
|
|
127180
|
+
value: voiceTtsKokoroVoice,
|
|
127181
|
+
onChange: (e) => setVoiceTtsKokoroVoice(e.target.value),
|
|
127182
|
+
title: "Kokoro voice",
|
|
127183
|
+
className: "h-7 rounded border border-border bg-background px-1 text-xs",
|
|
127184
|
+
children: [...KOKORO_VOICES, ...KOKORO_VOICES.some((v) => v.id === voiceTtsKokoroVoice) ? [] : [{
|
|
127185
|
+
id: voiceTtsKokoroVoice,
|
|
127186
|
+
label: voiceTtsKokoroVoice
|
|
127187
|
+
}]].map((v) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", {
|
|
127188
|
+
value: v.id,
|
|
127189
|
+
children: v.label
|
|
127190
|
+
}, v.id))
|
|
127191
|
+
}) : /* @__PURE__ */ (0, import_jsx_runtime.jsxs)("select", {
|
|
127192
|
+
value: voiceTtsLang,
|
|
127193
|
+
onChange: (e) => setVoiceTtsLang(e.target.value),
|
|
127194
|
+
title: "Voice language",
|
|
127195
|
+
className: "h-7 rounded border border-border bg-background px-1 text-xs",
|
|
127196
|
+
children: [[...new Set([
|
|
127197
|
+
"en-US",
|
|
127198
|
+
...speechLangs,
|
|
127199
|
+
...voiceTtsLang ? [voiceTtsLang] : []
|
|
127200
|
+
])].sort().map((l) => /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", {
|
|
127201
|
+
value: l,
|
|
127202
|
+
children: l
|
|
127203
|
+
}, l)), /* @__PURE__ */ (0, import_jsx_runtime.jsx)("option", {
|
|
127204
|
+
value: "",
|
|
127205
|
+
children: "Browser default"
|
|
127206
|
+
})]
|
|
127207
|
+
})] }),
|
|
126999
127208
|
canOpenTerminal && /* @__PURE__ */ (0, import_jsx_runtime.jsx)(Button, {
|
|
127000
127209
|
variant: "ghost",
|
|
127001
127210
|
size: "icon-sm",
|
package/src/routes.ts
CHANGED
|
@@ -5679,7 +5679,7 @@ const putConnectorConfig: Handler = async (req, params) => {
|
|
|
5679
5679
|
// Endpoints a connector daemon may expose for the dashboard to call through the
|
|
5680
5680
|
// relay (single-origin — no CORS, no extra port exposure). Kept to a small,
|
|
5681
5681
|
// non-mutating allowlist; the connector advertises its HTTP base via its status.
|
|
5682
|
-
const PROXYABLE_CONNECTOR_CALLS = new Set(["transcribe", "utterance"]);
|
|
5682
|
+
const PROXYABLE_CONNECTOR_CALLS = new Set(["transcribe", "utterance", "speak"]);
|
|
5683
5683
|
|
|
5684
5684
|
function connectorAdvertisedEndpoint(connector: NonNullable<ReturnType<typeof getConnector>>): string | null {
|
|
5685
5685
|
const raw = connector.state?.raw;
|