agent-office 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -5
- package/dist/cli.js +1 -0
- package/dist/commands/communicator.d.ts +1 -0
- package/dist/commands/communicator.js +803 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -40,10 +40,6 @@ An office for your AI agents. Manage multiple [OpenCode](https://opencode.ai) co
|
|
|
40
40
|
+-----------------------+
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
-
## Breaking Changes in v0.4.7
|
|
44
|
-
|
|
45
|
-
**Cron Job Approval Workflow**: Workers can no longer create cron jobs directly. Instead, they must use `agent-office worker cron request` to submit requests that require human approval. The old `agent-office worker cron create` command has been renamed to `cron request`. This change ensures all automated tasks have human oversight.
|
|
46
|
-
|
|
47
43
|
## Installation
|
|
48
44
|
|
|
49
45
|
```bash
|
|
@@ -206,9 +202,10 @@ Options:
|
|
|
206
202
|
--password <password> API password (env: AGENT_OFFICE_PASSWORD)
|
|
207
203
|
--host <host> Communicator bind host (default: 127.0.0.1)
|
|
208
204
|
--port <port> Communicator bind port (default: 7655)
|
|
205
|
+
--xai-key <key> xAI API key for voice chat (enables voice button)
|
|
209
206
|
```
|
|
210
207
|
|
|
211
|
-
Features: dark theme, iMessage-style chat bubbles, auto-scroll, Enter to send (Shift+Enter for newline), live message polling (5s), unread indicators, status display, and a reset button to revert the agent's session.
|
|
208
|
+
Features: dark theme, iMessage-style chat bubbles, auto-scroll, Enter to send (Shift+Enter for newline), live message polling (5s), unread indicators, status display, and a reset button to revert the agent's session. **Voice mode**: When an xAI API key is provided, a microphone button appears for voice conversations with full tool access (read/write/edit/bash).
|
|
212
209
|
|
|
213
210
|
### `agent-office worker` (for AI agents)
|
|
214
211
|
|
package/dist/cli.js
CHANGED
|
@@ -129,6 +129,7 @@ appCmd
|
|
|
129
129
|
.option("--password <password>", "API password for the agent-office server", process.env.AGENT_OFFICE_PASSWORD ?? "secret")
|
|
130
130
|
.option("--host <host>", "Host to bind the web server to", "127.0.0.1")
|
|
131
131
|
.option("--port <port>", "Port to run the web server on", "7655")
|
|
132
|
+
.option("--xai-key <key>", "xAI API key for voice chat (enables voice button)", process.env.XAI_API_KEY)
|
|
132
133
|
.action(async (options) => {
|
|
133
134
|
const { appCoworkerChatWeb } = await import("./commands/communicator.js");
|
|
134
135
|
await appCoworkerChatWeb(options);
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import express from "express";
|
|
2
|
+
import { exec } from "child_process";
|
|
3
|
+
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
4
|
+
import { dirname } from "path";
|
|
2
5
|
// ── API helpers ───────────────────────────────────────────────────────────────
|
|
3
6
|
async function apiFetch(agentUrl, password, path, init = {}) {
|
|
4
7
|
const res = await fetch(`${agentUrl}${path}`, {
|
|
@@ -489,6 +492,122 @@ function renderPage(coworker, coworkers, msgs, humanName) {
|
|
|
489
492
|
|
|
490
493
|
/* ── HTMX request indicator ── */
|
|
491
494
|
.htmx-request .send-btn { background: var(--accent-dim); }
|
|
495
|
+
|
|
496
|
+
/* ── Voice button ── */
|
|
497
|
+
.voice-btn {
|
|
498
|
+
width: 36px; height: 36px;
|
|
499
|
+
border-radius: 50%;
|
|
500
|
+
background: var(--surface2);
|
|
501
|
+
border: 1px solid var(--border);
|
|
502
|
+
color: var(--text-dim);
|
|
503
|
+
cursor: pointer;
|
|
504
|
+
display: flex;
|
|
505
|
+
align-items: center;
|
|
506
|
+
justify-content: center;
|
|
507
|
+
flex-shrink: 0;
|
|
508
|
+
transition: all 0.2s;
|
|
509
|
+
padding: 0;
|
|
510
|
+
}
|
|
511
|
+
.voice-btn:hover { border-color: var(--accent); color: var(--accent); }
|
|
512
|
+
.voice-btn:disabled { opacity: 0.4; cursor: not-allowed; }
|
|
513
|
+
.voice-btn svg { width: 18px; height: 18px; }
|
|
514
|
+
.voice-btn.active {
|
|
515
|
+
background: var(--red);
|
|
516
|
+
border-color: var(--red);
|
|
517
|
+
color: #fff;
|
|
518
|
+
animation: voice-pulse 1.5s ease-in-out infinite;
|
|
519
|
+
}
|
|
520
|
+
.voice-btn.connecting {
|
|
521
|
+
background: var(--accent-dim);
|
|
522
|
+
border-color: var(--accent);
|
|
523
|
+
color: var(--accent);
|
|
524
|
+
animation: voice-pulse 0.8s ease-in-out infinite;
|
|
525
|
+
}
|
|
526
|
+
@keyframes voice-pulse {
|
|
527
|
+
0%, 100% { box-shadow: 0 0 0 0 rgba(255, 107, 107, 0.4); }
|
|
528
|
+
50% { box-shadow: 0 0 0 8px rgba(255, 107, 107, 0); }
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
/* ── Voice overlay ── */
|
|
532
|
+
.voice-overlay {
|
|
533
|
+
position: absolute;
|
|
534
|
+
top: var(--header-h);
|
|
535
|
+
left: 0; right: 0; bottom: 0;
|
|
536
|
+
background: rgba(15, 17, 23, 0.95);
|
|
537
|
+
z-index: 50;
|
|
538
|
+
display: flex;
|
|
539
|
+
align-items: center;
|
|
540
|
+
justify-content: center;
|
|
541
|
+
backdrop-filter: blur(8px);
|
|
542
|
+
}
|
|
543
|
+
.voice-overlay-content {
|
|
544
|
+
display: flex;
|
|
545
|
+
flex-direction: column;
|
|
546
|
+
align-items: center;
|
|
547
|
+
gap: 24px;
|
|
548
|
+
padding: 32px;
|
|
549
|
+
}
|
|
550
|
+
.voice-visualizer {
|
|
551
|
+
position: relative;
|
|
552
|
+
width: 120px; height: 120px;
|
|
553
|
+
display: flex;
|
|
554
|
+
align-items: center;
|
|
555
|
+
justify-content: center;
|
|
556
|
+
}
|
|
557
|
+
.voice-ring {
|
|
558
|
+
position: absolute;
|
|
559
|
+
width: 100%; height: 100%;
|
|
560
|
+
border-radius: 50%;
|
|
561
|
+
border: 2px solid var(--accent);
|
|
562
|
+
opacity: 0.3;
|
|
563
|
+
animation: voice-ring-pulse 2s ease-in-out infinite;
|
|
564
|
+
}
|
|
565
|
+
.voice-ring-2 { animation-delay: 0.4s; width: 140%; height: 140%; top: -20%; left: -20%; opacity: 0.15; }
|
|
566
|
+
.voice-ring-3 { animation-delay: 0.8s; width: 180%; height: 180%; top: -40%; left: -40%; opacity: 0.08; }
|
|
567
|
+
.voice-overlay.speaking .voice-ring { border-color: var(--green); }
|
|
568
|
+
.voice-overlay.listening .voice-ring { border-color: var(--accent); }
|
|
569
|
+
@keyframes voice-ring-pulse {
|
|
570
|
+
0%, 100% { transform: scale(1); opacity: 0.3; }
|
|
571
|
+
50% { transform: scale(1.1); opacity: 0.1; }
|
|
572
|
+
}
|
|
573
|
+
.voice-avatar {
|
|
574
|
+
width: 64px; height: 64px;
|
|
575
|
+
border-radius: 50%;
|
|
576
|
+
background: var(--accent-dim);
|
|
577
|
+
color: var(--accent);
|
|
578
|
+
display: flex;
|
|
579
|
+
align-items: center;
|
|
580
|
+
justify-content: center;
|
|
581
|
+
font-weight: 700;
|
|
582
|
+
font-size: 24px;
|
|
583
|
+
z-index: 1;
|
|
584
|
+
}
|
|
585
|
+
.voice-status {
|
|
586
|
+
font-size: 16px;
|
|
587
|
+
color: var(--text);
|
|
588
|
+
font-weight: 500;
|
|
589
|
+
}
|
|
590
|
+
.voice-transcript {
|
|
591
|
+
font-size: 14px;
|
|
592
|
+
color: var(--text-dim);
|
|
593
|
+
text-align: center;
|
|
594
|
+
max-width: 400px;
|
|
595
|
+
min-height: 40px;
|
|
596
|
+
line-height: 1.4;
|
|
597
|
+
}
|
|
598
|
+
.voice-end-btn {
|
|
599
|
+
background: var(--red);
|
|
600
|
+
border: none;
|
|
601
|
+
border-radius: 22px;
|
|
602
|
+
color: #fff;
|
|
603
|
+
cursor: pointer;
|
|
604
|
+
font-size: 14px;
|
|
605
|
+
font-weight: 600;
|
|
606
|
+
padding: 10px 24px;
|
|
607
|
+
transition: background 0.15s, transform 0.1s;
|
|
608
|
+
}
|
|
609
|
+
.voice-end-btn:hover { background: #ff8888; }
|
|
610
|
+
.voice-end-btn:active { transform: scale(0.95); }
|
|
492
611
|
</style>
|
|
493
612
|
</head>
|
|
494
613
|
<body>
|
|
@@ -511,6 +630,23 @@ function renderPage(coworker, coworkers, msgs, humanName) {
|
|
|
511
630
|
hx-swap="innerHTML"></div>
|
|
512
631
|
</div>
|
|
513
632
|
<a href="/cron-requests" class="header-link" title="Manage cron job requests">⚙️</a>
|
|
633
|
+
<button class="voice-btn" id="voice-btn"
|
|
634
|
+
onclick="toggleVoice()"
|
|
635
|
+
title="Voice chat"
|
|
636
|
+
style="display:none"
|
|
637
|
+
${!selected ? 'disabled' : ''}>
|
|
638
|
+
<svg class="voice-icon-mic" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
|
639
|
+
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
|
|
640
|
+
<path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
|
|
641
|
+
<line x1="12" y1="19" x2="12" y2="23"></line>
|
|
642
|
+
<line x1="8" y1="23" x2="16" y2="23"></line>
|
|
643
|
+
</svg>
|
|
644
|
+
<div class="voice-icon-stop" style="display:none">
|
|
645
|
+
<svg viewBox="0 0 24 24" fill="currentColor">
|
|
646
|
+
<rect x="6" y="6" width="12" height="12" rx="2"></rect>
|
|
647
|
+
</svg>
|
|
648
|
+
</div>
|
|
649
|
+
</button>
|
|
514
650
|
<button class="reset-btn"
|
|
515
651
|
hx-post="/reset?coworker=${encodeURIComponent(selected)}"
|
|
516
652
|
hx-target="#reset-status"
|
|
@@ -531,6 +667,21 @@ function renderPage(coworker, coworkers, msgs, humanName) {
|
|
|
531
667
|
|
|
532
668
|
<div id="reset-status"></div>
|
|
533
669
|
|
|
670
|
+
<!-- Voice overlay -->
|
|
671
|
+
<div id="voice-overlay" class="voice-overlay" style="display:none">
|
|
672
|
+
<div class="voice-overlay-content">
|
|
673
|
+
<div class="voice-visualizer" id="voice-visualizer">
|
|
674
|
+
<div class="voice-ring"></div>
|
|
675
|
+
<div class="voice-ring voice-ring-2"></div>
|
|
676
|
+
<div class="voice-ring voice-ring-3"></div>
|
|
677
|
+
<div class="voice-avatar" id="voice-avatar">?</div>
|
|
678
|
+
</div>
|
|
679
|
+
<div class="voice-status" id="voice-status">Connecting...</div>
|
|
680
|
+
<div class="voice-transcript" id="voice-transcript"></div>
|
|
681
|
+
<button class="voice-end-btn" onclick="toggleVoice()">End Voice Chat</button>
|
|
682
|
+
</div>
|
|
683
|
+
</div>
|
|
684
|
+
|
|
534
685
|
<!-- Messages -->
|
|
535
686
|
<div class="messages-outer" id="messages-outer">
|
|
536
687
|
<div id="messages"
|
|
@@ -640,9 +791,10 @@ function renderPage(coworker, coworkers, msgs, humanName) {
|
|
|
640
791
|
// Initial scroll
|
|
641
792
|
scrollToBottom()
|
|
642
793
|
|
|
643
|
-
// Switch to a different coworker
|
|
794
|
+
// Switch to a different coworker — stop any active voice session first
|
|
644
795
|
function switchCoworker(name) {
|
|
645
796
|
if (!name) return
|
|
797
|
+
if (voiceState.active) stopVoice()
|
|
646
798
|
const url = new URL(window.location.href)
|
|
647
799
|
url.searchParams.set('coworker', name)
|
|
648
800
|
window.location.href = url.toString()
|
|
@@ -679,6 +831,459 @@ function renderPage(coworker, coworkers, msgs, humanName) {
|
|
|
679
831
|
document.addEventListener('htmx:afterSwap', () => {
|
|
680
832
|
renderMarkdown()
|
|
681
833
|
})
|
|
834
|
+
|
|
835
|
+
// ── Voice Chat ─────────────────────────────────────────────────────────────
|
|
836
|
+
const SAMPLE_RATE = 24000
|
|
837
|
+
|
|
838
|
+
const voiceState = {
|
|
839
|
+
active: false,
|
|
840
|
+
ws: null,
|
|
841
|
+
audioCtx: null,
|
|
842
|
+
micStream: null,
|
|
843
|
+
scriptProcessor: null,
|
|
844
|
+
playbackQueue: [],
|
|
845
|
+
isPlaying: false,
|
|
846
|
+
nextPlayTime: 0,
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
// Check if voice is enabled and show button
|
|
850
|
+
fetch('/voice/config').then(r => r.json()).then(cfg => {
|
|
851
|
+
if (cfg.enabled) {
|
|
852
|
+
const btn = document.getElementById('voice-btn')
|
|
853
|
+
if (btn) btn.style.display = 'flex'
|
|
854
|
+
}
|
|
855
|
+
}).catch(() => {})
|
|
856
|
+
|
|
857
|
+
function toggleVoice() {
|
|
858
|
+
if (voiceState.active) {
|
|
859
|
+
stopVoice()
|
|
860
|
+
} else {
|
|
861
|
+
startVoice()
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
async function startVoice() {
|
|
866
|
+
const select = document.getElementById('coworker-select')
|
|
867
|
+
const coworker = select ? select.value : ''
|
|
868
|
+
if (!coworker) return
|
|
869
|
+
|
|
870
|
+
const btn = document.getElementById('voice-btn')
|
|
871
|
+
const overlay = document.getElementById('voice-overlay')
|
|
872
|
+
const statusEl = document.getElementById('voice-status')
|
|
873
|
+
const transcriptEl = document.getElementById('voice-transcript')
|
|
874
|
+
const avatarEl = document.getElementById('voice-avatar')
|
|
875
|
+
|
|
876
|
+
// Update UI to connecting state
|
|
877
|
+
btn.classList.add('connecting')
|
|
878
|
+
btn.querySelector('.voice-icon-mic').style.display = 'none'
|
|
879
|
+
btn.querySelector('.voice-icon-stop').style.display = 'flex'
|
|
880
|
+
overlay.style.display = 'flex'
|
|
881
|
+
statusEl.textContent = 'Connecting...'
|
|
882
|
+
transcriptEl.textContent = ''
|
|
883
|
+
avatarEl.textContent = coworker.charAt(0).toUpperCase()
|
|
884
|
+
|
|
885
|
+
try {
|
|
886
|
+
// Request ephemeral token from our backend
|
|
887
|
+
const sessRes = await fetch('/voice/session', {
|
|
888
|
+
method: 'POST',
|
|
889
|
+
headers: { 'Content-Type': 'application/json' },
|
|
890
|
+
body: JSON.stringify({ coworker }),
|
|
891
|
+
})
|
|
892
|
+
if (!sessRes.ok) {
|
|
893
|
+
const err = await sessRes.json().catch(() => ({}))
|
|
894
|
+
throw new Error(err.error || 'Failed to create voice session')
|
|
895
|
+
}
|
|
896
|
+
const sessData = await sessRes.json()
|
|
897
|
+
const token = sessData.token
|
|
898
|
+
const instructions = sessData.instructions
|
|
899
|
+
|
|
900
|
+
if (!token) throw new Error('No ephemeral token received')
|
|
901
|
+
|
|
902
|
+
// Request microphone access
|
|
903
|
+
const micStream = await navigator.mediaDevices.getUserMedia({ audio: {
|
|
904
|
+
sampleRate: SAMPLE_RATE,
|
|
905
|
+
channelCount: 1,
|
|
906
|
+
echoCancellation: true,
|
|
907
|
+
noiseSuppression: true,
|
|
908
|
+
autoGainControl: true,
|
|
909
|
+
}})
|
|
910
|
+
voiceState.micStream = micStream
|
|
911
|
+
|
|
912
|
+
// Create audio context for playback
|
|
913
|
+
const audioCtx = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: SAMPLE_RATE })
|
|
914
|
+
voiceState.audioCtx = audioCtx
|
|
915
|
+
voiceState.nextPlayTime = 0
|
|
916
|
+
|
|
917
|
+
// Connect WebSocket to xAI realtime API using subprotocol for auth
|
|
918
|
+
const ws = new WebSocket('wss://api.x.ai/v1/realtime', [
|
|
919
|
+
'xai-client-secret.' + token
|
|
920
|
+
])
|
|
921
|
+
voiceState.ws = ws
|
|
922
|
+
|
|
923
|
+
ws.onopen = () => {
|
|
924
|
+
voiceState.active = true
|
|
925
|
+
btn.classList.remove('connecting')
|
|
926
|
+
btn.classList.add('active')
|
|
927
|
+
statusEl.textContent = 'Listening...'
|
|
928
|
+
overlay.classList.add('listening')
|
|
929
|
+
overlay.classList.remove('speaking')
|
|
930
|
+
|
|
931
|
+
// Configure session with tools
|
|
932
|
+
ws.send(JSON.stringify({
|
|
933
|
+
type: 'session.update',
|
|
934
|
+
session: {
|
|
935
|
+
voice: 'Ara',
|
|
936
|
+
instructions: instructions,
|
|
937
|
+
turn_detection: { type: 'server_vad' },
|
|
938
|
+
audio: {
|
|
939
|
+
input: { format: { type: 'audio/pcm', rate: SAMPLE_RATE } },
|
|
940
|
+
output: { format: { type: 'audio/pcm', rate: SAMPLE_RATE } },
|
|
941
|
+
},
|
|
942
|
+
tools: [
|
|
943
|
+
{
|
|
944
|
+
type: 'function',
|
|
945
|
+
name: 'read',
|
|
946
|
+
description: 'Read a file from the filesystem. Returns the file contents. Use this to examine source code, config files, or any text file.',
|
|
947
|
+
parameters: {
|
|
948
|
+
type: 'object',
|
|
949
|
+
properties: {
|
|
950
|
+
path: { type: 'string', description: 'Absolute or relative file path to read' },
|
|
951
|
+
offset: { type: 'number', description: 'Line number to start reading from (1-indexed). Optional.' },
|
|
952
|
+
limit: { type: 'number', description: 'Maximum number of lines to read. Optional, defaults to 200.' },
|
|
953
|
+
},
|
|
954
|
+
required: ['path'],
|
|
955
|
+
},
|
|
956
|
+
},
|
|
957
|
+
{
|
|
958
|
+
type: 'function',
|
|
959
|
+
name: 'write',
|
|
960
|
+
description: 'Write content to a file, creating it if it does not exist or overwriting if it does. Use this to create new files.',
|
|
961
|
+
parameters: {
|
|
962
|
+
type: 'object',
|
|
963
|
+
properties: {
|
|
964
|
+
path: { type: 'string', description: 'Absolute or relative file path to write' },
|
|
965
|
+
content: { type: 'string', description: 'The full content to write to the file' },
|
|
966
|
+
},
|
|
967
|
+
required: ['path', 'content'],
|
|
968
|
+
},
|
|
969
|
+
},
|
|
970
|
+
{
|
|
971
|
+
type: 'function',
|
|
972
|
+
name: 'edit',
|
|
973
|
+
description: 'Edit a file by replacing an exact string match with new content. The oldString must match exactly (including whitespace and indentation).',
|
|
974
|
+
parameters: {
|
|
975
|
+
type: 'object',
|
|
976
|
+
properties: {
|
|
977
|
+
path: { type: 'string', description: 'Absolute or relative file path to edit' },
|
|
978
|
+
oldText: { type: 'string', description: 'The exact text to find and replace' },
|
|
979
|
+
newText: { type: 'string', description: 'The replacement text' },
|
|
980
|
+
},
|
|
981
|
+
required: ['path', 'oldText', 'newText'],
|
|
982
|
+
},
|
|
983
|
+
},
|
|
984
|
+
{
|
|
985
|
+
type: 'function',
|
|
986
|
+
name: 'bash',
|
|
987
|
+
description: 'Execute a bash command and return its output. Use for running scripts, git commands, build tools, listing files, searching, etc.',
|
|
988
|
+
parameters: {
|
|
989
|
+
type: 'object',
|
|
990
|
+
properties: {
|
|
991
|
+
command: { type: 'string', description: 'The bash command to execute' },
|
|
992
|
+
timeout: { type: 'number', description: 'Timeout in seconds. Optional, defaults to 30.' },
|
|
993
|
+
},
|
|
994
|
+
required: ['command'],
|
|
995
|
+
},
|
|
996
|
+
},
|
|
997
|
+
],
|
|
998
|
+
},
|
|
999
|
+
}))
|
|
1000
|
+
|
|
1001
|
+
// Start streaming microphone audio
|
|
1002
|
+
startMicStreaming(ws, micStream, audioCtx)
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
ws.onmessage = (event) => {
|
|
1006
|
+
const data = JSON.parse(event.data)
|
|
1007
|
+
handleVoiceEvent(data)
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
ws.onerror = () => {
|
|
1011
|
+
statusEl.textContent = 'Connection error'
|
|
1012
|
+
setTimeout(() => stopVoice(), 2000)
|
|
1013
|
+
}
|
|
1014
|
+
|
|
1015
|
+
ws.onclose = () => {
|
|
1016
|
+
if (voiceState.active) {
|
|
1017
|
+
stopVoice()
|
|
1018
|
+
}
|
|
1019
|
+
}
|
|
1020
|
+
|
|
1021
|
+
} catch (err) {
|
|
1022
|
+
console.error('Voice start error:', err)
|
|
1023
|
+
const statusEl = document.getElementById('voice-status')
|
|
1024
|
+
if (statusEl) statusEl.textContent = 'Error: ' + (err.message || 'Unknown error')
|
|
1025
|
+
setTimeout(() => stopVoice(), 2500)
|
|
1026
|
+
}
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
function startMicStreaming(ws, micStream, audioCtx) {
|
|
1030
|
+
const source = audioCtx.createMediaStreamSource(micStream)
|
|
1031
|
+
// Use ScriptProcessorNode for broad compatibility (including mobile)
|
|
1032
|
+
const bufSize = 4096
|
|
1033
|
+
const processor = audioCtx.createScriptProcessor(bufSize, 1, 1)
|
|
1034
|
+
voiceState.scriptProcessor = processor
|
|
1035
|
+
|
|
1036
|
+
processor.onaudioprocess = (e) => {
|
|
1037
|
+
if (!voiceState.active || ws.readyState !== WebSocket.OPEN) return
|
|
1038
|
+
const inputData = e.inputBuffer.getChannelData(0)
|
|
1039
|
+
|
|
1040
|
+
// Resample if audioCtx sample rate differs from target
|
|
1041
|
+
let pcmFloat
|
|
1042
|
+
if (audioCtx.sampleRate !== SAMPLE_RATE) {
|
|
1043
|
+
const ratio = SAMPLE_RATE / audioCtx.sampleRate
|
|
1044
|
+
const newLen = Math.round(inputData.length * ratio)
|
|
1045
|
+
pcmFloat = new Float32Array(newLen)
|
|
1046
|
+
for (let i = 0; i < newLen; i++) {
|
|
1047
|
+
const srcIdx = i / ratio
|
|
1048
|
+
const lo = Math.floor(srcIdx)
|
|
1049
|
+
const hi = Math.min(lo + 1, inputData.length - 1)
|
|
1050
|
+
const frac = srcIdx - lo
|
|
1051
|
+
pcmFloat[i] = inputData[lo] * (1 - frac) + inputData[hi] * frac
|
|
1052
|
+
}
|
|
1053
|
+
} else {
|
|
1054
|
+
pcmFloat = inputData
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
// Convert Float32 to Int16 PCM
|
|
1058
|
+
const pcm16 = new Int16Array(pcmFloat.length)
|
|
1059
|
+
for (let i = 0; i < pcmFloat.length; i++) {
|
|
1060
|
+
const s = Math.max(-1, Math.min(1, pcmFloat[i]))
|
|
1061
|
+
pcm16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF
|
|
1062
|
+
}
|
|
1063
|
+
|
|
1064
|
+
// Base64 encode
|
|
1065
|
+
const bytes = new Uint8Array(pcm16.buffer)
|
|
1066
|
+
let binary = ''
|
|
1067
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
1068
|
+
binary += String.fromCharCode(bytes[i])
|
|
1069
|
+
}
|
|
1070
|
+
const b64 = btoa(binary)
|
|
1071
|
+
|
|
1072
|
+
ws.send(JSON.stringify({
|
|
1073
|
+
type: 'input_audio_buffer.append',
|
|
1074
|
+
audio: b64,
|
|
1075
|
+
}))
|
|
1076
|
+
}
|
|
1077
|
+
|
|
1078
|
+
source.connect(processor)
|
|
1079
|
+
processor.connect(audioCtx.destination)
|
|
1080
|
+
}
|
|
1081
|
+
|
|
1082
|
+
function handleVoiceEvent(data) {
|
|
1083
|
+
const overlay = document.getElementById('voice-overlay')
|
|
1084
|
+
const statusEl = document.getElementById('voice-status')
|
|
1085
|
+
const transcriptEl = document.getElementById('voice-transcript')
|
|
1086
|
+
|
|
1087
|
+
switch (data.type) {
|
|
1088
|
+
case 'input_audio_buffer.speech_started':
|
|
1089
|
+
if (overlay) { overlay.classList.add('listening'); overlay.classList.remove('speaking') }
|
|
1090
|
+
if (statusEl) statusEl.textContent = 'Listening...'
|
|
1091
|
+
break
|
|
1092
|
+
|
|
1093
|
+
case 'input_audio_buffer.speech_stopped':
|
|
1094
|
+
if (statusEl) statusEl.textContent = 'Processing...'
|
|
1095
|
+
break
|
|
1096
|
+
|
|
1097
|
+
case 'conversation.item.input_audio_transcription.completed':
|
|
1098
|
+
if (transcriptEl && data.transcript) {
|
|
1099
|
+
transcriptEl.textContent = 'You: ' + data.transcript
|
|
1100
|
+
}
|
|
1101
|
+
break
|
|
1102
|
+
|
|
1103
|
+
case 'response.function_call_arguments.done':
|
|
1104
|
+
handleToolCall(data)
|
|
1105
|
+
break
|
|
1106
|
+
|
|
1107
|
+
case 'response.output_audio_transcript.delta':
|
|
1108
|
+
if (overlay) { overlay.classList.remove('listening'); overlay.classList.add('speaking') }
|
|
1109
|
+
if (statusEl) statusEl.textContent = 'Speaking...'
|
|
1110
|
+
if (transcriptEl) {
|
|
1111
|
+
const current = transcriptEl.textContent
|
|
1112
|
+
if (current.startsWith('You:') || current.startsWith('[Tool')) {
|
|
1113
|
+
transcriptEl.textContent = data.delta
|
|
1114
|
+
} else {
|
|
1115
|
+
transcriptEl.textContent += data.delta
|
|
1116
|
+
}
|
|
1117
|
+
}
|
|
1118
|
+
break
|
|
1119
|
+
|
|
1120
|
+
case 'response.output_audio.delta':
|
|
1121
|
+
if (data.delta) {
|
|
1122
|
+
playAudioChunk(data.delta)
|
|
1123
|
+
}
|
|
1124
|
+
break
|
|
1125
|
+
|
|
1126
|
+
case 'response.done':
|
|
1127
|
+
if (overlay) { overlay.classList.add('listening'); overlay.classList.remove('speaking') }
|
|
1128
|
+
if (statusEl) statusEl.textContent = 'Listening...'
|
|
1129
|
+
break
|
|
1130
|
+
|
|
1131
|
+
case 'error':
|
|
1132
|
+
console.error('Voice API error:', data)
|
|
1133
|
+
if (statusEl) statusEl.textContent = 'Error: ' + (data.error?.message || 'Unknown')
|
|
1134
|
+
break
|
|
1135
|
+
}
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1138
|
+
async function handleToolCall(event) {
|
|
1139
|
+
const ws = voiceState.ws
|
|
1140
|
+
if (!ws || ws.readyState !== WebSocket.OPEN) return
|
|
1141
|
+
|
|
1142
|
+
const toolName = event.name
|
|
1143
|
+
const callId = event.call_id
|
|
1144
|
+
const args = event.arguments
|
|
1145
|
+
|
|
1146
|
+
const statusEl = document.getElementById('voice-status')
|
|
1147
|
+
const transcriptEl = document.getElementById('voice-transcript')
|
|
1148
|
+
const overlay = document.getElementById('voice-overlay')
|
|
1149
|
+
|
|
1150
|
+
// Show tool execution in UI
|
|
1151
|
+
if (overlay) { overlay.classList.remove('listening', 'speaking') }
|
|
1152
|
+
if (statusEl) statusEl.textContent = 'Running tool: ' + toolName + '...'
|
|
1153
|
+
|
|
1154
|
+
// Parse args for display
|
|
1155
|
+
let argsObj = {}
|
|
1156
|
+
try { argsObj = JSON.parse(args) } catch {}
|
|
1157
|
+
const brief = toolName === 'bash' ? (argsObj.command || '').slice(0, 80) :
|
|
1158
|
+
toolName === 'read' ? argsObj.path || '' :
|
|
1159
|
+
toolName === 'write' ? argsObj.path || '' :
|
|
1160
|
+
toolName === 'edit' ? argsObj.path || '' : ''
|
|
1161
|
+
if (transcriptEl) transcriptEl.textContent = '[Tool: ' + toolName + '] ' + brief
|
|
1162
|
+
|
|
1163
|
+
try {
|
|
1164
|
+
// Execute tool via our backend
|
|
1165
|
+
const toolRes = await fetch('/voice/tool', {
|
|
1166
|
+
method: 'POST',
|
|
1167
|
+
headers: { 'Content-Type': 'application/json' },
|
|
1168
|
+
body: JSON.stringify({ name: toolName, arguments: args }),
|
|
1169
|
+
})
|
|
1170
|
+
const toolData = await toolRes.json()
|
|
1171
|
+
const output = toolData.output || '(no output)'
|
|
1172
|
+
|
|
1173
|
+
// Show brief result
|
|
1174
|
+
const shortOutput = output.length > 120 ? output.slice(0, 120) + '...' : output
|
|
1175
|
+
if (transcriptEl) transcriptEl.textContent = '[Tool: ' + toolName + '] ' + shortOutput
|
|
1176
|
+
|
|
1177
|
+
// Send result back to voice agent
|
|
1178
|
+
ws.send(JSON.stringify({
|
|
1179
|
+
type: 'conversation.item.create',
|
|
1180
|
+
item: {
|
|
1181
|
+
type: 'function_call_output',
|
|
1182
|
+
call_id: callId,
|
|
1183
|
+
output: output,
|
|
1184
|
+
},
|
|
1185
|
+
}))
|
|
1186
|
+
|
|
1187
|
+
// Request the agent to continue
|
|
1188
|
+
ws.send(JSON.stringify({ type: 'response.create' }))
|
|
1189
|
+
|
|
1190
|
+
if (statusEl) statusEl.textContent = 'Processing...'
|
|
1191
|
+
} catch (err) {
|
|
1192
|
+
console.error('Tool execution error:', err)
|
|
1193
|
+
const errMsg = err.message || 'Tool execution failed'
|
|
1194
|
+
|
|
1195
|
+
// Send error back as tool output so the agent can handle it
|
|
1196
|
+
ws.send(JSON.stringify({
|
|
1197
|
+
type: 'conversation.item.create',
|
|
1198
|
+
item: {
|
|
1199
|
+
type: 'function_call_output',
|
|
1200
|
+
call_id: callId,
|
|
1201
|
+
output: 'Error: ' + errMsg,
|
|
1202
|
+
},
|
|
1203
|
+
}))
|
|
1204
|
+
ws.send(JSON.stringify({ type: 'response.create' }))
|
|
1205
|
+
|
|
1206
|
+
if (statusEl) statusEl.textContent = 'Listening...'
|
|
1207
|
+
if (overlay) overlay.classList.add('listening')
|
|
1208
|
+
}
|
|
1209
|
+
}
|
|
1210
|
+
|
|
1211
|
+
function playAudioChunk(base64Audio) {
|
|
1212
|
+
if (!voiceState.audioCtx) return
|
|
1213
|
+
const ctx = voiceState.audioCtx
|
|
1214
|
+
|
|
1215
|
+
// Decode base64 to Int16 PCM
|
|
1216
|
+
const binaryStr = atob(base64Audio)
|
|
1217
|
+
const bytes = new Uint8Array(binaryStr.length)
|
|
1218
|
+
for (let i = 0; i < binaryStr.length; i++) {
|
|
1219
|
+
bytes[i] = binaryStr.charCodeAt(i)
|
|
1220
|
+
}
|
|
1221
|
+
const pcm16 = new Int16Array(bytes.buffer)
|
|
1222
|
+
|
|
1223
|
+
// Convert to Float32 for Web Audio
|
|
1224
|
+
const float32 = new Float32Array(pcm16.length)
|
|
1225
|
+
for (let i = 0; i < pcm16.length; i++) {
|
|
1226
|
+
float32[i] = pcm16[i] / 32768.0
|
|
1227
|
+
}
|
|
1228
|
+
|
|
1229
|
+
// Create audio buffer and schedule playback
|
|
1230
|
+
const buffer = ctx.createBuffer(1, float32.length, SAMPLE_RATE)
|
|
1231
|
+
buffer.getChannelData(0).set(float32)
|
|
1232
|
+
|
|
1233
|
+
const source = ctx.createBufferSource()
|
|
1234
|
+
source.buffer = buffer
|
|
1235
|
+
source.connect(ctx.destination)
|
|
1236
|
+
|
|
1237
|
+
// Schedule seamless playback
|
|
1238
|
+
const now = ctx.currentTime
|
|
1239
|
+
const startTime = Math.max(now, voiceState.nextPlayTime)
|
|
1240
|
+
source.start(startTime)
|
|
1241
|
+
voiceState.nextPlayTime = startTime + buffer.duration
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1244
|
+
function stopVoice() {
|
|
1245
|
+
voiceState.active = false
|
|
1246
|
+
|
|
1247
|
+
// Close WebSocket
|
|
1248
|
+
if (voiceState.ws) {
|
|
1249
|
+
try { voiceState.ws.close() } catch {}
|
|
1250
|
+
voiceState.ws = null
|
|
1251
|
+
}
|
|
1252
|
+
|
|
1253
|
+
// Stop microphone
|
|
1254
|
+
if (voiceState.micStream) {
|
|
1255
|
+
voiceState.micStream.getTracks().forEach(t => t.stop())
|
|
1256
|
+
voiceState.micStream = null
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1259
|
+
// Disconnect audio processor
|
|
1260
|
+
if (voiceState.scriptProcessor) {
|
|
1261
|
+
try { voiceState.scriptProcessor.disconnect() } catch {}
|
|
1262
|
+
voiceState.scriptProcessor = null
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
// Close audio context
|
|
1266
|
+
if (voiceState.audioCtx) {
|
|
1267
|
+
try { voiceState.audioCtx.close() } catch {}
|
|
1268
|
+
voiceState.audioCtx = null
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
voiceState.nextPlayTime = 0
|
|
1272
|
+
|
|
1273
|
+
// Reset UI
|
|
1274
|
+
const btn = document.getElementById('voice-btn')
|
|
1275
|
+
if (btn) {
|
|
1276
|
+
btn.classList.remove('active', 'connecting')
|
|
1277
|
+
btn.querySelector('.voice-icon-mic').style.display = 'block'
|
|
1278
|
+
btn.querySelector('.voice-icon-stop').style.display = 'none'
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
const overlay = document.getElementById('voice-overlay')
|
|
1282
|
+
if (overlay) {
|
|
1283
|
+
overlay.style.display = 'none'
|
|
1284
|
+
overlay.classList.remove('listening', 'speaking')
|
|
1285
|
+
}
|
|
1286
|
+
}
|
|
682
1287
|
</script>
|
|
683
1288
|
</body>
|
|
684
1289
|
</html>`;
|
|
@@ -1180,7 +1785,8 @@ function renderCronRequestsPage(requests) {
|
|
|
1180
1785
|
}
|
|
1181
1786
|
// ── Express app ───────────────────────────────────────────────────────────────
|
|
1182
1787
|
export async function appCoworkerChatWeb(options) {
|
|
1183
|
-
const { url: agentUrl, password, host, port: portStr } = options;
|
|
1788
|
+
const { url: agentUrl, password, host, port: portStr, xaiKey } = options;
|
|
1789
|
+
const voiceEnabled = !!xaiKey;
|
|
1184
1790
|
const port = parseInt(portStr, 10);
|
|
1185
1791
|
if (isNaN(port) || port < 1 || port > 65535) {
|
|
1186
1792
|
console.error(`Error: invalid port "${portStr}"`);
|
|
@@ -1203,6 +1809,9 @@ export async function appCoworkerChatWeb(options) {
|
|
|
1203
1809
|
console.error("Check that agent-office serve is running and --password is correct.");
|
|
1204
1810
|
}
|
|
1205
1811
|
console.log(`Communicator: chatting as "${humanName}"`);
|
|
1812
|
+
if (voiceEnabled) {
|
|
1813
|
+
console.log(`Voice chat enabled (xAI API key configured)`);
|
|
1814
|
+
}
|
|
1206
1815
|
const app = express();
|
|
1207
1816
|
app.use(express.urlencoded({ extended: false }));
|
|
1208
1817
|
app.use(express.json());
|
|
@@ -1294,6 +1903,198 @@ export async function appCoworkerChatWeb(options) {
|
|
|
1294
1903
|
res.send(`<span style="color:var(--red)">✗ Rejection failed: ${escapeHtml(msg)}</span>`);
|
|
1295
1904
|
}
|
|
1296
1905
|
});
|
|
1906
|
+
// ── GET /voice/config — whether voice is enabled ──────────────────────────
|
|
1907
|
+
app.get("/voice/config", (_req, res) => {
|
|
1908
|
+
res.json({ enabled: voiceEnabled });
|
|
1909
|
+
});
|
|
1910
|
+
// ── POST /voice/session — fetch ephemeral token from xAI ─────────────────
// Exchanges the server-held xAI API key for a short-lived (300 s) client
// secret the browser can use to open a realtime voice connection, and
// builds the per-coworker system instructions for the voice model.
// Body: { coworker: string }.
// Responds 200 { token, instructions, coworker } on success;
// 403 when voice is disabled, 400 on bad input, 502 on upstream failure.
app.post("/voice/session", async (req, res) => {
    if (!voiceEnabled || !xaiKey) {
        res.status(403).json({ error: "Voice is not enabled" });
        return;
    }
    const { coworker } = req.body;
    // Reject non-string values too: `coworker` is interpolated into the
    // instructions below and forwarded to fetchCoworkerStatus.
    if (!coworker || typeof coworker !== "string") {
        res.status(400).json({ error: "coworker is required" });
        return;
    }
    try {
        // Fetch the coworker's status for context (may be empty/undefined;
        // the instructions line is dropped in that case via .filter(Boolean)).
        const status = await fetchCoworkerStatus(agentUrl, password, coworker);
        // Get ephemeral token from xAI.
        const tokenRes = await fetch("https://api.x.ai/v1/realtime/client_secrets", {
            method: "POST",
            headers: {
                "Authorization": `Bearer ${xaiKey}`,
                "Content-Type": "application/json",
            },
            body: JSON.stringify({ expires_after: { seconds: 300 } }),
        });
        if (!tokenRes.ok) {
            const errBody = await tokenRes.json().catch(() => ({}));
            // Upstream error payloads may nest the message ({ error: { message } });
            // extract a string so the client never sees "[object Object]".
            const upstream = typeof errBody.error === "string"
                ? errBody.error
                : errBody.error?.message ?? `HTTP ${tokenRes.status}`;
            res.status(502).json({ error: `xAI API error: ${upstream}` });
            return;
        }
        // xAI returns { value: string, expires_at: number } at the top level.
        const tokenData = await tokenRes.json();
        const token = tokenData.value;
        if (!token) {
            console.error("Voice session: unexpected xAI response shape:", JSON.stringify(tokenData));
            res.status(502).json({ error: "No ephemeral token in xAI response" });
            return;
        }
        // Build voice instructions based on the coworker. The name is inserted
        // verbatim: these instructions are plain text sent to the voice model,
        // not HTML, so HTML-escaping here would corrupt names containing
        // characters like "&" (the humanName line below is likewise raw).
        const instructions = [
            `You are ${coworker}, an AI coworker in the agent office.`,
            status ? `Your current status is: "${status}".` : "",
            `You are having a voice conversation with your human manager ${humanName}.`,
            `Be helpful, collaborative, and keep your responses concise since this is a voice conversation.`,
            `You can discuss work, answer questions, and collaborate on tasks.`,
            ``,
            `You have access to the agent-office CLI tool which can:`,
            `- Create and manage AI coworker sessions`,
            `- Send messages between coworkers`,
            `- Set status messages for visibility`,
            `- Schedule cron jobs for recurring tasks`,
            `- Run a web chat interface for human interaction`,
            `- Manage task boards with kanban-style workflows`,
            `- Send email notifications for unread messages`,
            ``,
            `You have access to coding tools that you can use when the human asks you to look at, create, or modify files, or run commands:`,
            `- read: Read a file from the filesystem. Use this to examine source code, config files, etc.`,
            `- write: Write content to a file, creating or overwriting it.`,
            `- edit: Edit a file by finding and replacing an exact string.`,
            `- bash: Execute a shell command and get the output.`,
            ``,
            `When using tools, briefly tell the human what you're doing before calling the tool.`,
            `After getting tool results, summarize the key information verbally rather than reading everything.`,
            `The working directory is: ${process.cwd()}`,
        ].filter(Boolean).join("\n");
        res.json({
            token,
            instructions,
            coworker,
        });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        res.status(502).json({ error: `Failed to create voice session: ${msg}` });
    }
});
|
|
1984
|
+
// ── POST /voice/tool — execute a tool call server-side ────────────────────
// Executes one of the voice assistant's tools (read / write / edit / bash)
// on behalf of the browser's realtime session.
// Body: { name: string, arguments: string (JSON) | object }.
// Responds 200 { output: string } (tool errors are reported in `output` so
// the model can relay them); 400 on malformed requests; 403 when voice is
// disabled. SECURITY: this endpoint runs arbitrary shell commands and file
// writes, so — like /voice/session — it must be unreachable unless an xAI
// key was explicitly configured.
app.post("/voice/tool", async (req, res) => {
    if (!voiceEnabled) {
        res.status(403).json({ error: "Voice is not enabled" });
        return;
    }
    const { name, arguments: argsStr } = req.body;
    if (!name || typeof name !== "string") {
        res.status(400).json({ error: "name is required" });
        return;
    }
    // Arguments arrive as a JSON string from the realtime API, but accept a
    // pre-parsed object too.
    let args;
    try {
        args = typeof argsStr === "string" ? JSON.parse(argsStr) : (argsStr ?? {});
    }
    catch {
        res.status(400).json({ error: "Invalid arguments JSON" });
        return;
    }
    try {
        let result;
        switch (name) {
            case "read": {
                // Read a slice of a file, 1-indexed, numbered like `cat -n`.
                const filePath = String(args.path ?? "");
                if (!filePath) {
                    res.json({ output: "Error: path is required" });
                    return;
                }
                const content = await readFile(filePath, "utf-8");
                const lines = content.split("\n");
                const offset = Math.max(1, Number(args.offset) || 1);
                const limit = Math.min(2000, Number(args.limit) || 200);
                const sliced = lines.slice(offset - 1, offset - 1 + limit);
                result = sliced.map((line, i) => `${offset + i}: ${line}`).join("\n");
                if (lines.length > offset - 1 + limit) {
                    result += `\n... (${lines.length} total lines)`;
                }
                break;
            }
            case "write": {
                // Create or overwrite a file, creating parent directories.
                const filePath = String(args.path ?? "");
                const content = String(args.content ?? "");
                if (!filePath) {
                    res.json({ output: "Error: path is required" });
                    return;
                }
                await mkdir(dirname(filePath), { recursive: true });
                await writeFile(filePath, content, "utf-8");
                result = `Written ${content.length} bytes to ${filePath}`;
                break;
            }
            case "edit": {
                // Replace one exact, unique occurrence of oldText with newText.
                const filePath = String(args.path ?? "");
                const oldStr = String(args.oldText ?? "");
                const newStr = String(args.newText ?? "");
                if (!filePath) {
                    res.json({ output: "Error: path is required" });
                    return;
                }
                if (!oldStr) {
                    res.json({ output: "Error: oldText is required" });
                    return;
                }
                const fileContent = await readFile(filePath, "utf-8");
                const idx = fileContent.indexOf(oldStr);
                if (idx === -1) {
                    result = "Error: oldText not found in file";
                }
                else if (fileContent.indexOf(oldStr, idx + 1) !== -1) {
                    // Ambiguous match — refuse rather than guess which one.
                    result = "Error: oldText found multiple times. Provide more context to make it unique.";
                }
                else {
                    const edited = fileContent.slice(0, idx) + newStr + fileContent.slice(idx + oldStr.length);
                    await writeFile(filePath, edited, "utf-8");
                    result = `Edit applied to ${filePath}`;
                }
                break;
            }
            case "bash": {
                // Run a shell command in the server's cwd with a capped timeout
                // (default 30 s, max 120 s) and 1 MiB of captured output.
                const command = String(args.command ?? "");
                if (!command) {
                    res.json({ output: "Error: command is required" });
                    return;
                }
                const timeoutSec = Math.min(120, Number(args.timeout) || 30);
                const timeout = timeoutSec * 1000;
                result = await new Promise((resolve) => {
                    exec(command, { timeout, maxBuffer: 1024 * 1024, cwd: process.cwd() }, (err, stdout, stderr) => {
                        const out = (stdout || "").trim();
                        const errOut = (stderr || "").trim();
                        // exec sets err.killed when the process was terminated
                        // by the timeout.
                        if (err && err.killed) {
                            resolve(`Command timed out after ${timeout}ms`);
                        }
                        else if (err) {
                            resolve(`Exit code ${err.code ?? 1}\n${errOut}\n${out}`.trim());
                        }
                        else {
                            const combined = errOut ? `${out}\n${errOut}` : out;
                            resolve(combined || "(no output)");
                        }
                    });
                });
                // Truncate very long output for the voice context
                if (result.length > 4000) {
                    result = result.slice(0, 4000) + "\n... (output truncated)";
                }
                break;
            }
            default:
                result = `Unknown tool: ${name}`;
        }
        res.json({ output: result });
    }
    catch (err) {
        // Filesystem/exec failures are reported as tool output (HTTP 200) so
        // the voice model can read the error back to the human.
        const msg = err instanceof Error ? err.message : String(err);
        res.json({ output: `Error: ${msg}` });
    }
});
|
|
1297
2098
|
// ── GET / — full page ────────────────────────────────────────────────────
|
|
1298
2099
|
app.get("/", async (req, res) => {
|
|
1299
2100
|
try {
|