agent-office 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,7 @@
1
1
  import express from "express";
2
+ import { exec } from "child_process";
3
+ import { readFile, writeFile, mkdir } from "fs/promises";
4
+ import { dirname } from "path";
2
5
  // ── API helpers ───────────────────────────────────────────────────────────────
3
6
  async function apiFetch(agentUrl, password, path, init = {}) {
4
7
  const res = await fetch(`${agentUrl}${path}`, {
@@ -43,6 +46,22 @@ async function fetchMessages(agentUrl, password, humanName, coworker) {
43
46
  async function markRead(agentUrl, password, id) {
44
47
  await apiFetch(agentUrl, password, `/messages/${id}/read`, { method: "POST" });
45
48
  }
49
/**
 * Fetch cron requests, optionally filtered by status.
 *
 * @param {string} agentUrl - Base URL handed to apiFetch.
 * @param {string} password - Credential handed to apiFetch.
 * @param {string} [status] - When truthy, sent URL-encoded as the `status` query parameter.
 * @returns {Promise<*>} Whatever apiFetch resolves to for `/cron-requests`.
 */
async function fetchCronRequests(agentUrl, password, status) {
  let query = "";
  if (status) {
    query = "?status=" + encodeURIComponent(status);
  }
  return apiFetch(agentUrl, password, "/cron-requests" + query);
}
53
/**
 * Approve a pending cron request.
 *
 * The id is URL-encoded before it is placed in the path so that a value containing
 * `/`, `?` or `#` cannot redirect the call to a different endpoint.
 *
 * @param {string} agentUrl - Base URL handed to apiFetch.
 * @param {string} password - Credential handed to apiFetch.
 * @param {string|number} id - Identifier of the cron request.
 * @param {string} [notes] - Reviewer notes sent in the JSON body (omitted from the body when undefined).
 * @returns {Promise<void>} Resolves once apiFetch has settled; its result is discarded.
 */
async function approveCronRequest(agentUrl, password, id, notes) {
  const path = `/cron-requests/${encodeURIComponent(id)}/approve`;
  await apiFetch(agentUrl, password, path, {
    method: "POST",
    body: JSON.stringify({ notes }),
  });
}
59
/**
 * Reject a pending cron request.
 *
 * The id is URL-encoded before it is placed in the path so that a value containing
 * `/`, `?` or `#` cannot redirect the call to a different endpoint.
 *
 * @param {string} agentUrl - Base URL handed to apiFetch.
 * @param {string} password - Credential handed to apiFetch.
 * @param {string|number} id - Identifier of the cron request.
 * @param {string} [notes] - Reviewer notes sent in the JSON body (omitted from the body when undefined).
 * @returns {Promise<void>} Resolves once apiFetch has settled; its result is discarded.
 */
async function rejectCronRequest(agentUrl, password, id, notes) {
  const path = `/cron-requests/${encodeURIComponent(id)}/reject`;
  await apiFetch(agentUrl, password, path, {
    method: "POST",
    body: JSON.stringify({ notes }),
  });
}
46
65
  // ── HTML helpers ──────────────────────────────────────────────────────────────
47
66
  function escapeHtml(str) {
48
67
  return str
@@ -58,6 +77,15 @@ function formatTime(iso) {
58
77
  hour: "2-digit", minute: "2-digit",
59
78
  });
60
79
  }
80
/**
 * Format a timestamp as a full date plus time in the runtime's default locale.
 *
 * @param {string|number|Date} iso - Any value accepted by the Date constructor.
 * @returns {string} Locale string with numeric year, short month, numeric day and 2-digit hour/minute.
 */
function formatFullTime(iso) {
  const when = new Date(iso);
  const displayOptions = {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  };
  return when.toLocaleString(undefined, displayOptions);
}
61
89
  function renderMessage(msg, humanName, spacingClass) {
62
90
  const isMine = msg.from_name === humanName;
63
91
  const bubbleClass = isMine ? "bubble bubble-mine" : "bubble bubble-theirs";
@@ -227,6 +255,16 @@ function renderPage(coworker, coworkers, msgs, humanName) {
227
255
  .refresh-indicator.active { background: var(--green); }
228
256
 
229
257
  /* ── Reset button ── */
258
+ .header-link {
259
+ color: var(--text-dim);
260
+ text-decoration: none;
261
+ font-size: 18px;
262
+ margin-right: 8px;
263
+ transition: color 0.15s;
264
+ flex-shrink: 0;
265
+ }
266
+ .header-link:hover { color: var(--accent); }
267
+
230
268
  .reset-btn {
231
269
  background: none;
232
270
  border: 1px solid var(--border);
@@ -454,6 +492,122 @@ function renderPage(coworker, coworkers, msgs, humanName) {
454
492
 
455
493
  /* ── HTMX request indicator ── */
456
494
  .htmx-request .send-btn { background: var(--accent-dim); }
495
+
496
+ /* ── Voice button ── */
497
+ .voice-btn {
498
+ width: 36px; height: 36px;
499
+ border-radius: 50%;
500
+ background: var(--surface2);
501
+ border: 1px solid var(--border);
502
+ color: var(--text-dim);
503
+ cursor: pointer;
504
+ display: flex;
505
+ align-items: center;
506
+ justify-content: center;
507
+ flex-shrink: 0;
508
+ transition: all 0.2s;
509
+ padding: 0;
510
+ }
511
+ .voice-btn:hover { border-color: var(--accent); color: var(--accent); }
512
+ .voice-btn:disabled { opacity: 0.4; cursor: not-allowed; }
513
+ .voice-btn svg { width: 18px; height: 18px; }
514
+ .voice-btn.active {
515
+ background: var(--red);
516
+ border-color: var(--red);
517
+ color: #fff;
518
+ animation: voice-pulse 1.5s ease-in-out infinite;
519
+ }
520
+ .voice-btn.connecting {
521
+ background: var(--accent-dim);
522
+ border-color: var(--accent);
523
+ color: var(--accent);
524
+ animation: voice-pulse 0.8s ease-in-out infinite;
525
+ }
526
+ @keyframes voice-pulse {
527
+ 0%, 100% { box-shadow: 0 0 0 0 rgba(255, 107, 107, 0.4); }
528
+ 50% { box-shadow: 0 0 0 8px rgba(255, 107, 107, 0); }
529
+ }
530
+
531
+ /* ── Voice overlay ── */
532
+ .voice-overlay {
533
+ position: absolute;
534
+ top: var(--header-h);
535
+ left: 0; right: 0; bottom: 0;
536
+ background: rgba(15, 17, 23, 0.95);
537
+ z-index: 50;
538
+ display: flex;
539
+ align-items: center;
540
+ justify-content: center;
541
+ backdrop-filter: blur(8px);
542
+ }
543
+ .voice-overlay-content {
544
+ display: flex;
545
+ flex-direction: column;
546
+ align-items: center;
547
+ gap: 24px;
548
+ padding: 32px;
549
+ }
550
+ .voice-visualizer {
551
+ position: relative;
552
+ width: 120px; height: 120px;
553
+ display: flex;
554
+ align-items: center;
555
+ justify-content: center;
556
+ }
557
+ .voice-ring {
558
+ position: absolute;
559
+ width: 100%; height: 100%;
560
+ border-radius: 50%;
561
+ border: 2px solid var(--accent);
562
+ opacity: 0.3;
563
+ animation: voice-ring-pulse 2s ease-in-out infinite;
564
+ }
565
+ .voice-ring-2 { animation-delay: 0.4s; width: 140%; height: 140%; top: -20%; left: -20%; opacity: 0.15; }
566
+ .voice-ring-3 { animation-delay: 0.8s; width: 180%; height: 180%; top: -40%; left: -40%; opacity: 0.08; }
567
+ .voice-overlay.speaking .voice-ring { border-color: var(--green); }
568
+ .voice-overlay.listening .voice-ring { border-color: var(--accent); }
569
+ @keyframes voice-ring-pulse {
570
+ 0%, 100% { transform: scale(1); opacity: 0.3; }
571
+ 50% { transform: scale(1.1); opacity: 0.1; }
572
+ }
573
+ .voice-avatar {
574
+ width: 64px; height: 64px;
575
+ border-radius: 50%;
576
+ background: var(--accent-dim);
577
+ color: var(--accent);
578
+ display: flex;
579
+ align-items: center;
580
+ justify-content: center;
581
+ font-weight: 700;
582
+ font-size: 24px;
583
+ z-index: 1;
584
+ }
585
+ .voice-status {
586
+ font-size: 16px;
587
+ color: var(--text);
588
+ font-weight: 500;
589
+ }
590
+ .voice-transcript {
591
+ font-size: 14px;
592
+ color: var(--text-dim);
593
+ text-align: center;
594
+ max-width: 400px;
595
+ min-height: 40px;
596
+ line-height: 1.4;
597
+ }
598
+ .voice-end-btn {
599
+ background: var(--red);
600
+ border: none;
601
+ border-radius: 22px;
602
+ color: #fff;
603
+ cursor: pointer;
604
+ font-size: 14px;
605
+ font-weight: 600;
606
+ padding: 10px 24px;
607
+ transition: background 0.15s, transform 0.1s;
608
+ }
609
+ .voice-end-btn:hover { background: #ff8888; }
610
+ .voice-end-btn:active { transform: scale(0.95); }
457
611
  </style>
458
612
  </head>
459
613
  <body>
@@ -475,6 +629,24 @@ function renderPage(coworker, coworkers, msgs, humanName) {
475
629
  hx-trigger="load, every 5s"
476
630
  hx-swap="innerHTML"></div>
477
631
  </div>
632
+ <a href="/cron-requests" class="header-link" title="Manage cron job requests">⚙️</a>
633
+ <button class="voice-btn" id="voice-btn"
634
+ onclick="toggleVoice()"
635
+ title="Voice chat"
636
+ style="display:none"
637
+ ${!selected ? 'disabled' : ''}>
638
+ <svg class="voice-icon-mic" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
639
+ <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
640
+ <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
641
+ <line x1="12" y1="19" x2="12" y2="23"></line>
642
+ <line x1="8" y1="23" x2="16" y2="23"></line>
643
+ </svg>
644
+ <div class="voice-icon-stop" style="display:none">
645
+ <svg viewBox="0 0 24 24" fill="currentColor">
646
+ <rect x="6" y="6" width="12" height="12" rx="2"></rect>
647
+ </svg>
648
+ </div>
649
+ </button>
478
650
  <button class="reset-btn"
479
651
  hx-post="/reset?coworker=${encodeURIComponent(selected)}"
480
652
  hx-target="#reset-status"
@@ -495,6 +667,21 @@ function renderPage(coworker, coworkers, msgs, humanName) {
495
667
 
496
668
  <div id="reset-status"></div>
497
669
 
670
+ <!-- Voice overlay -->
671
+ <div id="voice-overlay" class="voice-overlay" style="display:none">
672
+ <div class="voice-overlay-content">
673
+ <div class="voice-visualizer" id="voice-visualizer">
674
+ <div class="voice-ring"></div>
675
+ <div class="voice-ring voice-ring-2"></div>
676
+ <div class="voice-ring voice-ring-3"></div>
677
+ <div class="voice-avatar" id="voice-avatar">?</div>
678
+ </div>
679
+ <div class="voice-status" id="voice-status">Connecting...</div>
680
+ <div class="voice-transcript" id="voice-transcript"></div>
681
+ <button class="voice-end-btn" onclick="toggleVoice()">End Voice Chat</button>
682
+ </div>
683
+ </div>
684
+
498
685
  <!-- Messages -->
499
686
  <div class="messages-outer" id="messages-outer">
500
687
  <div id="messages"
@@ -604,9 +791,10 @@ function renderPage(coworker, coworkers, msgs, humanName) {
604
791
  // Initial scroll
605
792
  scrollToBottom()
606
793
 
607
- // Switch to a different coworker
794
+ // Switch to a different coworker — stop any active voice session first
608
795
  function switchCoworker(name) {
609
796
  if (!name) return
797
+ if (voiceState.active) stopVoice()
610
798
  const url = new URL(window.location.href)
611
799
  url.searchParams.set('coworker', name)
612
800
  window.location.href = url.toString()
@@ -643,13 +831,962 @@ function renderPage(coworker, coworkers, msgs, humanName) {
643
831
  document.addEventListener('htmx:afterSwap', () => {
644
832
  renderMarkdown()
645
833
  })
834
+
835
// ── Voice Chat ─────────────────────────────────────────────────────────────
// Sample rate in Hz shared by microphone capture, playback and the session audio format.
const SAMPLE_RATE = 24000;

// Mutable state of the single voice session. startVoice()/startMicStreaming() fill in
// the resources and stopVoice() clears them again. playbackQueue and isPlaying are not
// referenced by the voice functions visible in this script.
const voiceState = {
  active: false,
  ws: null,
  audioCtx: null,
  micStream: null,
  scriptProcessor: null,
  playbackQueue: [],
  isPlaying: false,
  nextPlayTime: 0,
};

// Reveal the (initially hidden) voice button only when the backend reports voice as enabled.
// Failures are deliberately ignored: the button then simply stays hidden.
fetch('/voice/config')
  .then((res) => res.json())
  .then(({ enabled }) => {
    if (!enabled) return;
    const voiceButton = document.getElementById('voice-btn');
    if (voiceButton) voiceButton.style.display = 'flex';
  })
  .catch(() => {});
856
+
857
// Start or stop the voice session. Bound to the header mic button and to the
// overlay's "End Voice Chat" button.
//
// voiceState.active only becomes true once the socket has opened, so a session that
// is still connecting is recognised by the 'connecting' class that startVoice() puts
// on the mic button. Treating that state as "running" makes a click during connection
// stop the attempt instead of launching a second session on top of it.
function toggleVoice() {
  const btn = document.getElementById('voice-btn');
  const isConnecting = Boolean(btn && btn.classList.contains('connecting'));

  if (voiceState.active || isConnecting) {
    stopVoice();
  } else {
    startVoice();
  }
}
864
+
865
// Function-tool schemas advertised to the realtime voice session in session.update.
// Tool calls made by the model are forwarded to the /voice/tool endpoint by handleToolCall().
function buildVoiceTools() {
  return [
    {
      type: 'function',
      name: 'read',
      description: 'Read a file from the filesystem. Returns the file contents. Use this to examine source code, config files, or any text file.',
      parameters: {
        type: 'object',
        properties: {
          path: { type: 'string', description: 'Absolute or relative file path to read' },
          offset: { type: 'number', description: 'Line number to start reading from (1-indexed). Optional.' },
          limit: { type: 'number', description: 'Maximum number of lines to read. Optional, defaults to 200.' },
        },
        required: ['path'],
      },
    },
    {
      type: 'function',
      name: 'write',
      description: 'Write content to a file, creating it if it does not exist or overwriting if it does. Use this to create new files.',
      parameters: {
        type: 'object',
        properties: {
          path: { type: 'string', description: 'Absolute or relative file path to write' },
          content: { type: 'string', description: 'The full content to write to the file' },
        },
        required: ['path', 'content'],
      },
    },
    {
      type: 'function',
      name: 'edit',
      // The description names the real parameter (oldText) so the model is not told
      // about a parameter that does not exist in the schema.
      description: 'Edit a file by replacing an exact string match with new content. The oldText must match exactly (including whitespace and indentation).',
      parameters: {
        type: 'object',
        properties: {
          path: { type: 'string', description: 'Absolute or relative file path to edit' },
          oldText: { type: 'string', description: 'The exact text to find and replace' },
          newText: { type: 'string', description: 'The replacement text' },
        },
        required: ['path', 'oldText', 'newText'],
      },
    },
    {
      type: 'function',
      name: 'bash',
      description: 'Execute a bash command and return its output. Use for running scripts, git commands, build tools, listing files, searching, etc.',
      parameters: {
        type: 'object',
        properties: {
          command: { type: 'string', description: 'The bash command to execute' },
          timeout: { type: 'number', description: 'Timeout in seconds. Optional, defaults to 30.' },
        },
        required: ['command'],
      },
    },
  ];
}

// Open a voice session with the currently selected coworker.
//
// Steps: show the connecting UI, request an ephemeral token from /voice/session,
// acquire the microphone, create the playback AudioContext, then connect the
// realtime WebSocket and configure it once it opens.
//
// Does nothing when no coworker is selected or when a session is already active or
// connecting. If the attempt is cancelled while one of the awaits is pending (the
// 'connecting' class has been removed from the button, or a newer attempt has
// started) it releases whatever it acquired and returns without opening a socket.
// Errors are shown in the overlay and the session is torn down shortly afterwards.
async function startVoice() {
  const select = document.getElementById('coworker-select');
  const coworker = select ? select.value : '';
  if (!coworker) return;

  const btn = document.getElementById('voice-btn');
  const overlay = document.getElementById('voice-overlay');
  const statusEl = document.getElementById('voice-status');
  const transcriptEl = document.getElementById('voice-transcript');
  const avatarEl = document.getElementById('voice-avatar');

  // Re-entrancy guard: never stack a second session on a running or connecting one.
  if (voiceState.active || btn.classList.contains('connecting')) return;

  // Identity token for this attempt; lets delayed callbacks tell whether they still
  // belong to the current session.
  const attempt = {};
  voiceState.startAttempt = attempt;
  const isCancelled = () =>
    voiceState.startAttempt !== attempt || !btn.classList.contains('connecting');

  // Update UI to connecting state
  btn.classList.add('connecting');
  btn.querySelector('.voice-icon-mic').style.display = 'none';
  btn.querySelector('.voice-icon-stop').style.display = 'flex';
  overlay.style.display = 'flex';
  statusEl.textContent = 'Connecting...';
  transcriptEl.textContent = '';
  avatarEl.textContent = coworker.charAt(0).toUpperCase();

  try {
    // Request ephemeral token from our backend
    const sessRes = await fetch('/voice/session', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ coworker }),
    });
    if (!sessRes.ok) {
      const err = await sessRes.json().catch(() => ({}));
      throw new Error(err.error || 'Failed to create voice session');
    }
    const sessData = await sessRes.json();
    if (isCancelled()) return;

    const token = sessData.token;
    const instructions = sessData.instructions;
    if (!token) throw new Error('No ephemeral token received');

    // Request microphone access
    const micStream = await navigator.mediaDevices.getUserMedia({ audio: {
      sampleRate: SAMPLE_RATE,
      channelCount: 1,
      echoCancellation: true,
      noiseSuppression: true,
      autoGainControl: true,
    }});
    if (isCancelled()) {
      // Cancelled while the permission prompt was open: do not keep the mic live.
      micStream.getTracks().forEach((track) => track.stop());
      return;
    }
    voiceState.micStream = micStream;

    // Create audio context for playback
    const audioCtx = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: SAMPLE_RATE });
    voiceState.audioCtx = audioCtx;
    voiceState.nextPlayTime = 0;

    // Connect WebSocket to xAI realtime API using subprotocol for auth
    const ws = new WebSocket('wss://api.x.ai/v1/realtime', [
      'xai-client-secret.' + token,
    ]);
    voiceState.ws = ws;

    ws.onopen = () => {
      if (voiceState.ws !== ws) {
        // The session was stopped before the socket finished opening.
        try { ws.close(); } catch {}
        return;
      }

      voiceState.active = true;
      btn.classList.remove('connecting');
      btn.classList.add('active');
      statusEl.textContent = 'Listening...';
      overlay.classList.add('listening');
      overlay.classList.remove('speaking');

      // Configure session with tools
      ws.send(JSON.stringify({
        type: 'session.update',
        session: {
          voice: 'Ara',
          instructions: instructions,
          turn_detection: { type: 'server_vad' },
          audio: {
            input: { format: { type: 'audio/pcm', rate: SAMPLE_RATE } },
            output: { format: { type: 'audio/pcm', rate: SAMPLE_RATE } },
          },
          tools: buildVoiceTools(),
        },
      }));

      // Start streaming microphone audio
      startMicStreaming(ws, micStream, audioCtx);
    };

    ws.onmessage = (event) => {
      let data;
      try {
        data = JSON.parse(event.data);
      } catch (parseErr) {
        // A frame that is not JSON must not kill the message handler.
        console.error('Voice: ignoring unparseable message', parseErr);
        return;
      }
      handleVoiceEvent(data);
    };

    ws.onerror = () => {
      // Ignore errors from a socket that is no longer the current one (for example
      // the error raised by closing a socket that was still connecting).
      if (voiceState.ws !== ws) return;
      statusEl.textContent = 'Connection error';
      setTimeout(() => {
        if (voiceState.ws === ws) stopVoice();
      }, 2000);
    };

    ws.onclose = () => {
      // Only the current socket may end the session; a late close event from an
      // earlier socket must not tear down a newer session.
      if (voiceState.active && voiceState.ws === ws) {
        stopVoice();
      }
    };
  } catch (err) {
    console.error('Voice start error:', err);
    if (statusEl) statusEl.textContent = 'Error: ' + (err.message || 'Unknown error');
    // Leave the message visible briefly, then clean up, unless a newer attempt has
    // started in the meantime.
    setTimeout(() => {
      if (voiceState.startAttempt === attempt) stopVoice();
    }, 2500);
  }
}
1028
+
1029
// Stream microphone audio to the realtime socket.
//
// Audio is captured in 4096-frame blocks through a ScriptProcessorNode (kept for broad
// compatibility, including mobile). Each block is resampled to SAMPLE_RATE when the
// context runs at a different rate, converted to 16-bit PCM, base64-encoded and sent
// as an input_audio_buffer.append message. Blocks are dropped while the session is
// not active or the socket is not open.
//
//   ws        - open realtime WebSocket
//   micStream - MediaStream obtained from getUserMedia
//   audioCtx  - AudioContext used to build the capture graph
function startMicStreaming(ws, micStream, audioCtx) {
  const BLOCK_FRAMES = 4096;

  // Linear-interpolation resample to SAMPLE_RATE; returns the input untouched when
  // the context already runs at the target rate.
  const toTargetRate = (samples) => {
    if (audioCtx.sampleRate === SAMPLE_RATE) return samples;
    const ratio = SAMPLE_RATE / audioCtx.sampleRate;
    const outLength = Math.round(samples.length * ratio);
    const resampled = new Float32Array(outLength);
    const lastIndex = samples.length - 1;
    for (let n = 0; n < outLength; n++) {
      const position = n / ratio;
      const left = Math.floor(position);
      const right = Math.min(left + 1, lastIndex);
      const weight = position - left;
      resampled[n] = samples[left] * (1 - weight) + samples[right] * weight;
    }
    return resampled;
  };

  // Clamp to [-1, 1] and scale to the signed 16-bit range.
  const toPcm16 = (floats) =>
    Int16Array.from(floats, (value) => {
      const clamped = Math.max(-1, Math.min(1, value));
      return clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
    });

  // Base64 of the raw bytes backing the PCM samples.
  const toBase64 = (pcm) => {
    const octets = new Uint8Array(pcm.buffer);
    return btoa(Array.from(octets, (octet) => String.fromCharCode(octet)).join(''));
  };

  const micSource = audioCtx.createMediaStreamSource(micStream);
  const processor = audioCtx.createScriptProcessor(BLOCK_FRAMES, 1, 1);
  voiceState.scriptProcessor = processor;

  processor.onaudioprocess = (audioEvent) => {
    const canSend = voiceState.active && ws.readyState === WebSocket.OPEN;
    if (!canSend) return;

    const captured = audioEvent.inputBuffer.getChannelData(0);
    const payload = toBase64(toPcm16(toTargetRate(captured)));

    ws.send(JSON.stringify({
      type: 'input_audio_buffer.append',
      audio: payload,
    }));
  };

  micSource.connect(processor);
  processor.connect(audioCtx.destination);
}
1081
+
1082
// Apply one parsed event from the realtime voice socket to the overlay UI and
// forward tool calls and audio chunks to their handlers. Event types that are not
// listed here are ignored. Every DOM update is skipped when its element is missing.
function handleVoiceEvent(data) {
  const overlay = document.getElementById('voice-overlay');
  const statusEl = document.getElementById('voice-status');
  const transcriptEl = document.getElementById('voice-transcript');

  const setStatus = (text) => {
    if (statusEl) statusEl.textContent = text;
  };
  const showListening = () => {
    if (overlay) {
      overlay.classList.add('listening');
      overlay.classList.remove('speaking');
    }
    setStatus('Listening...');
  };

  const kind = data.type;

  if (kind === 'input_audio_buffer.speech_started' || kind === 'response.done') {
    showListening();
  } else if (kind === 'input_audio_buffer.speech_stopped') {
    setStatus('Processing...');
  } else if (kind === 'conversation.item.input_audio_transcription.completed') {
    if (transcriptEl && data.transcript) {
      transcriptEl.textContent = 'You: ' + data.transcript;
    }
  } else if (kind === 'response.function_call_arguments.done') {
    handleToolCall(data);
  } else if (kind === 'response.output_audio_transcript.delta') {
    if (overlay) {
      overlay.classList.remove('listening');
      overlay.classList.add('speaking');
    }
    setStatus('Speaking...');
    if (transcriptEl) {
      // The first delta of a reply replaces the user transcript or tool line;
      // later deltas are appended to the reply text.
      const shown = transcriptEl.textContent;
      const startsReply = shown.startsWith('You:') || shown.startsWith('[Tool');
      transcriptEl.textContent = startsReply ? data.delta : shown + data.delta;
    }
  } else if (kind === 'response.output_audio.delta') {
    if (data.delta) {
      playAudioChunk(data.delta);
    }
  } else if (kind === 'error') {
    console.error('Voice API error:', data);
    setStatus('Error: ' + (data.error?.message || 'Unknown'));
  }
}
1137
+
1138
// Execute a tool call requested by the voice agent and send the result back.
//
// event is the response.function_call_arguments.done message: event.name is the tool,
// event.call_id identifies the call, and event.arguments is the raw JSON argument
// string, which is forwarded unchanged to the /voice/tool endpoint.
//
// The result (or an 'Error: ...' text on failure) is returned to the agent as a
// function_call_output item followed by response.create so the agent continues.
// Nothing is sent, and the UI is left alone, when the socket is no longer open by
// the time the tool finishes.
async function handleToolCall(event) {
  const ws = voiceState.ws;
  if (!ws || ws.readyState !== WebSocket.OPEN) return;

  const toolName = event.name;
  const callId = event.call_id;
  const args = event.arguments;

  const statusEl = document.getElementById('voice-status');
  const transcriptEl = document.getElementById('voice-transcript');
  const overlay = document.getElementById('voice-overlay');

  // Hand a tool result to the agent and ask it to continue. Returns false when the
  // session ended while the tool was running.
  const sendToolOutput = (text) => {
    if (ws.readyState !== WebSocket.OPEN) return false;
    ws.send(JSON.stringify({
      type: 'conversation.item.create',
      item: {
        type: 'function_call_output',
        call_id: callId,
        output: text,
      },
    }));
    ws.send(JSON.stringify({ type: 'response.create' }));
    return true;
  };

  // Show tool execution in UI
  if (overlay) overlay.classList.remove('listening', 'speaking');
  if (statusEl) statusEl.textContent = 'Running tool: ' + toolName + '...';

  // Parse args for display only. Anything that is not a JSON object (including the
  // literal null) is treated as "no arguments" so the preview can never throw and
  // leave the tool call unanswered.
  let argsObj = {};
  try {
    const parsed = JSON.parse(args);
    if (parsed && typeof parsed === 'object') argsObj = parsed;
  } catch {
    // Unparseable arguments: keep the empty preview; the backend gets the raw string.
  }
  let brief = '';
  if (toolName === 'bash') {
    brief = String(argsObj.command || '').slice(0, 80);
  } else if (toolName === 'read' || toolName === 'write' || toolName === 'edit') {
    brief = String(argsObj.path || '');
  }
  if (transcriptEl) transcriptEl.textContent = '[Tool: ' + toolName + '] ' + brief;

  try {
    // Execute tool via our backend
    const toolRes = await fetch('/voice/tool', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ name: toolName, arguments: args }),
    });
    const toolData = await toolRes.json();

    // Always hand the agent a string. A failure reported as { error } is surfaced
    // instead of being hidden behind '(no output)'.
    // NOTE(review): assumes /voice/tool reports failures as { error } like
    // /voice/session does; confirm against the server.
    let output = '(no output)';
    if (toolData && toolData.output != null && toolData.output !== '') {
      output = typeof toolData.output === 'string' ? toolData.output : JSON.stringify(toolData.output);
    } else if (toolData && toolData.error) {
      output = 'Error: ' + toolData.error;
    }

    // Show brief result
    const shortOutput = output.length > 120 ? output.slice(0, 120) + '...' : output;
    if (transcriptEl) transcriptEl.textContent = '[Tool: ' + toolName + '] ' + shortOutput;

    // Send result back to voice agent and request it to continue
    if (sendToolOutput(output) && statusEl) statusEl.textContent = 'Processing...';
  } catch (err) {
    console.error('Tool execution error:', err);
    const errMsg = (err && err.message) || 'Tool execution failed';

    // Send error back as tool output so the agent can handle it
    if (sendToolOutput('Error: ' + errMsg)) {
      if (statusEl) statusEl.textContent = 'Listening...';
      if (overlay) overlay.classList.add('listening');
    }
  }
}
1210
+
1211
// Decode one base64 chunk of 16-bit PCM audio and queue it for gapless playback.
//
// Chunks are scheduled back to back: each starts at the later of "now" and the end
// of the previously scheduled chunk (voiceState.nextPlayTime). Does nothing when no
// AudioContext is active. Malformed chunks (invalid base64, no complete sample) are
// skipped so that one bad frame cannot throw out of the socket message handler.
function playAudioChunk(base64Audio) {
  const ctx = voiceState.audioCtx;
  if (!ctx) return;

  // Decode base64 to raw bytes
  let binaryStr;
  try {
    binaryStr = atob(base64Audio);
  } catch (decodeErr) {
    console.error('Voice: skipping undecodable audio chunk', decodeErr);
    return;
  }

  // A sample is two bytes. A trailing odd byte is ignored (an Int16Array view over
  // an odd-length buffer throws) and an empty chunk is skipped (createBuffer rejects
  // a zero-length buffer).
  const sampleCount = Math.floor(binaryStr.length / 2);
  if (sampleCount === 0) return;

  const bytes = new Uint8Array(sampleCount * 2);
  for (let i = 0; i < bytes.length; i++) {
    bytes[i] = binaryStr.charCodeAt(i);
  }
  const pcm16 = new Int16Array(bytes.buffer);

  // Convert to Float32 in [-1, 1) for Web Audio
  const float32 = Float32Array.from(pcm16, (sample) => sample / 32768.0);

  // Create audio buffer and schedule playback
  const buffer = ctx.createBuffer(1, float32.length, SAMPLE_RATE);
  buffer.getChannelData(0).set(float32);

  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);

  // Schedule seamless playback
  const startTime = Math.max(ctx.currentTime, voiceState.nextPlayTime);
  source.start(startTime);
  voiceState.nextPlayTime = startTime + buffer.duration;
}
1243
+
1244
// End the voice session and return the UI to its idle state.
//
// Releases, in order: the WebSocket, the microphone tracks, the script processor and
// the audio context, clearing each reference in voiceState. Then resets the playback
// cursor, the mic button and the overlay. Resources that are not set are skipped, so
// calling this repeatedly is harmless.
function stopVoice() {
  const { ws, micStream, scriptProcessor, audioCtx } = voiceState;

  // Run a teardown step whose failure should not block the remaining cleanup.
  const quietly = (step) => {
    try { step(); } catch {}
  };

  voiceState.active = false;

  // Close WebSocket
  if (ws) {
    quietly(() => ws.close());
    voiceState.ws = null;
  }

  // Stop microphone
  if (micStream) {
    for (const track of micStream.getTracks()) {
      track.stop();
    }
    voiceState.micStream = null;
  }

  // Disconnect audio processor
  if (scriptProcessor) {
    quietly(() => scriptProcessor.disconnect());
    voiceState.scriptProcessor = null;
  }

  // Close audio context
  if (audioCtx) {
    quietly(() => audioCtx.close());
    voiceState.audioCtx = null;
  }

  voiceState.nextPlayTime = 0;

  // Reset UI: mic icon back, stop icon hidden, overlay closed
  const micButton = document.getElementById('voice-btn');
  if (micButton) {
    micButton.classList.remove('active', 'connecting');
    micButton.querySelector('.voice-icon-mic').style.display = 'block';
    micButton.querySelector('.voice-icon-stop').style.display = 'none';
  }

  const voiceOverlay = document.getElementById('voice-overlay');
  if (voiceOverlay) {
    voiceOverlay.style.display = 'none';
    voiceOverlay.classList.remove('listening', 'speaking');
  }
}
646
1287
  </script>
647
1288
  </body>
648
1289
  </html>`;
649
1290
  }
1291
+ function renderCronRequestsPage(requests) {
1292
+ const pendingCount = requests.filter(r => r.status === 'pending').length;
1293
+ const approvedCount = requests.filter(r => r.status === 'approved').length;
1294
+ const rejectedCount = requests.filter(r => r.status === 'rejected').length;
1295
+ const requestsHtml = requests.length === 0
1296
+ ? `<div class="empty-state">No cron requests found.</div>`
1297
+ : requests.map(r => {
1298
+ const statusColor = r.status === 'pending' ? 'var(--accent)' :
1299
+ r.status === 'approved' ? 'var(--green)' : 'var(--red)';
1300
+ const actionButtons = r.status === 'pending'
1301
+ ? `<div class="request-actions">
1302
+ <button class="action-btn approve-btn"
1303
+ hx-post="/approve-request?id=${r.id}"
1304
+ hx-confirm="Approve this cron job request?"
1305
+ hx-target="#action-status"
1306
+ hx-swap="innerHTML"
1307
+ title="Approve request">✓</button>
1308
+ <button class="action-btn reject-btn"
1309
+ hx-post="/reject-request?id=${r.id}"
1310
+ hx-confirm="Reject this cron job request?"
1311
+ hx-target="#action-status"
1312
+ hx-swap="innerHTML"
1313
+ title="Reject request">✗</button>
1314
+ </div>`
1315
+ : '';
1316
+ return `<div class="request-card" style="border-left: 4px solid ${statusColor}">
1317
+ <div class="request-header">
1318
+ <div class="request-title">
1319
+ <strong>${escapeHtml(r.name)}</strong>
1320
+ <span class="request-status" style="color: ${statusColor}">${r.status.toUpperCase()}</span>
1321
+ </div>
1322
+ <div class="request-meta">
1323
+ <span class="request-coworker">👤 ${escapeHtml(r.session_name)}</span>
1324
+ <span class="request-time">🕒 ${formatFullTime(r.requested_at)}</span>
1325
+ </div>
1326
+ </div>
1327
+ <div class="request-details">
1328
+ <div class="request-schedule">
1329
+ <strong>Schedule:</strong> <code>${escapeHtml(r.schedule)}</code>
1330
+ ${r.timezone ? `<span class="timezone">(TZ: ${escapeHtml(r.timezone)})</span>` : ''}
1331
+ </div>
1332
+ <div class="request-message">
1333
+ <strong>Message:</strong>
1334
+ <div class="message-content markdown-body" data-markdown>${escapeHtml(r.message)}</div>
1335
+ </div>
1336
+ ${r.reviewed_at && r.reviewed_by ? `
1337
+ <div class="request-review">
1338
+ <strong>Reviewed by ${escapeHtml(r.reviewed_by)} on ${formatFullTime(r.reviewed_at)}</strong>
1339
+ ${r.reviewer_notes ? `<div class="review-notes">${escapeHtml(r.reviewer_notes)}</div>` : ''}
1340
+ </div>
1341
+ ` : ''}
1342
+ </div>
1343
+ ${actionButtons}
1344
+ </div>`;
1345
+ }).join('\n');
1346
+ return `<!DOCTYPE html>
1347
+ <html lang="en">
1348
+ <head>
1349
+ <meta charset="UTF-8">
1350
+ <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
1351
+ <title>Cron Requests — agent-office</title>
1352
+ <script src="https://unpkg.com/htmx.org@2.0.4/dist/htmx.min.js"></script>
1353
+ <script src="https://cdn.jsdelivr.net/npm/marked@11.1.1/marked.min.js"></script>
1354
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/github-markdown-css@5.5.1/github-markdown-light.min.css" media="(prefers-color-scheme: light)">
1355
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/github-markdown-css@5.5.1/github-markdown-dark.min.css" media="(prefers-color-scheme: dark)">
1356
+ <style>
1357
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
1358
+
1359
+ :root {
1360
+ --bg: #0f1117;
1361
+ --surface: #1a1d27;
1362
+ --surface2: #22263a;
1363
+ --border: #2e3248;
1364
+ --accent: #6c8eff;
1365
+ --accent-dim: #3d52a0;
1366
+ --text: #e2e8f0;
1367
+ --text-dim: #8892a4;
1368
+ --green: #6bffb8;
1369
+ --red: #ff6b6b;
1370
+ --radius: 12px;
1371
+ --radius-sm: 6px;
1372
+ --header-h: 56px;
1373
+ font-size: 16px;
1374
+ }
1375
+
1376
+ html, body {
1377
+ height: 100%;
1378
+ background: var(--bg);
1379
+ color: var(--text);
1380
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif;
1381
+ overflow: hidden;
1382
+ }
1383
+
1384
+ .app {
1385
+ display: flex;
1386
+ flex-direction: column;
1387
+ height: 100dvh;
1388
+ }
1389
+
1390
+ /* Header */
1391
+ .header {
1392
+ flex-shrink: 0;
1393
+ height: var(--header-h);
1394
+ background: var(--surface);
1395
+ border-bottom: 1px solid var(--border);
1396
+ display: flex;
1397
+ align-items: center;
1398
+ padding: 0 16px;
1399
+ gap: 12px;
1400
+ }
1401
+
1402
+ .header-title {
1403
+ font-weight: 600;
1404
+ font-size: 18px;
1405
+ color: var(--accent);
1406
+ }
1407
+
1408
+ .header-stats {
1409
+ margin-left: auto;
1410
+ display: flex;
1411
+ gap: 16px;
1412
+ font-size: 14px;
1413
+ }
1414
+
1415
+ .stat-item {
1416
+ display: flex;
1417
+ align-items: center;
1418
+ gap: 4px;
1419
+ }
1420
+
1421
+ .stat-count {
1422
+ font-weight: 600;
1423
+ }
1424
+
1425
+ .back-link {
1426
+ color: var(--text-dim);
1427
+ text-decoration: none;
1428
+ font-size: 16px;
1429
+ transition: color 0.15s;
1430
+ }
1431
+ .back-link:hover { color: var(--accent); }
1432
+
1433
+ /* Content */
1434
+ .content {
1435
+ flex: 1;
1436
+ overflow-y: auto;
1437
+ padding: 20px;
1438
+ display: flex;
1439
+ flex-direction: column;
1440
+ gap: 16px;
1441
+ }
1442
+
1443
+ .filter-tabs {
1444
+ display: flex;
1445
+ gap: 8px;
1446
+ margin-bottom: 16px;
1447
+ }
1448
+
1449
+ .filter-tab {
1450
+ padding: 8px 16px;
1451
+ background: var(--surface2);
1452
+ border: 1px solid var(--border);
1453
+ border-radius: var(--radius-sm);
1454
+ color: var(--text-dim);
1455
+ text-decoration: none;
1456
+ font-size: 14px;
1457
+ transition: all 0.15s;
1458
+ }
1459
+
1460
+ .filter-tab:hover {
1461
+ border-color: var(--accent-dim);
1462
+ color: var(--accent);
1463
+ }
1464
+
1465
+ .filter-tab.active {
1466
+ background: var(--accent);
1467
+ color: #fff;
1468
+ border-color: var(--accent);
1469
+ }
1470
+
1471
+ .empty-state {
1472
+ text-align: center;
1473
+ color: var(--text-dim);
1474
+ font-size: 16px;
1475
+ padding: 48px;
1476
+ }
1477
+
1478
+ /* Request Cards */
1479
+ .request-card {
1480
+ background: var(--surface);
1481
+ border: 1px solid var(--border);
1482
+ border-radius: var(--radius);
1483
+ padding: 16px;
1484
+ margin-bottom: 12px;
1485
+ }
1486
+
1487
+ .request-header {
1488
+ display: flex;
1489
+ justify-content: space-between;
1490
+ align-items: flex-start;
1491
+ margin-bottom: 12px;
1492
+ }
1493
+
1494
+ .request-title {
1495
+ display: flex;
1496
+ align-items: center;
1497
+ gap: 8px;
1498
+ font-size: 16px;
1499
+ }
1500
+
1501
+ .request-status {
1502
+ font-size: 12px;
1503
+ font-weight: 600;
1504
+ padding: 2px 8px;
1505
+ border-radius: var(--radius-sm);
1506
+ background: rgba(255, 255, 255, 0.1);
1507
+ }
1508
+
1509
+ .request-meta {
1510
+ display: flex;
1511
+ flex-direction: column;
1512
+ align-items: flex-end;
1513
+ gap: 2px;
1514
+ font-size: 12px;
1515
+ color: var(--text-dim);
1516
+ }
1517
+
1518
+ .request-details {
1519
+ display: flex;
1520
+ flex-direction: column;
1521
+ gap: 8px;
1522
+ font-size: 14px;
1523
+ }
1524
+
1525
+ .request-schedule {
1526
+ display: flex;
1527
+ align-items: center;
1528
+ gap: 8px;
1529
+ flex-wrap: wrap;
1530
+ }
1531
+
1532
+ .request-schedule code {
1533
+ background: var(--surface2);
1534
+ padding: 2px 6px;
1535
+ border-radius: var(--radius-sm);
1536
+ font-family: ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace;
1537
+ font-size: 13px;
1538
+ }
1539
+
1540
+ .timezone {
1541
+ color: var(--text-dim);
1542
+ font-size: 12px;
1543
+ }
1544
+
1545
+ .request-message {
1546
+ margin-top: 4px;
1547
+ }
1548
+
1549
+ .message-content {
1550
+ background: var(--surface2);
1551
+ border: 1px solid var(--border);
1552
+ border-radius: var(--radius-sm);
1553
+ padding: 8px 12px;
1554
+ margin-top: 4px;
1555
+ font-size: 13px;
1556
+ line-height: 1.4;
1557
+ white-space: pre-wrap;
1558
+ word-break: break-word;
1559
+ }
1560
+
1561
+ .request-review {
1562
+ margin-top: 8px;
1563
+ padding-top: 8px;
1564
+ border-top: 1px solid var(--border);
1565
+ font-size: 13px;
1566
+ color: var(--text-dim);
1567
+ }
1568
+
1569
+ .review-notes {
1570
+ margin-top: 4px;
1571
+ background: rgba(255, 255, 255, 0.05);
1572
+ padding: 6px 10px;
1573
+ border-radius: var(--radius-sm);
1574
+ border-left: 3px solid var(--accent);
1575
+ }
1576
+
1577
+ .request-actions {
1578
+ display: flex;
1579
+ gap: 8px;
1580
+ margin-top: 12px;
1581
+ padding-top: 12px;
1582
+ border-top: 1px solid var(--border);
1583
+ }
1584
+
1585
+ .action-btn {
1586
+ padding: 6px 12px;
1587
+ border: 1px solid var(--border);
1588
+ border-radius: var(--radius-sm);
1589
+ cursor: pointer;
1590
+ font-size: 14px;
1591
+ font-weight: 600;
1592
+ transition: all 0.15s;
1593
+ display: flex;
1594
+ align-items: center;
1595
+ justify-content: center;
1596
+ min-width: 32px;
1597
+ }
1598
+
1599
+ .approve-btn {
1600
+ background: rgba(107, 255, 184, 0.1);
1601
+ border-color: var(--green);
1602
+ color: var(--green);
1603
+ }
1604
+
1605
+ .approve-btn:hover {
1606
+ background: var(--green);
1607
+ color: #fff;
1608
+ }
1609
+
1610
+ .reject-btn {
1611
+ background: rgba(255, 107, 107, 0.1);
1612
+ border-color: var(--red);
1613
+ color: var(--red);
1614
+ }
1615
+
1616
+ .reject-btn:hover {
1617
+ background: var(--red);
1618
+ color: #fff;
1619
+ }
1620
+
1621
+ .action-btn.htmx-request {
1622
+ opacity: 0.6;
1623
+ pointer-events: none;
1624
+ }
1625
+
1626
+ /* Status messages */
1627
+ #action-status {
1628
+ position: fixed;
1629
+ bottom: 20px;
1630
+ left: 50%;
1631
+ transform: translateX(-50%);
1632
+ background: var(--surface);
1633
+ border: 1px solid var(--border);
1634
+ border-radius: var(--radius);
1635
+ padding: 12px 20px;
1636
+ font-size: 14px;
1637
+ pointer-events: none;
1638
+ opacity: 0;
1639
+ transition: opacity 0.3s;
1640
+ z-index: 1000;
1641
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
1642
+ }
1643
+
1644
+ #action-status.visible {
1645
+ opacity: 1;
1646
+ }
1647
+
1648
+ #action-status.success {
1649
+ border-color: var(--green);
1650
+ color: var(--green);
1651
+ }
1652
+
1653
+ #action-status.error {
1654
+ border-color: var(--red);
1655
+ color: var(--red);
1656
+ }
1657
+
1658
+ /* Markdown styling */
1659
+ .markdown-body {
1660
+ background: transparent;
1661
+ color: inherit;
1662
+ }
1663
+
1664
+ .markdown-body p { margin: 0 0 8px 0; }
1665
+ .markdown-body p:last-child { margin-bottom: 0; }
1666
+ .markdown-body pre {
1667
+ background: rgba(0,0,0,0.3);
1668
+ border-radius: 6px;
1669
+ padding: 8px 12px;
1670
+ overflow-x: auto;
1671
+ margin: 8px 0;
1672
+ }
1673
+ .markdown-body code {
1674
+ background: rgba(0,0,0,0.2);
1675
+ padding: 2px 5px;
1676
+ border-radius: 3px;
1677
+ font-size: 12px;
1678
+ }
1679
+ .markdown-body pre code {
1680
+ background: transparent;
1681
+ padding: 0;
1682
+ }
1683
+ </style>
1684
+ </head>
1685
+ <body>
1686
+ <div class="app">
1687
+ <div class="header">
1688
+ <a href="/" class="back-link" title="Back to chat">←</a>
1689
+ <div class="header-title">Cron Requests</div>
1690
+ <div class="header-stats">
1691
+ <div class="stat-item">
1692
+ <span>⏳</span>
1693
+ <span class="stat-count">${pendingCount}</span>
1694
+ <span>pending</span>
1695
+ </div>
1696
+ <div class="stat-item">
1697
+ <span>✅</span>
1698
+ <span class="stat-count">${approvedCount}</span>
1699
+ <span>approved</span>
1700
+ </div>
1701
+ <div class="stat-item">
1702
+ <span>❌</span>
1703
+ <span class="stat-count">${rejectedCount}</span>
1704
+ <span>rejected</span>
1705
+ </div>
1706
+ </div>
1707
+ </div>
1708
+
1709
+ <div class="content">
1710
+ <div class="filter-tabs">
1711
+ <a href="/cron-requests" class="filter-tab">All</a>
1712
+ <a href="/cron-requests?status=pending" class="filter-tab">Pending</a>
1713
+ <a href="/cron-requests?status=approved" class="filter-tab">Approved</a>
1714
+ <a href="/cron-requests?status=rejected" class="filter-tab">Rejected</a>
1715
+ </div>
1716
+
1717
+ ${requestsHtml}
1718
+ </div>
1719
+
1720
+ <div id="action-status"></div>
1721
+ </div>
1722
+
1723
+ <script>
1724
// Highlight the filter tab whose status matches the ?status= query parameter.
// The bare /cron-requests link stands for "all", which is also the fallback
// when no status parameter is present.
function setActiveTab() {
  const current = new URLSearchParams(window.location.search).get('status') || 'all'
  for (const link of document.querySelectorAll('.filter-tab')) {
    const target = link.getAttribute('href')
    const linkStatus = target === '/cron-requests' ? 'all' : target.split('=')[1]
    link.classList.toggle('active', linkStatus === current)
  }
}
1738
+
1739
// Turn marked's HTML output into a DOM fragment that is safe to attach.
// SECURITY: marked does not sanitize its output, and the message text does not
// originate from this page, so the HTML is parsed into an inert <template>,
// stripped of script-capable elements, event-handler attributes and
// script-like URLs, and only then handed back for insertion. This is a
// best-effort filter; a maintained sanitizer (e.g. DOMPurify) would be the
// stronger choice if a dependency can be added.
// NOTE: this code lives inside a server-side template literal, so it avoids
// backslashes and backticks on purpose.
function sanitizeMarkdownHtml(html) {
  const holder = document.createElement('template')
  holder.innerHTML = html
  const fragment = holder.content
  fragment
    .querySelectorAll('script, style, iframe, object, embed, link, meta, base, form, svg, math')
    .forEach(node => node.remove())
  fragment.querySelectorAll('*').forEach(node => {
    Array.from(node.attributes).forEach(attr => {
      const name = attr.name.toLowerCase()
      // Browsers ignore whitespace/control characters inside a URL scheme,
      // so drop them before looking at the scheme.
      const compact = Array.from(attr.value).filter(ch => ch > ' ').join('').toLowerCase()
      const isHandler = name.startsWith('on')
      const isUrlAttr = name === 'href' || name === 'src' || name === 'action' ||
        name === 'formaction' || name === 'xlink:href'
      const isScriptUrl = isUrlAttr && /^(javascript|vbscript|data):/.test(compact)
      if (isHandler || isScriptUrl) node.removeAttribute(attr.name)
    })
  })
  return fragment
}

// Render markdown in request messages. Each .message-content element is
// converted once (tracked with the data-rendered attribute); nothing happens
// when the marked library is not loaded.
function renderMarkdown() {
  if (typeof marked === 'undefined') return
  document.querySelectorAll('.message-content').forEach(el => {
    if (el.hasAttribute('data-rendered')) return
    el.replaceChildren(sanitizeMarkdownHtml(marked.parse(el.textContent || '')))
    el.setAttribute('data-rendered', 'true')
  })
}
1749
+
1750
// Display a transient toast in #action-status. The type ('success' by default,
// or 'error') becomes the element's class; the toast hides itself again after
// four seconds, and a newer message cancels the pending hide of an older one.
function showStatus(message, type = 'success') {
  const toast = document.getElementById('action-status')
  if (toast) {
    toast.textContent = message
    toast.className = type
    toast.classList.add('visible')
    clearTimeout(toast._hideTimer)
    const hide = () => toast.classList.remove('visible')
    toast._hideTimer = setTimeout(hide, 4000)
  }
}
1760
+
1761
// Run once on load: mark the current filter tab and render message markdown
setActiveTab()
renderMarkdown()

// After every HTMX request: a 2xx response shows a success toast and reloads
// the page one second later; anything else shows the JSON "error" field of
// the response, or a generic message when the body is not JSON.
document.addEventListener('htmx:afterRequest', (event) => {
  const request = event.detail.xhr
  const succeeded = request.status >= 200 && request.status < 300
  if (succeeded) {
    setTimeout(() => window.location.reload(), 1000)
    showStatus('Action completed successfully')
    return
  }
  try {
    const payload = JSON.parse(request.responseText)
    showStatus('Error: ' + (payload.error || 'Unknown error'), 'error')
  } catch {
    showStatus('Error: Request failed', 'error')
  }
})
1782
+ </script>
1783
+ </body>
1784
+ </html>`;
1785
+ }
650
1786
  // ── Express app ───────────────────────────────────────────────────────────────
651
1787
  export async function appCoworkerChatWeb(options) {
652
- const { url: agentUrl, password, host, port: portStr } = options;
1788
+ const { url: agentUrl, password, host, port: portStr, xaiKey } = options;
1789
+ const voiceEnabled = !!xaiKey;
653
1790
  const port = parseInt(portStr, 10);
654
1791
  if (isNaN(port) || port < 1 || port > 65535) {
655
1792
  console.error(`Error: invalid port "${portStr}"`);
@@ -672,6 +1809,9 @@ export async function appCoworkerChatWeb(options) {
672
1809
  console.error("Check that agent-office serve is running and --password is correct.");
673
1810
  }
674
1811
  console.log(`Communicator: chatting as "${humanName}"`);
1812
+ if (voiceEnabled) {
1813
+ console.log(`Voice chat enabled (xAI API key configured)`);
1814
+ }
675
1815
  const app = express();
676
1816
  app.use(express.urlencoded({ extended: false }));
677
1817
  app.use(express.json());
@@ -712,6 +1852,249 @@ export async function appCoworkerChatWeb(options) {
712
1852
  res.send(renderDropdown(coworker ?? null, [{ name: humanName, status: null, isHuman: true, unreadMessages: 0 }]));
713
1853
  }
714
1854
  });
1855
// ── GET /cron-requests — cron requests page ──────────────────────────────
// Fetches cron requests from agent-office (optionally filtered by the
// ?status= query parameter) and responds with the rendered HTML page.
// Any failure while fetching or rendering is reported as a 502 with the
// escaped error message.
app.get("/cron-requests", async (req, res) => {
    // Express yields an array or object for repeated/bracketed query keys
    // (e.g. ?status=a&status=b); only a plain string is forwarded as a filter.
    const status = typeof req.query.status === "string" ? req.query.status : undefined;
    try {
        const requests = await fetchCronRequests(agentUrl, password, status);
        res.setHeader("Content-Type", "text/html; charset=utf-8");
        res.send(renderCronRequestsPage(requests));
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        res.status(502).send(`<pre>Error connecting to agent-office: ${escapeHtml(msg)}</pre>`);
    }
});
1868
// ── POST /approve-request — approve a cron request ─────────────────────────
// Expects the request id in the ?id= query parameter.
// Success: 200 with a small HTML confirmation snippet.
// Failure: a non-2xx status with a JSON { error } body. The cron requests
// page treats every 2xx response as success and reads `error` from a JSON
// body otherwise, so failures must not be sent with status 200.
app.post("/approve-request", async (req, res) => {
    const rawId = req.query.id;
    // Reject missing, repeated (array) or non-numeric ids instead of letting
    // parseInt produce NaN in the upstream URL.
    if (typeof rawId !== "string" || !/^\d+$/.test(rawId)) {
        res.status(400).json({ error: "A numeric request ID is required" });
        return;
    }
    try {
        await approveCronRequest(agentUrl, password, Number.parseInt(rawId, 10));
        res.setHeader("Content-Type", "text/html; charset=utf-8");
        res.send(`<span style="color:var(--green)">✓ Request approved successfully</span>`);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        res.status(502).json({ error: `Approval failed: ${msg}` });
    }
});
1887
// ── POST /reject-request — reject a cron request ───────────────────────────
// Expects the request id in the ?id= query parameter.
// Success: 200 with a small HTML confirmation snippet.
// Failure: a non-2xx status with a JSON { error } body. The cron requests
// page treats every 2xx response as success and reads `error` from a JSON
// body otherwise, so failures must not be sent with status 200.
app.post("/reject-request", async (req, res) => {
    const rawId = req.query.id;
    // Reject missing, repeated (array) or non-numeric ids instead of letting
    // parseInt produce NaN in the upstream URL.
    if (typeof rawId !== "string" || !/^\d+$/.test(rawId)) {
        res.status(400).json({ error: "A numeric request ID is required" });
        return;
    }
    try {
        await rejectCronRequest(agentUrl, password, Number.parseInt(rawId, 10));
        res.setHeader("Content-Type", "text/html; charset=utf-8");
        res.send(`<span style="color:var(--green)">✓ Request rejected successfully</span>`);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        res.status(502).json({ error: `Rejection failed: ${msg}` });
    }
});
1906
// ── GET /voice/config — whether voice is enabled ──────────────────────────
// Responds with { enabled } so the client can tell whether voice chat is
// available (i.e. whether an xAI key was supplied at startup).
app.get("/voice/config", (_req, res) => {
    const config = { enabled: voiceEnabled };
    res.json(config);
});
1910
// ── POST /voice/session — fetch ephemeral token from xAI ─────────────────
// Body: { coworker: string }.
// Responds with { token, instructions, coworker } on success.
// Errors: 403 when voice is disabled, 400 when coworker is missing or not a
// string, 502 when the status lookup or the xAI token request fails.
app.post("/voice/session", async (req, res) => {
    if (!voiceEnabled || !xaiKey) {
        res.status(403).json({ error: "Voice is not enabled" });
        return;
    }
    // req.body can be undefined when no body was parsed; destructuring it
    // directly would throw outside the try block below.
    const { coworker } = req.body ?? {};
    if (typeof coworker !== "string" || coworker === "") {
        res.status(400).json({ error: "coworker is required" });
        return;
    }
    try {
        // Fetch the coworker's status for context
        const status = await fetchCoworkerStatus(agentUrl, password, coworker);
        // Get ephemeral token from xAI
        const tokenRes = await fetch("https://api.x.ai/v1/realtime/client_secrets", {
            method: "POST",
            headers: {
                "Authorization": `Bearer ${xaiKey}`,
                "Content-Type": "application/json",
            },
            body: JSON.stringify({ expires_after: { seconds: 300 } }),
        });
        if (!tokenRes.ok) {
            const errBody = await tokenRes.json().catch(() => ({}));
            // The error field may be a string or an object; avoid "[object Object]".
            const rawError = errBody?.error;
            const detail = typeof rawError === "string"
                ? rawError
                : rawError != null
                    ? (rawError.message ?? JSON.stringify(rawError))
                    : `HTTP ${tokenRes.status}`;
            res.status(502).json({ error: `xAI API error: ${detail}` });
            return;
        }
        // xAI returns { value: string, expires_at: number } at the top level
        const tokenData = await tokenRes.json();
        const token = tokenData.value;
        if (!token) {
            console.error("Voice session: unexpected xAI response shape:", JSON.stringify(tokenData));
            res.status(502).json({ error: "No ephemeral token in xAI response" });
            return;
        }
        // Build voice instructions based on the coworker. This is plain prompt
        // text, not HTML, so the name is used verbatim: HTML-escaping it would
        // turn e.g. "R&D" into "R&amp;D" in the prompt.
        const instructions = [
            `You are ${coworker}, an AI coworker in the agent office.`,
            status ? `Your current status is: "${status}".` : "",
            `You are having a voice conversation with your human manager ${humanName}.`,
            `Be helpful, collaborative, and keep your responses concise since this is a voice conversation.`,
            `You can discuss work, answer questions, and collaborate on tasks.`,
            ``,
            `You have access to the agent-office CLI tool which can:`,
            `- Create and manage AI coworker sessions`,
            `- Send messages between coworkers`,
            `- Set status messages for visibility`,
            `- Schedule cron jobs for recurring tasks`,
            `- Run a web chat interface for human interaction`,
            `- Manage task boards with kanban-style workflows`,
            `- Send email notifications for unread messages`,
            ``,
            `You have access to coding tools that you can use when the human asks you to look at, create, or modify files, or run commands:`,
            `- read: Read a file from the filesystem. Use this to examine source code, config files, etc.`,
            `- write: Write content to a file, creating or overwriting it.`,
            `- edit: Edit a file by finding and replacing an exact string.`,
            `- bash: Execute a shell command and get the output.`,
            ``,
            `When using tools, briefly tell the human what you're doing before calling the tool.`,
            `After getting tool results, summarize the key information verbally rather than reading everything.`,
            `The working directory is: ${process.cwd()}`,
        ].filter(Boolean).join("\n"); // filter(Boolean) also drops the blank separator entries
        res.json({
            token,
            instructions,
            coworker,
        });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        res.status(502).json({ error: `Failed to create voice session: ${msg}` });
    }
});
1984
// ── POST /voice/tool — execute a tool call server-side ────────────────────
// Body: { name: "read" | "write" | "edit" | "bash", arguments: object | JSON string }.
// Responds with { output: string }; tool-level failures are reported inside
// `output` so the caller can relay them, while malformed requests get a 400
// and a disabled voice feature gets a 403.
//
// SECURITY: this endpoint reads/writes arbitrary files and runs arbitrary
// shell commands with the privileges of this process. It is only served when
// voice is enabled, but it is still unauthenticated — anyone who can reach
// this port can use it. TODO(review): restrict to trusted hosts or add auth.
app.post("/voice/tool", async (req, res) => {
    // Same gate as /voice/session: without an xAI key there is no voice
    // client, so the tools must not be reachable at all.
    if (!voiceEnabled) {
        res.status(403).json({ error: "Voice is not enabled" });
        return;
    }
    const { name, arguments: argsStr } = req.body ?? {};
    if (!name || typeof name !== "string") {
        res.status(400).json({ error: "name is required" });
        return;
    }
    let args;
    try {
        args = typeof argsStr === "string" ? JSON.parse(argsStr) : (argsStr ?? {});
    }
    catch {
        res.status(400).json({ error: "Invalid arguments JSON" });
        return;
    }
    // JSON such as "null", "3" or "[]" parses fine but is not an argument map.
    if (args === null || typeof args !== "object" || Array.isArray(args)) {
        res.status(400).json({ error: "Invalid arguments JSON" });
        return;
    }
    try {
        let result;
        switch (name) {
            case "read": {
                const filePath = String(args.path ?? "");
                if (!filePath) {
                    res.json({ output: "Error: path is required" });
                    return;
                }
                const content = await readFile(filePath, "utf-8");
                const lines = content.split("\n");
                // 1-based first line and line count, forced to whole numbers:
                // offset >= 1, limit within 1..2000 (default 200).
                const offset = Math.max(1, Math.trunc(Number(args.offset)) || 1);
                const limit = Math.min(2000, Math.max(1, Math.trunc(Number(args.limit)) || 200));
                const start = offset - 1;
                const sliced = lines.slice(start, start + limit);
                result = sliced.map((line, i) => `${offset + i}: ${line}`).join("\n");
                if (lines.length > start + limit) {
                    result += `\n... (${lines.length} total lines)`;
                }
                break;
            }
            case "write": {
                const filePath = String(args.path ?? "");
                const content = String(args.content ?? "");
                if (!filePath) {
                    res.json({ output: "Error: path is required" });
                    return;
                }
                await mkdir(dirname(filePath), { recursive: true });
                await writeFile(filePath, content, "utf-8");
                // String length counts UTF-16 units; report the encoded byte size.
                result = `Written ${Buffer.byteLength(content, "utf-8")} bytes to ${filePath}`;
                break;
            }
            case "edit": {
                const filePath = String(args.path ?? "");
                const oldStr = String(args.oldText ?? "");
                const newStr = String(args.newText ?? "");
                if (!filePath) {
                    res.json({ output: "Error: path is required" });
                    return;
                }
                if (!oldStr) {
                    res.json({ output: "Error: oldText is required" });
                    return;
                }
                const fileContent = await readFile(filePath, "utf-8");
                const idx = fileContent.indexOf(oldStr);
                if (idx === -1) {
                    result = "Error: oldText not found in file";
                }
                else if (fileContent.indexOf(oldStr, idx + 1) !== -1) {
                    // Refuse ambiguous edits rather than guessing which match was meant.
                    result = "Error: oldText found multiple times. Provide more context to make it unique.";
                }
                else {
                    const edited = fileContent.slice(0, idx) + newStr + fileContent.slice(idx + oldStr.length);
                    await writeFile(filePath, edited, "utf-8");
                    result = `Edit applied to ${filePath}`;
                }
                break;
            }
            case "bash": {
                const command = String(args.command ?? "");
                if (!command) {
                    res.json({ output: "Error: command is required" });
                    return;
                }
                // Timeout in seconds, clamped to 1..120 (default 30); a negative
                // value would otherwise be passed straight to exec().
                const timeoutSec = Math.min(120, Math.max(1, Number(args.timeout) || 30));
                const timeout = timeoutSec * 1000;
                // exec() is callback-based; the promise always resolves with a
                // textual report so failures reach the caller as output.
                result = await new Promise((resolve) => {
                    exec(command, { timeout, maxBuffer: 1024 * 1024, cwd: process.cwd() }, (err, stdout, stderr) => {
                        const out = (stdout || "").trim();
                        const errOut = (stderr || "").trim();
                        if (err && err.killed) {
                            resolve(`Command timed out after ${timeout}ms`);
                        }
                        else if (err) {
                            resolve(`Exit code ${err.code ?? 1}\n${errOut}\n${out}`.trim());
                        }
                        else {
                            const combined = errOut ? `${out}\n${errOut}` : out;
                            resolve(combined || "(no output)");
                        }
                    });
                });
                // Truncate very long output for the voice context
                if (result.length > 4000) {
                    result = result.slice(0, 4000) + "\n... (output truncated)";
                }
                break;
            }
            default:
                result = `Unknown tool: ${name}`;
        }
        res.json({ output: result });
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        res.json({ output: `Error: ${msg}` });
    }
});
715
2098
  // ── GET / — full page ────────────────────────────────────────────────────
716
2099
  app.get("/", async (req, res) => {
717
2100
  try {