@astra-code/astra-ai 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/app/App.tsx CHANGED
@@ -3,7 +3,7 @@ import {Box, Text, useApp, useInput} from "ink";
3
3
  import Spinner from "ink-spinner";
4
4
  import TextInput from "ink-text-input";
5
5
  import {spawn} from "child_process";
6
- import {mkdirSync, unlinkSync, writeFileSync} from "fs";
6
+ import {existsSync, mkdirSync, unlinkSync, writeFileSync} from "fs";
7
7
  import {dirname, join} from "path";
8
8
  import {BackendClient, type SessionSummary} from "../lib/backendClient.js";
9
9
  import {clearSession, loadSession, saveSession} from "../lib/sessionStore.js";
@@ -17,7 +17,7 @@ import {
17
17
  import {runTerminalCommand} from "../lib/terminalBridge.js";
18
18
  import {isWorkspaceTrusted, trustWorkspace} from "../lib/trustStore.js";
19
19
  import {scanWorkspace} from "../lib/workspaceScanner.js";
20
- import {startLiveTranscription, transcribeOnce, type LiveTranscriptionController} from "../lib/voice.js";
20
+ import {startLiveTranscription, transcribeOnce, resolveAudioDevice, setAudioDevice, listAvfAudioDevices, writeAstraKey, type LiveTranscriptionController} from "../lib/voice.js";
21
21
  import type {AgentEvent, AuthSession, ChatMessage} from "../types/events.js";
22
22
  import type {WorkspaceFile} from "../lib/workspaceScanner.js";
23
23
 
@@ -76,6 +76,7 @@ const FOUNDER_WELCOME = centerLine("Welcome to Astra from Astra CEO & Founder, S
76
76
 
77
77
  const HISTORY_SETTINGS_URL = "https://astra-web-builder.vercel.app/settings";
78
78
  const VOICE_SILENCE_MS = Number(process.env.ASTRA_VOICE_SILENCE_MS ?? "3000");
79
+
79
80
  const VOICE_MIN_CHARS = Number(process.env.ASTRA_VOICE_MIN_CHARS ?? "4");
80
81
  const VOICE_DUPLICATE_WINDOW_MS = Number(process.env.ASTRA_VOICE_DUPLICATE_WINDOW_MS ?? "10000");
81
82
  const VOICE_NOISE_WORDS = new Set(["you", "yes", "yeah", "yep", "ok", "okay", "uh", "um", "hmm"]);
@@ -306,6 +307,25 @@ const summarizeToolResult = (toolName: string, data: Record<string, unknown>, su
306
307
  return `${toolName} completed`;
307
308
  };
308
309
 
310
+ const isLikelyVoiceNoise = (text: string): boolean => {
311
+ const normalized = text.trim().toLowerCase();
312
+ if (!normalized) {
313
+ return true;
314
+ }
315
+ if (normalized.length < VOICE_MIN_CHARS) {
316
+ return true;
317
+ }
318
+ const tokens = normalized.split(/\s+/).filter(Boolean);
319
+ if (tokens.length === 0) {
320
+ return true;
321
+ }
322
+ const nonNoise = tokens.filter((t) => !VOICE_NOISE_WORDS.has(t));
323
+ if (nonNoise.length === 0) {
324
+ return true;
325
+ }
326
+ return false;
327
+ };
328
+
309
329
  type InlineToken = {text: string; bold?: boolean; italic?: boolean; code?: boolean};
310
330
 
311
331
  const parseInline = (line: string): InlineToken[] => {
@@ -640,6 +660,7 @@ export const AstraApp = (): React.JSX.Element => {
640
660
  const [voicePreparing, setVoicePreparing] = useState(false);
641
661
  const [voiceWaitingForSilence, setVoiceWaitingForSilence] = useState(false);
642
662
  const [voiceQueuedPrompt, setVoiceQueuedPrompt] = useState<string | null>(null);
663
+ const [micSetupDevices, setMicSetupDevices] = useState<Array<{index: number; name: string}> | null>(null);
643
664
  const [toolFeedMode, setToolFeedMode] = useState<"compact" | "expanded">("compact");
644
665
  const [historyOpen, setHistoryOpen] = useState(false);
645
666
  const [historyMode, setHistoryMode] = useState<HistoryMode>("picker");
@@ -771,34 +792,67 @@ export const AstraApp = (): React.JSX.Element => {
771
792
  if (liveVoiceRef.current) {
772
793
  return;
773
794
  }
795
+ // #region agent log
796
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H5',location:'App.tsx:startLiveVoice',message:'startLiveVoice called',data:{announce,voiceEnabled,thinking,hasController:Boolean(liveVoiceRef.current)},timestamp:Date.now()})}).catch(()=>{});
797
+ // #endregion
774
798
  setVoiceEnabled(true);
775
799
  setVoicePreparing(true);
776
800
  setVoiceListening(false);
777
801
  setVoiceWaitingForSilence(false);
778
802
  if (announce) {
779
- pushMessage("system", "Dictation armed. Preparing microphone...");
803
+ pushMessage("system", "Voice input armed. Preparing microphone...");
780
804
  }
781
- liveVoiceRef.current = startLiveTranscription({
805
+
806
+ // Resolve mic device before starting — triggers onboarding if not configured.
807
+ void resolveAudioDevice(workspaceRoot).then((device) => {
808
+ if (device === null) {
809
+ // No device configured — run onboarding inline.
810
+ setVoicePreparing(false);
811
+ const devices = listAvfAudioDevices();
812
+ if (!devices.length) {
813
+ pushMessage("error", "No audio devices found. Install ffmpeg: brew install ffmpeg");
814
+ setVoiceEnabled(false);
815
+ return;
816
+ }
817
+ setMicSetupDevices(devices);
818
+ const lines = [
819
+ "Let's set up your microphone first.",
820
+ ...devices.map(d => ` [${d.index}] ${d.name}`),
821
+ "Type the number for your mic and press Enter:"
822
+ ];
823
+ pushMessage("system", lines.join("\n"));
824
+ return;
825
+ }
826
+ // Device resolved — start transcription.
827
+ liveVoiceRef.current = startLiveTranscription({
782
828
  onPartial: (text) => {
783
829
  setVoicePreparing(false);
784
830
  setVoiceListening(true);
785
- setPrompt(text);
786
831
  if (voiceSilenceTimerRef.current) {
787
832
  clearTimeout(voiceSilenceTimerRef.current);
788
833
  }
789
834
  const candidate = text.trim();
790
835
  if (!candidate) {
836
+ setPrompt("");
791
837
  setVoiceWaitingForSilence(false);
792
838
  return;
793
839
  }
794
840
  const normalized = candidate.toLowerCase();
795
- const isLikelyNoise = normalized.length < VOICE_MIN_CHARS || VOICE_NOISE_WORDS.has(normalized);
841
+ const isLikelyNoise = isLikelyVoiceNoise(normalized);
842
+ // #region agent log
843
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H1',location:'App.tsx:startLiveVoice.onPartial',message:'partial transcript observed',data:{textLen:text.length,candidateLen:candidate.length,normalized,isLikelyNoise,silenceMs:VOICE_SILENCE_MS},timestamp:Date.now()})}).catch(()=>{});
844
+ // #endregion
796
845
  if (isLikelyNoise) {
846
+ setPrompt("");
797
847
  setVoiceWaitingForSilence(false);
798
848
  return;
799
849
  }
850
+ setPrompt(text);
800
851
  setVoiceWaitingForSilence(true);
801
852
  voiceSilenceTimerRef.current = setTimeout(() => {
853
+ // #region agent log
854
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H2',location:'App.tsx:startLiveVoice.silenceTimeout',message:'silence timeout fired and queueing prompt',data:{candidate,voiceWaitingForSilence:true},timestamp:Date.now()})}).catch(()=>{});
855
+ // #endregion
802
856
  setVoiceQueuedPrompt(candidate);
803
857
  void stopLiveVoice();
804
858
  }, VOICE_SILENCE_MS);
@@ -813,15 +867,22 @@ export const AstraApp = (): React.JSX.Element => {
813
867
  setVoicePreparing(false);
814
868
  setVoiceListening(false);
815
869
  setVoiceWaitingForSilence(false);
870
+ // #region agent log
871
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H4',location:'App.tsx:startLiveVoice.onFinal',message:'final transcript emitted',data:{finalLen:text.length,finalText:text.slice(0,80)},timestamp:Date.now()})}).catch(()=>{});
872
+ // #endregion
816
873
  },
817
874
  onError: (error) => {
818
875
  setVoicePreparing(false);
819
876
  setVoiceListening(false);
820
877
  setVoiceWaitingForSilence(false);
821
878
  pushMessage("error", `Voice transcription error: ${error.message}`);
879
+ // #region agent log
880
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H4',location:'App.tsx:startLiveVoice.onError',message:'voice transcription error',data:{error:error.message},timestamp:Date.now()})}).catch(()=>{});
881
+ // #endregion
822
882
  }
823
- });
824
- }, [pushMessage, stopLiveVoice]);
883
+ });
884
+ }); // end resolveAudioDevice.then
885
+ }, [pushMessage, stopLiveVoice, workspaceRoot]);
825
886
 
826
887
  useEffect(() => {
827
888
  return () => {
@@ -910,6 +971,11 @@ export const AstraApp = (): React.JSX.Element => {
910
971
  if (key.return) {
911
972
  if (trustSelection === 0) {
912
973
  trustWorkspace(workspaceRoot);
974
+ // Create .astra settings file at workspace root if it doesn't exist yet.
975
+ try {
976
+ const astraPath = join(workspaceRoot, ".astra");
977
+ if (!existsSync(astraPath)) writeFileSync(astraPath, "");
978
+ } catch { /* non-fatal */ }
913
979
  setTrustedWorkspace(true);
914
980
  setBooting(true);
915
981
  return;
@@ -1312,7 +1378,7 @@ export const AstraApp = (): React.JSX.Element => {
1312
1378
  if (voiceEnabled) {
1313
1379
  await stopLiveVoice();
1314
1380
  setVoiceEnabled(false);
1315
- pushMessage("system", "Dictation paused: credits exhausted. Recharge, then run /dictate on.");
1381
+ pushMessage("system", "Voice input paused: credits exhausted. Recharge, then run /voice on.");
1316
1382
  pushMessage("system", "");
1317
1383
  }
1318
1384
  }
@@ -1387,10 +1453,32 @@ export const AstraApp = (): React.JSX.Element => {
1387
1453
  return;
1388
1454
  }
1389
1455
 
1456
+ // Mic onboarding: intercept when waiting for device selection.
1457
+ if (micSetupDevices !== null) {
1458
+ const idx = parseInt(text, 10);
1459
+ const valid = !isNaN(idx) && idx >= 0 && micSetupDevices.some(d => d.index === idx);
1460
+ if (!valid) {
1461
+ pushMessage("error", `Please type one of: ${micSetupDevices.map(d => d.index).join(", ")}`);
1462
+ return;
1463
+ }
1464
+ const device = `:${idx}`;
1465
+ // Write to .astra local cache
1466
+ writeAstraKey(workspaceRoot, "ASTRA_STT_DEVICE", device);
1467
+ // Persist to backend
1468
+ void backend.updateCliSettings({audio_device_index: idx});
1469
+ // Update in-process cache
1470
+ setAudioDevice(device);
1471
+ setMicSetupDevices(null);
1472
+ pushMessage("system", `Mic set to [${idx}] ${micSetupDevices.find(d => d.index === idx)?.name ?? ""}. Starting voice...`);
1473
+ pushMessage("system", "");
1474
+ startLiveVoice(false);
1475
+ return;
1476
+ }
1477
+
1390
1478
  if (text === "/help") {
1391
1479
  pushMessage(
1392
1480
  "system",
1393
- "/new /history /dictate /dictate on|off|status /tools compact|expanded /settings /settings model <id> /logout /exit"
1481
+ "/new /history /voice /dictate on|off|status /tools compact|expanded /settings /settings model <id> /logout /exit"
1394
1482
  );
1395
1483
  pushMessage("system", "");
1396
1484
  return;
@@ -1410,12 +1498,12 @@ export const AstraApp = (): React.JSX.Element => {
1410
1498
  if (text === "/settings") {
1411
1499
  pushMessage(
1412
1500
  "system",
1413
- `Settings: mode=${runtimeMode} scope=${workspaceRoot} model=${activeModel} provider=${getProviderForModel(activeModel)} dictate=${voiceEnabled ? "on" : "off"} listening=${voiceListening ? "yes" : "no"} tool_feed=${toolFeedMode} silence_ms=${VOICE_SILENCE_MS} role=${user.role ?? "user"} client_id=${getDefaultClientId()} backend=${getBackendUrl()}`
1501
+ `Settings: mode=${runtimeMode} scope=${workspaceRoot} model=${activeModel} provider=${getProviderForModel(activeModel)} voice=${voiceEnabled ? "on" : "off"} listening=${voiceListening ? "yes" : "no"} tool_feed=${toolFeedMode} silence_ms=${VOICE_SILENCE_MS} role=${user.role ?? "user"} client_id=${getDefaultClientId()} backend=${getBackendUrl()}`
1414
1502
  );
1415
1503
  pushMessage("system", "");
1416
1504
  return;
1417
1505
  }
1418
- if (text === "/dictate" || text === "/voice") {
1506
+ if (text === "/voice") {
1419
1507
  if (!voiceEnabled) {
1420
1508
  setVoiceEnabled(true);
1421
1509
  startLiveVoice(true);
@@ -1423,7 +1511,7 @@ export const AstraApp = (): React.JSX.Element => {
1423
1511
  }
1424
1512
  pushMessage(
1425
1513
  "system",
1426
- `Dictation is on${voiceListening ? " (currently listening)" : ""}. Use /dictate off to disable.`
1514
+ `Voice input is on${voiceListening ? " (currently listening)" : ""}. Use /voice off to disable.`
1427
1515
  );
1428
1516
  pushMessage("system", "");
1429
1517
  return;
@@ -1448,29 +1536,29 @@ export const AstraApp = (): React.JSX.Element => {
1448
1536
  await openHistory();
1449
1537
  return;
1450
1538
  }
1451
- if (text === "/dictate status" || text === "/voice status") {
1539
+ if (text === "/dictate status") {
1452
1540
  pushMessage(
1453
1541
  "system",
1454
- `Dictation is ${voiceEnabled ? "on" : "off"}${voicePreparing ? " (preparing mic)" : ""}${voiceListening ? " (listening)" : ""}${voiceWaitingForSilence ? " (waiting for silence to auto-send)" : ""}.`
1542
+ `Voice input is ${voiceEnabled ? "on" : "off"}${voicePreparing ? " (preparing mic)" : ""}${voiceListening ? " (listening)" : ""}${voiceWaitingForSilence ? " (waiting for silence to auto-send)" : ""}.`
1455
1543
  );
1456
1544
  pushMessage("system", "");
1457
1545
  return;
1458
1546
  }
1459
- if (text === "/dictate on" || text === "/voice on") {
1547
+ if (text === "/dictate on") {
1460
1548
  setVoiceEnabled(true);
1461
1549
  startLiveVoice(true);
1462
- pushMessage("system", `Dictation enabled. Auto-send after ${Math.round(VOICE_SILENCE_MS / 1000)}s silence.`);
1550
+ pushMessage("system", `Voice input enabled. Auto-send after ${Math.round(VOICE_SILENCE_MS / 1000)}s silence.`);
1463
1551
  pushMessage("system", "");
1464
1552
  return;
1465
1553
  }
1466
- if (text === "/dictate off" || text === "/voice off") {
1554
+ if (text === "/dictate off") {
1467
1555
  await stopLiveVoice();
1468
1556
  setVoiceEnabled(false);
1469
- pushMessage("system", "Dictation disabled.");
1557
+ pushMessage("system", "Voice input disabled.");
1470
1558
  pushMessage("system", "");
1471
1559
  return;
1472
1560
  }
1473
- if (text === "/dictate input" || text === "/voice input") {
1561
+ if (text === "/dictate input") {
1474
1562
  const transcribed = await transcribeOnce();
1475
1563
  if (!transcribed) {
1476
1564
  pushMessage(
@@ -1583,9 +1671,11 @@ export const AstraApp = (): React.JSX.Element => {
1583
1671
  exit,
1584
1672
  handleEvent,
1585
1673
  localFileCache,
1674
+ micSetupDevices,
1586
1675
  openHistory,
1587
1676
  pushMessage,
1588
1677
  sessionId,
1678
+ setMicSetupDevices,
1589
1679
  startLiveVoice,
1590
1680
  stopLiveVoice,
1591
1681
  thinking,
@@ -1610,11 +1700,14 @@ export const AstraApp = (): React.JSX.Element => {
1610
1700
  }
1611
1701
  const normalizedQueued = queued.toLowerCase();
1612
1702
  const last = lastVoicePromptRef.current;
1613
- const isLikelyNoise = normalizedQueued.length < VOICE_MIN_CHARS || VOICE_NOISE_WORDS.has(normalizedQueued);
1703
+ const isLikelyNoise = isLikelyVoiceNoise(normalizedQueued);
1614
1704
  const isFastDuplicate =
1615
1705
  last !== null &&
1616
1706
  last.text === normalizedQueued &&
1617
1707
  Date.now() - last.at <= VOICE_DUPLICATE_WINDOW_MS;
1708
+ // #region agent log
1709
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H3',location:'App.tsx:voiceQueuedPromptEffect',message:'queued prompt evaluated',data:{queued,normalizedQueued,isLikelyNoise,isFastDuplicate,thinking},timestamp:Date.now()})}).catch(()=>{});
1710
+ // #endregion
1618
1711
  if (isLikelyNoise || isFastDuplicate) {
1619
1712
  const now = Date.now();
1620
1713
  const lastIgnored = lastIgnoredVoiceRef.current;
@@ -1629,8 +1722,11 @@ export const AstraApp = (): React.JSX.Element => {
1629
1722
  return;
1630
1723
  }
1631
1724
  lastVoicePromptRef.current = {text: normalizedQueued, at: Date.now()};
1632
- pushMessage("system", `Dictation input: ${queued}`);
1725
+ pushMessage("system", `Voice input: ${queued}`);
1633
1726
  setPrompt("");
1727
+ // #region agent log
1728
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_1',hypothesisId:'H3',location:'App.tsx:voiceQueuedPromptEffect',message:'queued prompt forwarded to sendPrompt',data:{queuedLen:queued.length},timestamp:Date.now()})}).catch(()=>{});
1729
+ // #endregion
1634
1730
  void sendPrompt(queued);
1635
1731
  }, [pushMessage, sendPrompt, thinking, user, voiceQueuedPrompt]);
1636
1732
 
@@ -1829,7 +1925,7 @@ export const AstraApp = (): React.JSX.Element => {
1829
1925
  <Text color="#6c88a8">
1830
1926
  {`scope ${
1831
1927
  workspaceRoot.length > HEADER_PATH_MAX ? `…${workspaceRoot.slice(-(HEADER_PATH_MAX - 1))}` : workspaceRoot
1832
- } · dictate ${
1928
+ } · voice ${
1833
1929
  voiceEnabled
1834
1930
  ? voicePreparing
1835
1931
  ? "on/preparing"
@@ -1842,7 +1938,7 @@ export const AstraApp = (): React.JSX.Element => {
1842
1938
  }`}
1843
1939
  </Text>
1844
1940
  <Text color="#2a3a50">{divider}</Text>
1845
- <Text color="#3a5068">/help /new /history /dictate /dictate on|off|status</Text>
1941
+ <Text color="#3a5068">/help /new /history /voice /dictate on|off|status</Text>
1846
1942
  <Text color="#3a5068">/tools compact|expanded /settings /logout /exit</Text>
1847
1943
  <Text color="#2a3a50">{divider}</Text>
1848
1944
  <Box flexDirection="column" marginTop={1}>
@@ -1958,7 +2054,7 @@ export const AstraApp = (): React.JSX.Element => {
1958
2054
  ) : null}
1959
2055
  {voiceEnabled && !thinking ? (
1960
2056
  <Box flexDirection="row" marginTop={1}>
1961
- <Text color="#9ad5ff">{"🎤 dictate".padEnd(LABEL_WIDTH, " ")}</Text>
2057
+ <Text color="#9ad5ff">{"🎤 voice".padEnd(LABEL_WIDTH, " ")}</Text>
1962
2058
  {voicePreparing ? (
1963
2059
  <Text color="#f4d58a">🟡 preparing microphone...</Text>
1964
2060
  ) : voiceListening && !voiceWaitingForSilence ? (
@@ -1983,7 +2079,7 @@ export const AstraApp = (): React.JSX.Element => {
1983
2079
  setPrompt(value);
1984
2080
  }
1985
2081
  }}
1986
- placeholder={voiceEnabled ? "Ask Astra... (dictate on: auto listen + send on silence)" : "Ask Astra..."}
2082
+ placeholder={voiceEnabled ? "Ask Astra... (voice on: auto listen + send on silence)" : "Ask Astra..."}
1987
2083
  />
1988
2084
  </Box>
1989
2085
  </Box>
@@ -162,6 +162,24 @@ export class BackendClient {
162
162
  return out;
163
163
  }
164
164
 
165
+ public async getCliSettings(): Promise<Record<string, unknown>> {
166
+ try {
167
+ const data = await this.get("/api/user/cli-settings");
168
+ return (data.cli_settings as Record<string, unknown>) ?? {};
169
+ } catch {
170
+ return {};
171
+ }
172
+ }
173
+
174
+ public async updateCliSettings(settings: Record<string, unknown>): Promise<Record<string, unknown>> {
175
+ try {
176
+ const data = await this.patch("/api/user/cli-settings", settings);
177
+ return (data.cli_settings as Record<string, unknown>) ?? {};
178
+ } catch {
179
+ return {};
180
+ }
181
+ }
182
+
165
183
  public async deleteSession(sessionId: string): Promise<void> {
166
184
  await this.delete(`/api/sessions/${sessionId}`);
167
185
  }
package/src/lib/voice.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import {spawn, spawnSync} from "child_process";
2
2
  import {randomUUID} from "crypto";
3
+ import {existsSync, readFileSync, writeFileSync} from "fs";
3
4
  import {basename, join} from "path";
4
5
  import {tmpdir} from "os";
5
6
  import {readFile, rm} from "fs/promises";
@@ -10,6 +11,101 @@ const VOICE_TEXT_LIMIT = 600;
10
11
  const DEFAULT_STT_MODEL = process.env.ASTRA_STT_MODEL?.trim() || "whisper-1";
11
12
  const DEFAULT_CHUNK_SECONDS = Number(process.env.ASTRA_STT_CHUNK_SECONDS ?? "2.5");
12
13
 
14
// Module-level device cache — resolved at most once per process lifetime.
// undefined = not yet resolved; null = resolved but nothing configured
// (caller should run mic onboarding); string = ffmpeg AVFoundation device
// identifier to capture from (e.g. ":1").
let _cachedDevice: string | null | undefined = undefined;

// Per-workspace settings file holding KEY=VALUE pairs (see readAstraFile /
// writeAstraKey below).
const ASTRA_FILE_NAME = ".astra";
20
+
21
+ const readAstraFile = (workspaceRoot: string): Record<string, string> => {
22
+ const filePath = join(workspaceRoot, ASTRA_FILE_NAME);
23
+ if (!existsSync(filePath)) return {};
24
+ const lines = readFileSync(filePath, "utf8").split("\n");
25
+ const result: Record<string, string> = {};
26
+ for (const line of lines) {
27
+ const eq = line.indexOf("=");
28
+ if (eq > 0) result[line.slice(0, eq).trim()] = line.slice(eq + 1).trim();
29
+ }
30
+ return result;
31
+ };
32
+
33
+ export const writeAstraKey = (workspaceRoot: string, key: string, value: string): void => {
34
+ const filePath = join(workspaceRoot, ASTRA_FILE_NAME);
35
+ const existing = existsSync(filePath) ? readFileSync(filePath, "utf8") : "";
36
+ const lines = existing.split("\n").filter(l => l.trim() && !l.startsWith(`${key}=`));
37
+ lines.push(`${key}=${value}`);
38
+ writeFileSync(filePath, lines.join("\n") + "\n");
39
+ };
40
+
41
/**
 * Resolve the ffmpeg AVFoundation mic device string (e.g. ":1").
 *
 * Resolution order:
 *   1. in-process cache (`_cachedDevice`) — resolved at most once per process;
 *   2. `ASTRA_STT_DEVICE` in the workspace `.astra` file;
 *   3. the backend's per-user cli-settings (`audio_device_index`), which is
 *      then written back to `.astra` so future runs skip the network hop.
 *
 * Returns null when no device is configured anywhere — callers use that as
 * the signal to run mic onboarding. Backend/network failures are swallowed
 * deliberately and also fall through to null.
 */
export const resolveAudioDevice = async (workspaceRoot: string): Promise<string | null> => {
  // Cache hit covers both "resolved" (string) and "known unconfigured" (null).
  if (_cachedDevice !== undefined) return _cachedDevice;

  // 1. Read local .astra cache
  const local = readAstraFile(workspaceRoot);
  if (local.ASTRA_STT_DEVICE) {
    _cachedDevice = local.ASTRA_STT_DEVICE;
    return _cachedDevice;
  }

  // 2. Fetch from backend — only possible with a logged-in session token.
  const token = loadSession()?.access_token;
  if (token) {
    try {
      const resp = await fetch(`${getBackendUrl()}/api/user/cli-settings`, {
        headers: {Authorization: `Bearer ${token}`}
      });
      if (resp.ok) {
        const data = (await resp.json()) as {cli_settings?: {audio_device_index?: number}};
        const idx = data.cli_settings?.audio_device_index;
        if (typeof idx === "number") {
          // Persist locally so the next run resolves without the network.
          const device = `:${idx}`;
          writeAstraKey(workspaceRoot, "ASTRA_STT_DEVICE", device);
          _cachedDevice = device;
          return _cachedDevice;
        }
      }
    } catch {
      // Silently fall through to onboarding
    }
  }

  // 3. Nothing configured. Cache the null too, so we don't re-poll the
  // backend on every voice activation in this process.
  _cachedDevice = null;
  return null;
};
+ };
81
+
82
+ /** Call after onboarding saves a new device so the running process uses it immediately. */
83
+ export const setAudioDevice = (device: string): void => {
84
+ _cachedDevice = device;
85
+ };
86
+
87
+ /** List AVFoundation audio devices. Returns [] on non-macOS or if ffmpeg is missing. */
88
+ export const listAvfAudioDevices = (): Array<{index: number; name: string}> => {
89
+ if (process.platform !== "darwin") return [];
90
+ const result = spawnSync(
91
+ "ffmpeg",
92
+ ["-f", "avfoundation", "-list_devices", "true", "-i", ""],
93
+ {encoding: "utf8", timeout: 8000}
94
+ );
95
+ const output = (result.stderr ?? "") + (result.stdout ?? "");
96
+ const devices: Array<{index: number; name: string}> = [];
97
+ let inAudio = false;
98
+ for (const line of output.split("\n")) {
99
+ if (line.includes("AVFoundation audio devices")) { inAudio = true; continue; }
100
+ if (inAudio) {
101
+ const m = line.match(/\[(\d+)\]\s+(.+)$/);
102
+ if (m?.[1] && m[2]) devices.push({index: parseInt(m[1], 10), name: m[2].trim()});
103
+ else if (devices.length) break;
104
+ }
105
+ }
106
+ return devices;
107
+ };
108
+
13
109
// Normalize a transcript for transport: collapse internal whitespace runs to
// single spaces, strip the edges, and cap the length at VOICE_TEXT_LIMIT.
const safeText = (text: string): string => {
  const collapsed = text.trim().replace(/\s+/g, " ");
  return collapsed.slice(0, VOICE_TEXT_LIMIT);
};
14
110
 
15
111
  const commandExists = (binary: string): boolean => {
@@ -39,7 +135,8 @@ const captureAudioChunk = async (seconds: number): Promise<string> => {
39
135
  if (process.platform === "darwin") {
40
136
  // Prefer ffmpeg on macOS (works on Apple Silicon/Homebrew setups).
41
137
  if (commandExists("ffmpeg")) {
42
- cmd = `ffmpeg -hide_banner -loglevel error -f avfoundation -i ":0" -ar 16000 -ac 1 -t ${seconds} "${outPath}"`;
138
+ const micDevice = _cachedDevice ?? process.env.ASTRA_STT_DEVICE?.trim() ?? ":0";
139
+ cmd = `ffmpeg -hide_banner -loglevel error -f avfoundation -i "${micDevice}" -ar 16000 -ac 1 -t ${seconds} "${outPath}"`;
43
140
  } else if (commandExists("rec")) {
44
141
  cmd = `rec -q -r 16000 -c 1 "${outPath}" trim 0 ${seconds}`;
45
142
  } else {
@@ -62,12 +159,21 @@ const captureAudioChunk = async (seconds: number): Promise<string> => {
62
159
  cmd = cmd.replaceAll("{output}", outPath).replaceAll("{seconds}", String(seconds));
63
160
  }
64
161
 
162
+ // #region agent log
163
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_2',hypothesisId:'H6',location:'voice.ts:captureAudioChunk',message:'capture command selected',data:{platform:process.platform,seconds,usesCustom:Boolean(custom),cmdPreview:cmd.slice(0,180)},timestamp:Date.now()})}).catch(()=>{});
164
+ // #endregion
65
165
  await runShell(cmd);
166
+ // #region agent log
167
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_2',hypothesisId:'H6',location:'voice.ts:captureAudioChunk',message:'capture command completed',data:{outputPath:outPath},timestamp:Date.now()})}).catch(()=>{});
168
+ // #endregion
66
169
  return outPath;
67
170
  };
68
171
 
69
172
  const transcribeAudioFile = async (filePath: string): Promise<string> => {
70
173
  const bytes = await readFile(filePath);
174
+ // #region agent log
175
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_2',hypothesisId:'H7',location:'voice.ts:transcribeAudioFile',message:'audio chunk loaded',data:{filePath,byteLength:bytes.length},timestamp:Date.now()})}).catch(()=>{});
176
+ // #endregion
71
177
  const file = new File([bytes], basename(filePath), {type: "audio/wav"});
72
178
 
73
179
  // Backend proxy only: backend holds provider secrets.
@@ -83,12 +189,19 @@ const transcribeAudioFile = async (filePath: string): Promise<string> => {
83
189
  headers: {Authorization: `Bearer ${token}`},
84
190
  body: form
85
191
  });
192
+ // #region agent log
193
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_2',hypothesisId:'H7',location:'voice.ts:transcribeAudioFile',message:'backend transcribe response',data:{status:response.status,ok:response.ok},timestamp:Date.now()})}).catch(()=>{});
194
+ // #endregion
86
195
  if (!response.ok) {
87
196
  const detail = (await response.text()).slice(0, 400);
88
197
  throw new Error(`Backend transcription failed ${response.status}: ${detail}`);
89
198
  }
90
199
  const data = (await response.json()) as {text?: string};
91
- return String(data.text ?? "").trim();
200
+ const out = String(data.text ?? "").trim();
201
+ // #region agent log
202
+ fetch('http://127.0.0.1:7573/ingest/fdd4f018-1ba3-4303-b1bb-375443267476',{method:'POST',headers:{'Content-Type':'application/json','X-Debug-Session-Id':'17f1ea'},body:JSON.stringify({sessionId:'17f1ea',runId:'voice_run_2',hypothesisId:'H8',location:'voice.ts:transcribeAudioFile',message:'transcribe text received',data:{textLen:out.length,textPreview:out.slice(0,80)},timestamp:Date.now()})}).catch(()=>{});
203
+ // #endregion
204
+ return out;
92
205
  };
93
206
 
94
207
  /**