@arach/lattices 0.2.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +144 -69
  3. package/apps/mac/Info.plist +43 -0
  4. package/apps/mac/Lattices.app/Contents/Info.plist +43 -0
  5. package/apps/mac/Lattices.app/Contents/MacOS/Lattices +0 -0
  6. package/apps/mac/Lattices.app/Contents/Resources/AppIcon.icns +0 -0
  7. package/apps/mac/Lattices.app/Contents/Resources/docs/assistant-knowledge.md +130 -0
  8. package/apps/mac/Lattices.app/Contents/Resources/tap.wav +0 -0
  9. package/apps/mac/Lattices.app/Contents/_CodeSignature/CodeResources +150 -0
  10. package/apps/mac/Lattices.entitlements +21 -0
  11. package/apps/mac/Resources/Pets/assistant-spark/pet.json +62 -0
  12. package/apps/mac/Resources/Pets/assistant-spark/spritesheet.webp +0 -0
  13. package/apps/mac/Resources/Pets/scout-ranger/pet.json +6 -0
  14. package/apps/mac/Resources/Pets/scout-ranger/spritesheet.webp +0 -0
  15. package/apps/mac/Resources/tap.wav +0 -0
  16. package/assets/AppIcon.icns +0 -0
  17. package/bin/assistant-intelligence.ts +912 -0
  18. package/bin/cli/capture.ts +252 -0
  19. package/bin/cli/daemon.ts +22 -0
  20. package/bin/cli/helpers.ts +105 -0
  21. package/bin/cli/layer.ts +178 -0
  22. package/bin/cli/runs.ts +43 -0
  23. package/bin/cli/search.ts +141 -0
  24. package/bin/cli/session.ts +32 -0
  25. package/bin/client.ts +17 -0
  26. package/bin/cua.ts +26 -0
  27. package/bin/{daemon-client.js → daemon-client.ts} +49 -30
  28. package/bin/handsoff-infer.ts +96 -0
  29. package/bin/handsoff-worker.ts +531 -0
  30. package/bin/infer.ts +424 -0
  31. package/bin/keychain.ts +75 -0
  32. package/bin/lattices-app.ts +655 -0
  33. package/bin/lattices-build +125 -0
  34. package/bin/lattices-build-env.ts +77 -0
  35. package/bin/lattices-dev +362 -0
  36. package/bin/lattices.ts +3260 -0
  37. package/bin/project-twin.ts +645 -0
  38. package/docs/agent-execution-plan.md +562 -0
  39. package/docs/agent-layer-guide.md +207 -0
  40. package/docs/agents.md +233 -0
  41. package/docs/ai-chat-ux-review.md +416 -0
  42. package/docs/api.md +1041 -47
  43. package/docs/app.md +96 -13
  44. package/docs/assistant-knowledge.md +130 -0
  45. package/docs/companion-deck.md +209 -0
  46. package/docs/component-extraction-roadmap.md +392 -0
  47. package/docs/concepts.md +13 -12
  48. package/docs/config.md +83 -10
  49. package/docs/gesture-customization-proposal.md +520 -0
  50. package/docs/handsoff-test-scenarios.md +84 -0
  51. package/docs/hyperspace-grid-snappiness.md +210 -0
  52. package/docs/layers.md +176 -28
  53. package/docs/mouse-gestures.md +244 -0
  54. package/docs/ocr.md +21 -9
  55. package/docs/overview.md +42 -23
  56. package/docs/presentation-execution-review.md +491 -0
  57. package/docs/prompts/hands-off-system.md +382 -0
  58. package/docs/prompts/hands-off-turn.md +30 -0
  59. package/docs/prompts/voice-advisor.md +31 -0
  60. package/docs/prompts/voice-fallback.md +23 -0
  61. package/docs/proposals/LAT-001-gesture-visual-customization.md +522 -0
  62. package/docs/proposals/LAT-002-shared-overlay-canvas.md +353 -0
  63. package/docs/proposals/LAT-003-menu-bar-controller-architecture.md +291 -0
  64. package/docs/proposals/LAT-004-interactive-overlay-actors.md +534 -0
  65. package/docs/proposals/LAT-005-action-runtime-product-spine.md +914 -0
  66. package/docs/proposals/LAT-006-followup-gaps.md +103 -0
  67. package/docs/proposals/LAT-006-runs-and-capture-in-lattices.md +566 -0
  68. package/docs/proposals/LAT-007-unified-app-shell.md +128 -0
  69. package/docs/quickstart.md +8 -12
  70. package/docs/reference/dewey.config.ts +74 -0
  71. package/docs/reference/install-agent.md +79 -0
  72. package/docs/release.md +172 -0
  73. package/docs/repo-structure.md +100 -0
  74. package/docs/terminal-kit.md +87 -0
  75. package/docs/tiling-reference.md +224 -0
  76. package/docs/twins.md +138 -0
  77. package/docs/voice-command-protocol.md +278 -0
  78. package/docs/voice-error-model.md +73 -0
  79. package/docs/voice.md +221 -0
  80. package/package.json +69 -16
  81. package/packages/npm/sdk/cua.d.mts +1 -0
  82. package/packages/npm/sdk/cua.d.ts +188 -0
  83. package/packages/npm/sdk/cua.mjs +376 -0
  84. package/app/Lattices.app/Contents/Info.plist +0 -24
  85. package/app/Package.swift +0 -13
  86. package/app/Sources/ActionRow.swift +0 -61
  87. package/app/Sources/App.swift +0 -10
  88. package/app/Sources/AppDelegate.swift +0 -234
  89. package/app/Sources/AppShellView.swift +0 -62
  90. package/app/Sources/AppTypeClassifier.swift +0 -70
  91. package/app/Sources/AppWindowShell.swift +0 -63
  92. package/app/Sources/CheatSheetHUD.swift +0 -332
  93. package/app/Sources/CommandModeState.swift +0 -1362
  94. package/app/Sources/CommandModeView.swift +0 -1405
  95. package/app/Sources/CommandModeWindow.swift +0 -192
  96. package/app/Sources/CommandPaletteView.swift +0 -307
  97. package/app/Sources/CommandPaletteWindow.swift +0 -134
  98. package/app/Sources/DaemonProtocol.swift +0 -101
  99. package/app/Sources/DaemonServer.swift +0 -414
  100. package/app/Sources/DesktopModel.swift +0 -121
  101. package/app/Sources/DesktopModelTypes.swift +0 -71
  102. package/app/Sources/DiagnosticLog.swift +0 -271
  103. package/app/Sources/EventBus.swift +0 -30
  104. package/app/Sources/HotkeyManager.swift +0 -250
  105. package/app/Sources/HotkeyStore.swift +0 -338
  106. package/app/Sources/InventoryManager.swift +0 -35
  107. package/app/Sources/InventoryPath.swift +0 -43
  108. package/app/Sources/KeyRecorderView.swift +0 -210
  109. package/app/Sources/LatticesApi.swift +0 -1125
  110. package/app/Sources/MainView.swift +0 -467
  111. package/app/Sources/MainWindow.swift +0 -83
  112. package/app/Sources/OcrModel.swift +0 -309
  113. package/app/Sources/OcrStore.swift +0 -295
  114. package/app/Sources/OmniSearchState.swift +0 -283
  115. package/app/Sources/OmniSearchView.swift +0 -288
  116. package/app/Sources/OmniSearchWindow.swift +0 -105
  117. package/app/Sources/OrphanRow.swift +0 -129
  118. package/app/Sources/PaletteCommand.swift +0 -419
  119. package/app/Sources/PermissionChecker.swift +0 -125
  120. package/app/Sources/Preferences.swift +0 -92
  121. package/app/Sources/ProcessModel.swift +0 -199
  122. package/app/Sources/ProcessQuery.swift +0 -151
  123. package/app/Sources/Project.swift +0 -28
  124. package/app/Sources/ProjectRow.swift +0 -368
  125. package/app/Sources/ProjectScanner.swift +0 -121
  126. package/app/Sources/ScreenMapState.swift +0 -2387
  127. package/app/Sources/ScreenMapView.swift +0 -2820
  128. package/app/Sources/ScreenMapWindowController.swift +0 -89
  129. package/app/Sources/SessionManager.swift +0 -72
  130. package/app/Sources/SettingsView.swift +0 -1053
  131. package/app/Sources/SettingsWindow.swift +0 -20
  132. package/app/Sources/TabGroupRow.swift +0 -178
  133. package/app/Sources/Terminal.swift +0 -259
  134. package/app/Sources/TerminalQuery.swift +0 -156
  135. package/app/Sources/TerminalSynthesizer.swift +0 -200
  136. package/app/Sources/Theme.swift +0 -163
  137. package/app/Sources/TilePickerView.swift +0 -209
  138. package/app/Sources/TmuxModel.swift +0 -53
  139. package/app/Sources/TmuxQuery.swift +0 -81
  140. package/app/Sources/WindowTiler.swift +0 -1755
  141. package/app/Sources/WorkspaceManager.swift +0 -434
  142. package/bin/lattices-app.js +0 -221
  143. package/bin/lattices.js +0 -1418
@@ -0,0 +1,531 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Hands-off worker — long-running process that handles both inference and TTS.
4
+ *
5
+ * Reads newline-delimited JSON commands from stdin, writes JSON responses to stdout.
6
+ * Keeps TTS and inference warm — no cold starts.
7
+ *
8
+ * Commands:
9
+ * {"cmd":"infer","transcript":"...","snapshot":{...},"history":[...]}
10
+ * {"cmd":"speak","text":"..."}
11
+ * {"cmd":"ack","text":"..."} (speak + don't wait for completion)
12
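+ * {"cmd":"play_cached","text":"..."} (play a pre-cached phrase, don't wait for completion)
+ * {"cmd":"turn","transcript":"...","snapshot":{...},"history":[...]} (ack + infer + speak in one call)
+ * {"cmd":"ping"}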
13
+ *
14
+ * Responses:
15
+ * {"ok":true,"data":{...}}
16
+ * {"ok":false,"error":"..."}
17
+ */
18
+
19
+ import {
20
+ assistantPromptPath,
21
+ buildAssistantContextMessage,
22
+ buildAssistantSystemPrompt,
23
+ normalizeAssistantPlan,
24
+ tryLocalAssistantPlan,
25
+ } from "./assistant-intelligence.ts";
26
+ import { infer, resolveVoiceInferenceOptions } from "./infer.ts";
27
+
28
+ const INFER_TIMEOUT_MS = 15_000;
29
+ const voiceInference = resolveVoiceInferenceOptions();
30
+
31
/**
 * Run one inference call under a hard timeout and coerce the reply into a plan object.
 *
 * The call is aborted after INFER_TIMEOUT_MS via an AbortSignal passed as `abortSignal`.
 * If the reply text contains a JSON object (optionally wrapped in ``` fences), that
 * object is returned as `data`. Otherwise the reply is treated as conversation and
 * wrapped as `{ actions: [], spoken: <text> }`.
 *
 * @param prompt  User message forwarded to `infer`.
 * @param options Options forwarded to `infer`; `abortSignal` is added here.
 * @returns `data` (parsed or wrapped plan) and `raw` (the untouched `infer` result).
 * @throws Whatever `infer` throws, including the abort raised on timeout.
 */
async function inferSmart(prompt: string, options: any): Promise<{ data: any; raw: any }> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), INFER_TIMEOUT_MS);
  let raw: any;
  try {
    raw = await infer(prompt, { ...options, abortSignal: controller.signal });
  } finally {
    // Always clear the timer so a finished call cannot keep a pending abort alive.
    clearTimeout(timer);
  }

  // A reply without text must not crash the parser below; treat it as empty.
  const text: string = typeof raw?.text === "string" ? raw.text : "";

  // Strip markdown code fences, then look for the outermost {...} span.
  const cleaned = text
    .replace(/```json\s*/g, "")
    .replace(/```\s*/g, "")
    .trim();

  const start = cleaned.indexOf("{");
  const end = cleaned.lastIndexOf("}");

  if (start !== -1 && end > start) {
    try {
      const data = JSON.parse(cleaned.slice(start, end + 1));
      return { data, raw };
    } catch {
      // Braces were present but the span is not valid JSON — fall through to plain text.
    }
  }

  // Not JSON — treat as conversational response (spoken-only, no actions)
  log(`response was plain text, wrapping as spoken: "${text.slice(0, 80)}"`);
  return {
    data: { actions: [], spoken: text },
    raw,
  };
}
65
+ import { readFileSync } from "fs";
66
+ import { join } from "path";
67
+ import { spawn } from "child_process";
68
+
69
// ── Streaming TTS via OpenAI API → ffplay ──────────────────────────

const OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
const ttsConfig = loadTTSConfig();

/**
 * Resolve the TTS API key and voice.
 *
 * The key comes from OPENAI_API_KEY when set, otherwise from
 * `providers.openai.apiKey` in ~/.config/speakeasy/settings.json. The voice
 * comes from `providers.openai.voice` in that file and defaults to "nova".
 * A missing, unreadable or malformed settings file is ignored.
 */
function loadTTSConfig(): { apiKey: string; voice: string } {
  const envKey = process.env.OPENAI_API_KEY || "";
  const settingsPath = join(process.env.HOME || "", ".config/speakeasy/settings.json");

  let openai: any;
  try {
    openai = JSON.parse(readFileSync(settingsPath, "utf-8")).providers?.openai;
  } catch {
    // No usable settings file: environment key (possibly empty) and default voice.
    return { apiKey: envKey, voice: "nova" };
  }

  return {
    apiKey: envKey || openai?.apiKey || "",
    voice: openai?.voice || "nova",
  };
}
89
+
90
/**
 * Stream TTS: fetch PCM audio from OpenAI and pipe it straight into ffplay so
 * playback starts as soon as the first chunks arrive.
 *
 * @param text Text to synthesize.
 * @returns Milliseconds from call start until ffplay closes.
 * @throws If no API key is configured, the HTTP request fails, the response has
 *         no body, ffplay cannot be spawned, or piping audio into ffplay fails.
 */
async function streamSpeak(text: string): Promise<number> {
  const start = performance.now();

  // Fail fast instead of spending a round trip on a request that cannot authenticate.
  if (!ttsConfig.apiKey) {
    throw new Error("OpenAI TTS error: no API key configured");
  }

  const res = await fetch(OPENAI_TTS_URL, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${ttsConfig.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "tts-1",
      voice: ttsConfig.voice,
      input: text,
      response_format: "pcm",
      speed: 1.1,
    }),
  });

  if (!res.ok) {
    throw new Error(`OpenAI TTS error: ${res.status} ${res.statusText}`);
  }

  // Check for a body BEFORE spawning the player, so a bodiless response cannot
  // leave an ffplay process waiting on stdin.
  const reader = res.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }

  const ttfb = Math.round(performance.now() - start);
  log(`TTS first byte in ${ttfb}ms`);

  // Pipe response body directly to ffplay — playback starts as chunks arrive
  return new Promise<number>((resolve, reject) => {
    const player = spawn("ffplay", [
      "-nodisp",            // no video window
      "-autoexit",          // quit when done
      "-loglevel", "quiet",
      "-f", "s16le",        // PCM signed 16-bit little-endian
      "-ar", "24000",       // sample rate passed to ffplay for the raw PCM stream
      "-ch_layout", "mono",
      "-",                  // read from stdin
    ], { stdio: ["pipe", "ignore", "ignore"] });

    let settled = false;

    // Single failure path: stop the download, stop the player, reject once.
    const fail = (err: unknown) => {
      if (settled) return;
      settled = true;
      reader.cancel().catch(() => {});
      if (player.exitCode === null) player.kill();
      reject(err instanceof Error ? err : new Error(String(err)));
    };

    player.on("error", fail);
    // Without this listener a write to a dead ffplay (e.g. EPIPE) surfaces as an
    // unhandled 'error' event on the stream.
    player.stdin.on("error", fail);

    player.on("close", (code: number | null) => {
      if (settled) return;
      settled = true;
      const ms = Math.round(performance.now() - start);
      if (code !== 0) log(`ffplay exited with code ${code}`);
      resolve(ms);
    });

    // Pump chunks from fetch → ffplay stdin
    (async () => {
      while (true) {
        const { done, value } = await reader.read();
        if (done || settled) break;
        player.stdin.write(value);
      }
      if (!settled) player.stdin.end();
    })().catch(fail);
  });
}
152
+
153
// ── Pre-cached ack sounds (no API call needed) ────────────────────

// Short acknowledgements; one is picked at random when a turn starts.
const ACK_PHRASES = [
  "Got it.",
  "Heard you.",
  "On it.",
  "Yep.",
  "Cool.",
  "Sure.",
  "Okay.",
  "One sec.",
];

// Short confirmations spoken after an action has been handled.
const CONFIRM_PHRASES = [
  "Tiled.",
  "Focused.",
  "Done.",
  "Maximized.",
  "Split.",
  "Switched.",
  "Distributed.",
  "Restored.",
  "Searching.",
];

// On-disk location of the synthesized phrase audio (~/.lattices/tts-cache).
const ackCacheDir = join(process.env.HOME || "", ".lattices", "tts-cache");

// In-memory index of the cache: phrase → file path.
const ackCache = new Map<string, string>();
182
+
183
/**
 * Make sure every ack/confirmation phrase has a PCM file in the cache directory
 * and register each available file in `ackCache`.
 *
 * Existing non-empty files are reused. Missing phrases are synthesized through the
 * OpenAI TTS endpoint and written atomically (temp file + rename), so an interrupted
 * write can never be mistaken for a valid cache entry on the next start. A phrase
 * that fails to generate is logged and skipped; the function does not throw for it.
 */
async function ensureVoiceCache() {
  const { mkdirSync, existsSync, statSync, writeFileSync, renameSync } = await import("fs");
  mkdirSync(ackCacheDir, { recursive: true });

  const allPhrases = [...ACK_PHRASES, ...CONFIRM_PHRASES];
  let cached = 0;
  let generated = 0;
  let skipped = 0;

  for (const phrase of allPhrases) {
    const safeName = phrase.replace(/[^a-z]/gi, "_").toLowerCase();
    const filePath = join(ackCacheDir, `voice_${safeName}.pcm`);

    // An empty file is treated as a miss: playing it would produce silence forever.
    if (existsSync(filePath) && statSync(filePath).size > 0) {
      ackCache.set(phrase, filePath);
      cached++;
      continue;
    }

    // Without a key every request would fail; skip instead of issuing one per phrase.
    if (!ttsConfig.apiKey) {
      skipped++;
      continue;
    }

    // Generate and cache
    try {
      const res = await fetch(OPENAI_TTS_URL, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${ttsConfig.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: "tts-1",
          voice: ttsConfig.voice,
          input: phrase,
          response_format: "pcm",
          speed: 1.1,
        }),
      });

      if (!res.ok) {
        log(`cache failed for "${phrase}": ${res.status} ${res.statusText}`);
        continue;
      }

      const buf = Buffer.from(await res.arrayBuffer());
      // Write to a temp name first; the rename publishes the file only once complete.
      const tmpPath = `${filePath}.${process.pid}.tmp`;
      writeFileSync(tmpPath, buf);
      renameSync(tmpPath, filePath);
      ackCache.set(phrase, filePath);
      generated++;
      log(`cached: "${phrase}"`);
    } catch (e: any) {
      log(`cache failed for "${phrase}": ${e.message}`);
    }
  }

  if (skipped > 0) {
    log(`voice cache: no API key, skipped generating ${skipped} phrase(s)`);
  }
  log(`voice cache: ${cached} hit, ${generated} generated, ${allPhrases.length} total`);
}
231
+
232
/**
 * Play a phrase from the on-disk cache through ffplay.
 *
 * When the phrase has no cache entry, this falls back to `streamSpeak`.
 * A non-zero ffplay exit code is logged but still resolves; only a spawn
 * failure rejects.
 *
 * @param phrase Phrase to play; used as the cache key.
 * @returns Milliseconds from call start until playback finished.
 */
async function playCached(phrase: string): Promise<number> {
  const startedAt = performance.now();
  const cachedPath = ackCache.get(phrase);

  if (cachedPath) {
    log(`playing cached: "${phrase}"`);

    return new Promise<number>((resolve, reject) => {
      const ffplayArgs = [
        "-nodisp", "-autoexit", "-loglevel", "quiet",
        "-f", "s16le", "-ar", "24000", "-ch_layout", "mono",
        cachedPath,
      ];
      const child = spawn("ffplay", ffplayArgs, { stdio: ["ignore", "ignore", "pipe"] });

      // Collect stderr so a failed playback can be diagnosed from the log.
      const errChunks: string[] = [];
      child.stderr?.on("data", (chunk: Buffer) => {
        errChunks.push(chunk.toString());
      });

      child.on("close", (code: number) => {
        const elapsed = Math.round(performance.now() - startedAt);
        if (code === 0) {
          log(`played "${phrase}" in ${elapsed}ms`);
        } else {
          log(`ffplay error (code ${code}): ${errChunks.join("").slice(0, 100)}`);
        }
        resolve(elapsed);
      });

      child.on("error", (err: Error) => {
        log(`ffplay spawn error: ${err.message}`);
        reject(err);
      });
    });
  }

  log(`playCached: cache miss for "${phrase}", falling back to TTS`);
  return streamSpeak(phrase);
}
266
+
267
/** Pick one acknowledgement phrase uniformly at random and play it via `playCached`. */
function playAck(): Promise<number> {
  const index = Math.floor(Math.random() * ACK_PHRASES.length);
  const chosen = ACK_PHRASES[index];
  return playCached(chosen);
}
272
+
273
/**
 * Play the confirmation phrase that matches an action intent.
 *
 * Unknown intents get "Done.". A `switch` is used instead of an object-literal
 * lookup so that names inherited from Object.prototype (e.g. "constructor",
 * "toString") cannot resolve to a non-string value and reach `playCached`.
 *
 * @param intent Action intent name, e.g. "tile_window".
 * @returns Playback duration in milliseconds, as reported by `playCached`.
 */
function playConfirm(intent: string): Promise<number> {
  let phrase: string;
  switch (intent) {
    case "tile_window":
      phrase = "Tiled.";
      break;
    case "focus":
      phrase = "Focused.";
      break;
    case "distribute":
      phrase = "Distributed.";
      break;
    case "search":
      phrase = "Searching.";
      break;
    case "switch_layer":
      phrase = "Switched.";
      break;
    case "create_layer":
    default:
      phrase = "Done.";
      break;
  }
  return playCached(phrase);
}
285
+
286
// Warm up cache on startup. This runs in the background; a failure (for example the
// cache directory cannot be created) is logged instead of becoming an unhandled
// rejection.
ensureVoiceCache()
  .then(() => log("voice cache ready"))
  .catch((e: any) => log(`voice cache warm-up failed: ${e?.message ?? e}`));

log("worker started, streaming TTS ready");

// ── Load system prompt once ────────────────────────────────────────

const systemPrompt = buildAssistantSystemPrompt();
log("system prompt loaded");
log(`voice inference: ${voiceInference.provider}/${voiceInference.model}`);
296
+
297
// ── Auto-restart on file changes ───────────────────────────────────

// Files whose modification should make this worker exit so it can be restarted.
const watchFiles = [
  assistantPromptPath,
  join(import.meta.dir, "assistant-intelligence.ts"),
  join(import.meta.dir, "..", ".env"),
  join(import.meta.dir, "..", ".env.local"),
  import.meta.path, // this script itself
];

for (const watchedPath of watchFiles) {
  try {
    const fsModule = await import("fs");
    // Holds the pending exit timer; once set, further change events are ignored.
    let pendingExit: ReturnType<typeof setTimeout> | null = null;

    const onChange = () => {
      if (pendingExit) return;
      pendingExit = setTimeout(() => {
        log(`file changed: ${watchedPath.split("/").pop()} — exiting for restart`);
        process.exit(0); // Swift auto-restarts in 2s
      }, 500);
    };

    fsModule.watch(watchedPath, onChange);
    log(`watching: ${watchedPath.split("/").pop()}`);
  } catch {
    // A path that cannot be watched (e.g. it does not exist) is skipped silently.
  }
}
321
+
322
// ── Command loop ───────────────────────────────────────────────────

const decoder = new TextDecoder();
const reader = Bun.stdin.stream().getReader();
let buffer = "";

/**
 * Convert caller-supplied history into chat messages.
 * Non-string content is JSON-encoded; entries with empty or missing content are dropped.
 */
function toChatMessages(history: unknown): Array<{ role: "user" | "assistant"; content: string }> {
  if (!Array.isArray(history)) return [];
  return history
    .filter((h: any) => h !== null && typeof h === "object")
    .map((h: any) => ({
      role: h.role as "user" | "assistant",
      content: typeof h.content === "string" ? h.content : JSON.stringify(h.content),
    }))
    .filter((m: any) => typeof m.content === "string" && m.content.length > 0);
}

/**
 * Turn a transcript into a plan: try the local planner first, otherwise ask the model.
 * Model plans get provider/model/durationMs/tokens merged into `_meta`.
 * Throws whatever the planner or `inferSmart` throws.
 */
async function resolveAssistantPlan(
  transcript: string,
  snapshot: any,
  history: unknown,
): Promise<{ plan: any; source: "local" | "model" }> {
  const localPlan = tryLocalAssistantPlan(transcript, snapshot);
  if (localPlan) return { plan: localPlan, source: "local" };

  const userMessage = buildAssistantContextMessage(transcript, snapshot);
  const { data, raw } = await inferSmart(userMessage, {
    provider: voiceInference.provider,
    model: voiceInference.model,
    system: systemPrompt,
    messages: toChatMessages(history),
    temperature: 0.2,
    maxTokens: 512,
    tag: "hands-off",
  });

  const plan = normalizeAssistantPlan(data, transcript);
  return {
    plan: {
      ...plan,
      _meta: {
        ...plan._meta,
        provider: raw.provider,
        model: raw.model,
        durationMs: raw.durationMs,
        tokens: raw.usage?.totalTokens,
      },
    },
    source: "model",
  };
}

/**
 * Handle one newline-delimited JSON command and write its response via `respond`.
 *
 * Supported commands: "ping", "speak", "ack", "play_cached", "infer", "turn".
 * Blank lines are ignored. Malformed JSON, non-object payloads and unknown
 * commands get an `{ ok: false, error }` response.
 *
 * @param line One raw input line (without the trailing newline).
 */
async function processLine(line: string) {
  const trimmed = line.trim();
  if (!trimmed) return;

  let cmd: any;
  try {
    cmd = JSON.parse(trimmed);
  } catch {
    respond({ ok: false, error: "invalid JSON" });
    return;
  }

  // `null` and primitives are valid JSON but have no `cmd` field; `null` used to throw here.
  if (cmd === null || typeof cmd !== "object") {
    respond({ ok: false, error: "invalid command: expected a JSON object" });
    return;
  }

  switch (cmd.cmd) {
    case "ping":
      respond({ ok: true, data: { pong: true } });
      break;

    case "speak":
      if (typeof cmd.text !== "string") {
        respond({ ok: false, error: 'speak requires a string "text"' });
        break;
      }
      try {
        const ms = await streamSpeak(cmd.text);
        log(`spoke "${cmd.text.slice(0, 40)}" in ${ms}ms`);
        respond({ ok: true, data: { durationMs: ms } });
      } catch (err: any) {
        log(`TTS error: ${err.message}`);
        respond({ ok: false, error: err.message });
      }
      break;

    case "ack":
      // Fire and forget — respond immediately, speak in background
      respond({ ok: true, data: { queued: true } });
      streamSpeak(cmd.text).catch((e) => log(`ack TTS error: ${e.message}`));
      break;

    case "play_cached":
      respond({ ok: true, data: { queued: true, cached: true } });
      playCached(cmd.text).catch((e) => log(`play_cached error: ${e.message}`));
      break;

    case "infer":
      try {
        const { plan } = await resolveAssistantPlan(cmd.transcript, cmd.snapshot ?? {}, cmd.history);
        respond({ ok: true, data: plan });
      } catch (err: any) {
        respond({
          ok: false,
          error: err.message,
          data: {
            actions: [],
            spoken: "Sorry, I had trouble processing that.",
          },
        });
      }
      break;

    case "turn": {
      // Full orchestrated turn.
      //
      // Timeline:
      //   t=0      ──┬── cached ack sound (runs in the background)
      //              └── planning (local planner, else model inference)
      //   ack done ── speak the plan's `spoken` text, if any
      //   spoken   ── respond with the plan (the caller receives actions only now)
      //   after    ── play "Done." when the plan contained actions
      //
      const turnStart = performance.now();
      const transcript = cmd.transcript;

      if (typeof transcript !== "string") {
        respond({ ok: false, error: 'turn requires a string "transcript"' });
        break;
      }

      const snap = cmd.snapshot ?? {};
      // Tracks whether the response was already written, so the catch below
      // never emits a second one.
      let responded = false;

      try {
        log(`⏱ turn start: "${transcript.slice(0, 50)}"`);

        // Fire cached ack sound + planning in PARALLEL
        const ackPromise = playAck().catch((e) => log(`ack error: ${e.message}`));

        let inferResult: any;
        try {
          const { plan, source } = await resolveAssistantPlan(transcript, snap, cmd.history);
          inferResult = plan;
          if (source === "local") log("local planner matched");
          else log(`⏱ inference done in ${plan._meta?.durationMs}ms`);
        } catch (err: any) {
          log(`⏱ inference error: ${err.message}`);
          inferResult = { actions: [], spoken: "Sorry, I had trouble with that.", _meta: { error: err.message } };
        }

        // Wait for ack to finish before narrating (don't overlap speech)
        await ackPromise;

        const hasActions = Array.isArray(inferResult.actions) && inferResult.actions.length > 0;
        // Only a string can be spoken; anything else is treated as "nothing to say".
        const spokenText: string = typeof inferResult.spoken === "string" ? inferResult.spoken : "";

        if (hasActions && spokenText) {
          // SPEAK FIRST — user must hear what's about to happen before windows move
          log(`⏱ narrating: "${spokenText.slice(0, 50)}"`);
          await streamSpeak(spokenText).catch((e) => log(`narrate error: ${e.message}`));

          // NOW respond with actions — Swift executes after user heard the plan
          const turnMs = Math.round(performance.now() - turnStart);
          log(`⏱ turn response at ${turnMs}ms — actions sent after narration`);
          responded = true;
          respond({ ok: true, data: inferResult, turnMs });

          // Confirm
          await playCached("Done.").catch(() => {});
        } else if (spokenText) {
          // Conversation only — speak and respond
          await streamSpeak(spokenText).catch((e) => log(`speak error: ${e.message}`));
          const turnMs = Math.round(performance.now() - turnStart);
          responded = true;
          respond({ ok: true, data: inferResult, turnMs });
        } else {
          const turnMs = Math.round(performance.now() - turnStart);
          responded = true;
          respond({ ok: true, data: inferResult, turnMs });
        }

        const totalMs = Math.round(performance.now() - turnStart);
        log(`⏱ turn complete: ${totalMs}ms total`);
      } catch (err: any) {
        const message = err?.message ?? String(err);
        log(`turn failed: ${message}`);
        if (!responded) respond({ ok: false, error: message });
      }
      break;
    }

    default:
      respond({ ok: false, error: `unknown command: ${cmd.cmd}` });
  }
}
507
+
508
// Read stdin line by line. Commands are handled strictly one at a time, in order.
(async () => {
  // Isolate each command: one failing line must not end the loop, and the sender
  // still gets a response for it.
  const handle = async (line: string) => {
    try {
      await processLine(line);
    } catch (err: any) {
      const message = err?.message ?? String(err);
      log(`command failed: ${message}`);
      respond({ ok: false, error: message });
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    // The last piece is an incomplete line (or ""); keep it for the next chunk.
    buffer = lines.pop() ?? "";

    for (const line of lines) {
      await handle(line);
    }
  }

  // EOF: flush the decoder and process a final command that had no trailing newline.
  buffer += decoder.decode();
  const tail = buffer;
  buffer = "";
  if (tail.trim()) await handle(tail);
})().catch((err: any) => log(`stdin loop failed: ${err?.message ?? err}`));
523
+
524
/** Write one response object to stdout as a single line of JSON. */
function respond(obj: any) {
  const payload = JSON.stringify(obj);
  console.log(payload);
}
527
+
528
/**
 * Write a diagnostic line to stderr, prefixed with the time of day taken from the
 * ISO timestamp (HH:MM:SS.mmm) and the worker name. Going through console.error
 * keeps diagnostics separate from the JSON responses, which use console.log.
 */
function log(msg: string) {
  const iso = new Date().toISOString();
  const stamp = iso.substring(11, 23);
  console.error(`[${stamp}] handsoff-worker: ${msg}`);
}