@arach/lattices 0.2.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +172 -86
- package/apps/mac/Info.plist +43 -0
- package/apps/mac/Lattices.app/Contents/Info.plist +43 -0
- package/apps/mac/Lattices.app/Contents/MacOS/Lattices +0 -0
- package/apps/mac/Lattices.app/Contents/Resources/AppIcon.icns +0 -0
- package/apps/mac/Lattices.app/Contents/Resources/docs/assistant-knowledge.md +130 -0
- package/apps/mac/Lattices.app/Contents/Resources/tap.wav +0 -0
- package/apps/mac/Lattices.app/Contents/_CodeSignature/CodeResources +150 -0
- package/apps/mac/Lattices.entitlements +21 -0
- package/apps/mac/Resources/Pets/assistant-spark/pet.json +62 -0
- package/apps/mac/Resources/Pets/assistant-spark/spritesheet.webp +0 -0
- package/apps/mac/Resources/Pets/scout-ranger/pet.json +6 -0
- package/apps/mac/Resources/Pets/scout-ranger/spritesheet.webp +0 -0
- package/apps/mac/Resources/tap.wav +0 -0
- package/assets/AppIcon.icns +0 -0
- package/bin/assistant-intelligence.ts +912 -0
- package/bin/cli/capture.ts +252 -0
- package/bin/cli/daemon.ts +22 -0
- package/bin/cli/helpers.ts +105 -0
- package/bin/cli/layer.ts +178 -0
- package/bin/cli/runs.ts +43 -0
- package/bin/cli/search.ts +141 -0
- package/bin/cli/session.ts +32 -0
- package/bin/client.ts +17 -0
- package/bin/cua.ts +26 -0
- package/bin/{daemon-client.js → daemon-client.ts} +49 -30
- package/bin/handsoff-infer.ts +96 -0
- package/bin/handsoff-worker.ts +531 -0
- package/bin/infer.ts +424 -0
- package/bin/keychain.ts +75 -0
- package/bin/lattices-app.ts +655 -0
- package/bin/lattices-build +125 -0
- package/bin/lattices-build-env.ts +77 -0
- package/bin/lattices-dev +362 -0
- package/bin/lattices.ts +3260 -0
- package/bin/project-twin.ts +645 -0
- package/docs/agent-execution-plan.md +562 -0
- package/docs/agent-layer-guide.md +207 -0
- package/docs/agents.md +233 -0
- package/docs/ai-chat-ux-review.md +416 -0
- package/docs/api.md +1041 -47
- package/docs/app.md +96 -13
- package/docs/assistant-knowledge.md +130 -0
- package/docs/companion-deck.md +209 -0
- package/docs/component-extraction-roadmap.md +392 -0
- package/docs/concepts.md +13 -12
- package/docs/config.md +83 -10
- package/docs/gesture-customization-proposal.md +520 -0
- package/docs/handsoff-test-scenarios.md +84 -0
- package/docs/hyperspace-grid-snappiness.md +210 -0
- package/docs/layers.md +176 -28
- package/docs/mouse-gestures.md +244 -0
- package/docs/ocr.md +21 -9
- package/docs/overview.md +42 -23
- package/docs/presentation-execution-review.md +491 -0
- package/docs/prompts/hands-off-system.md +382 -0
- package/docs/prompts/hands-off-turn.md +30 -0
- package/docs/prompts/voice-advisor.md +31 -0
- package/docs/prompts/voice-fallback.md +23 -0
- package/docs/proposals/LAT-001-gesture-visual-customization.md +522 -0
- package/docs/proposals/LAT-002-shared-overlay-canvas.md +353 -0
- package/docs/proposals/LAT-003-menu-bar-controller-architecture.md +291 -0
- package/docs/proposals/LAT-004-interactive-overlay-actors.md +534 -0
- package/docs/proposals/LAT-005-action-runtime-product-spine.md +914 -0
- package/docs/proposals/LAT-006-followup-gaps.md +103 -0
- package/docs/proposals/LAT-006-runs-and-capture-in-lattices.md +566 -0
- package/docs/proposals/LAT-007-unified-app-shell.md +128 -0
- package/docs/quickstart.md +8 -12
- package/docs/reference/dewey.config.ts +74 -0
- package/docs/reference/install-agent.md +79 -0
- package/docs/release.md +172 -0
- package/docs/repo-structure.md +100 -0
- package/docs/terminal-kit.md +87 -0
- package/docs/tiling-reference.md +224 -0
- package/docs/twins.md +138 -0
- package/docs/voice-command-protocol.md +278 -0
- package/docs/voice-error-model.md +73 -0
- package/docs/voice.md +221 -0
- package/package.json +69 -16
- package/packages/npm/sdk/cua.d.mts +1 -0
- package/packages/npm/sdk/cua.d.ts +188 -0
- package/packages/npm/sdk/cua.mjs +376 -0
- package/app/Lattices.app/Contents/Info.plist +0 -24
- package/app/Package.swift +0 -13
- package/app/Sources/ActionRow.swift +0 -61
- package/app/Sources/App.swift +0 -10
- package/app/Sources/AppDelegate.swift +0 -234
- package/app/Sources/AppShellView.swift +0 -62
- package/app/Sources/AppTypeClassifier.swift +0 -70
- package/app/Sources/AppWindowShell.swift +0 -63
- package/app/Sources/CheatSheetHUD.swift +0 -332
- package/app/Sources/CommandModeState.swift +0 -1362
- package/app/Sources/CommandModeView.swift +0 -1405
- package/app/Sources/CommandModeWindow.swift +0 -192
- package/app/Sources/CommandPaletteView.swift +0 -307
- package/app/Sources/CommandPaletteWindow.swift +0 -134
- package/app/Sources/DaemonProtocol.swift +0 -101
- package/app/Sources/DaemonServer.swift +0 -414
- package/app/Sources/DesktopModel.swift +0 -121
- package/app/Sources/DesktopModelTypes.swift +0 -71
- package/app/Sources/DiagnosticLog.swift +0 -271
- package/app/Sources/EventBus.swift +0 -30
- package/app/Sources/HotkeyManager.swift +0 -250
- package/app/Sources/HotkeyStore.swift +0 -338
- package/app/Sources/InventoryManager.swift +0 -35
- package/app/Sources/InventoryPath.swift +0 -43
- package/app/Sources/KeyRecorderView.swift +0 -210
- package/app/Sources/LatticesApi.swift +0 -1125
- package/app/Sources/MainView.swift +0 -467
- package/app/Sources/MainWindow.swift +0 -83
- package/app/Sources/OcrModel.swift +0 -309
- package/app/Sources/OcrStore.swift +0 -295
- package/app/Sources/OmniSearchState.swift +0 -283
- package/app/Sources/OmniSearchView.swift +0 -288
- package/app/Sources/OmniSearchWindow.swift +0 -105
- package/app/Sources/OrphanRow.swift +0 -129
- package/app/Sources/PaletteCommand.swift +0 -419
- package/app/Sources/PermissionChecker.swift +0 -125
- package/app/Sources/Preferences.swift +0 -92
- package/app/Sources/ProcessModel.swift +0 -199
- package/app/Sources/ProcessQuery.swift +0 -151
- package/app/Sources/Project.swift +0 -28
- package/app/Sources/ProjectRow.swift +0 -368
- package/app/Sources/ProjectScanner.swift +0 -121
- package/app/Sources/ScreenMapState.swift +0 -2387
- package/app/Sources/ScreenMapView.swift +0 -2820
- package/app/Sources/ScreenMapWindowController.swift +0 -89
- package/app/Sources/SessionManager.swift +0 -72
- package/app/Sources/SettingsView.swift +0 -1053
- package/app/Sources/SettingsWindow.swift +0 -20
- package/app/Sources/TabGroupRow.swift +0 -178
- package/app/Sources/Terminal.swift +0 -259
- package/app/Sources/TerminalQuery.swift +0 -156
- package/app/Sources/TerminalSynthesizer.swift +0 -200
- package/app/Sources/Theme.swift +0 -163
- package/app/Sources/TilePickerView.swift +0 -209
- package/app/Sources/TmuxModel.swift +0 -53
- package/app/Sources/TmuxQuery.swift +0 -81
- package/app/Sources/WindowTiler.swift +0 -1755
- package/app/Sources/WorkspaceManager.swift +0 -434
- package/bin/lattices-app.js +0 -221
- package/bin/lattices.js +0 -1418
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Hands-off worker — long-running process that handles both inference and TTS.
|
|
4
|
+
*
|
|
5
|
+
* Reads newline-delimited JSON commands from stdin, writes JSON responses to stdout.
|
|
6
|
+
* Keeps TTS and inference warm — no cold starts.
|
|
7
|
+
*
|
|
8
|
+
* Commands:
|
|
9
|
+
* {"cmd":"infer","transcript":"...","snapshot":{...},"history":[...]}
|
|
10
|
+
* {"cmd":"speak","text":"..."}
|
|
11
|
+
* {"cmd":"ack","text":"..."} (speak + don't wait for completion)
|
|
12
|
+
* {"cmd":"ping"}
|
|
13
|
+
*
|
|
14
|
+
* Responses:
|
|
15
|
+
* {"ok":true,"data":{...}}
|
|
16
|
+
* {"ok":false,"error":"..."}
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import {
|
|
20
|
+
assistantPromptPath,
|
|
21
|
+
buildAssistantContextMessage,
|
|
22
|
+
buildAssistantSystemPrompt,
|
|
23
|
+
normalizeAssistantPlan,
|
|
24
|
+
tryLocalAssistantPlan,
|
|
25
|
+
} from "./assistant-intelligence.ts";
|
|
26
|
+
import { infer, resolveVoiceInferenceOptions } from "./infer.ts";
|
|
27
|
+
|
|
28
|
+
const INFER_TIMEOUT_MS = 15_000;
|
|
29
|
+
const voiceInference = resolveVoiceInferenceOptions();
|
|
30
|
+
|
|
31
|
+
/**
 * Run one inference call under a hard timeout and coerce the reply into a plan object.
 *
 * The request is aborted through an AbortController once INFER_TIMEOUT_MS elapses.
 * Markdown code fences are stripped from the reply text, then the span from the first
 * "{" to the last "}" is parsed as JSON. When nothing parses, the reply is wrapped as a
 * spoken-only plan with an empty action list.
 *
 * @param prompt  User message forwarded to `infer`.
 * @param options Options forwarded to `infer`; `abortSignal` is added on top of them.
 * @returns `data` — the parsed (or wrapped) plan; `raw` — the untouched `infer` result.
 */
async function inferSmart(prompt: string, options: any): Promise<{ data: any; raw: any }> {
  const abort = new AbortController();
  const timeoutHandle = setTimeout(() => abort.abort(), INFER_TIMEOUT_MS);

  let raw: any;
  try {
    raw = await infer(prompt, { ...options, abortSignal: abort.signal });
  } finally {
    // Disarm the timer whether infer resolved or threw.
    clearTimeout(timeoutHandle);
  }

  // Drop ```json / ``` fences so a fenced JSON reply can still be parsed.
  const unfenced = raw.text.replace(/```json\s*/g, "").replace(/```\s*/g, "").trim();
  const open = unfenced.indexOf("{");
  const close = unfenced.lastIndexOf("}");

  if (open !== -1 && close !== -1) {
    try {
      return { data: JSON.parse(unfenced.slice(open, close + 1)), raw };
    } catch {
      // The braces did not delimit valid JSON — fall through to the spoken-only path.
    }
  }

  // Plain text reply: treat it as conversation with no actions.
  log(`response was plain text, wrapping as spoken: "${raw.text.slice(0, 80)}"`);
  return { data: { actions: [], spoken: raw.text }, raw };
}
|
|
65
|
+
import { readFileSync } from "fs";
|
|
66
|
+
import { join } from "path";
|
|
67
|
+
import { spawn } from "child_process";
|
|
68
|
+
|
|
69
|
+
// ── Streaming TTS via OpenAI API → ffplay ──────────────────────────
|
|
70
|
+
|
|
71
|
+
const OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
72
|
+
const ttsConfig = loadTTSConfig();
|
|
73
|
+
|
|
74
|
+
/**
 * Resolve the OpenAI API key and voice used for text-to-speech.
 *
 * The key comes from the OPENAI_API_KEY environment variable when set, otherwise from
 * `providers.openai.apiKey` in ~/.config/speakeasy/settings.json. The voice defaults to
 * "nova" and is overridden by `providers.openai.voice` from the same file. A missing or
 * unreadable settings file is ignored.
 *
 * @returns `{ apiKey, voice }`; `apiKey` is an empty string when none was found.
 */
function loadTTSConfig() {
  const settingsPath = join(process.env.HOME || "", ".config/speakeasy/settings.json");
  const resolved = { apiKey: process.env.OPENAI_API_KEY || "", voice: "nova" };

  try {
    const openai = JSON.parse(readFileSync(settingsPath, "utf-8")).providers?.openai;
    // The environment variable wins over the settings file for the key.
    if (!resolved.apiKey && openai?.apiKey) resolved.apiKey = openai.apiKey;
    if (openai?.voice) resolved.voice = openai.voice;
  } catch {
    // Best effort: no settings file (or invalid JSON) just means defaults are used.
  }

  return resolved;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Stream TTS: request raw PCM audio from the OpenAI speech endpoint and pipe it straight
 * into ffplay so playback starts as soon as the first chunks arrive.
 *
 * @param text Text to speak.
 * @returns Milliseconds from the start of the request until ffplay closed.
 * @throws Error when no API key is configured, the API answers with a non-2xx status,
 *         the response has no body, ffplay cannot be spawned, or the audio pipe breaks.
 */
async function streamSpeak(text: string): Promise<number> {
  // Fail fast with a clear message instead of spending a round-trip on a certain 401.
  if (!ttsConfig.apiKey) {
    throw new Error("OpenAI TTS error: no API key configured");
  }

  const start = performance.now();

  const res = await fetch(OPENAI_TTS_URL, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${ttsConfig.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "tts-1",
      voice: ttsConfig.voice,
      input: text,
      response_format: "pcm",
      speed: 1.1,
    }),
  });

  if (!res.ok) {
    throw new Error(`OpenAI TTS error: ${res.status} ${res.statusText}`);
  }

  const ttfb = Math.round(performance.now() - start);
  log(`TTS first byte in ${ttfb}ms`);

  // Check for a body BEFORE spawning the player, so a body-less response never leaves
  // an idle ffplay process waiting on stdin.
  const reader = res.body?.getReader();
  if (!reader) {
    throw new Error("No response body");
  }

  // Pipe response body directly to ffplay — playback starts as chunks arrive
  return new Promise<number>((resolve, reject) => {
    const player = spawn("ffplay", [
      "-nodisp",              // no video window
      "-autoexit",            // quit when done
      "-loglevel", "quiet",
      "-f", "s16le",          // PCM signed 16-bit little-endian
      "-ar", "24000",         // OpenAI TTS outputs 24kHz
      "-ch_layout", "mono",
      "-",                    // read from stdin
    ], { stdio: ["pipe", "ignore", "ignore"] });

    let settled = false;

    // Single failure path: stop the download, stop the player, reject once.
    const fail = (err: unknown) => {
      if (settled) return;
      settled = true;
      reader.cancel().catch(() => {});
      player.kill();
      reject(err instanceof Error ? err : new Error(String(err)));
    };

    // Without a listener, an EPIPE on stdin (ffplay missing or exited early) would be
    // an unhandled 'error' event on the stream.
    player.stdin.on("error", fail);
    player.on("error", fail);

    player.on("close", () => {
      if (settled) return;
      settled = true;
      resolve(Math.round(performance.now() - start));
    });

    // Pump chunks from fetch → ffplay stdin
    (async () => {
      while (!settled) {
        const { done, value } = await reader.read();
        if (done) break;
        player.stdin.write(value);
      }
      player.stdin.end();
    })().catch(fail);
  });
}
|
|
152
|
+
|
|
153
|
+
// ── Pre-cached ack sounds (no API call needed) ────────────────────
|
|
154
|
+
|
|
155
|
+
// Ack phrases — played immediately when user stops talking
|
|
156
|
+
const ACK_PHRASES = [
|
|
157
|
+
"Got it.",
|
|
158
|
+
"Heard you.",
|
|
159
|
+
"On it.",
|
|
160
|
+
"Yep.",
|
|
161
|
+
"Cool.",
|
|
162
|
+
"Sure.",
|
|
163
|
+
"Okay.",
|
|
164
|
+
"One sec.",
|
|
165
|
+
];
|
|
166
|
+
|
|
167
|
+
// Confirmation phrases — played after executing known actions
|
|
168
|
+
const CONFIRM_PHRASES = [
|
|
169
|
+
"Tiled.",
|
|
170
|
+
"Focused.",
|
|
171
|
+
"Done.",
|
|
172
|
+
"Maximized.",
|
|
173
|
+
"Split.",
|
|
174
|
+
"Switched.",
|
|
175
|
+
"Distributed.",
|
|
176
|
+
"Restored.",
|
|
177
|
+
"Searching.",
|
|
178
|
+
];
|
|
179
|
+
|
|
180
|
+
const ackCacheDir = join(process.env.HOME || "", ".lattices", "tts-cache");
|
|
181
|
+
const ackCache = new Map<string, string>(); // phrase → file path
|
|
182
|
+
|
|
183
|
+
/**
 * Ensure every ack/confirmation phrase has a PCM file in the cache directory and register
 * each available file in `ackCache` (phrase → file path).
 *
 * Phrases whose file already exists are registered as-is. Missing ones are generated
 * through the OpenAI speech endpoint, one request at a time. Failures are logged and
 * skipped, so the function never rejects because of a single phrase; phrases that could
 * not be cached fall back to live TTS in `playCached`.
 */
async function ensureVoiceCache() {
  const { mkdirSync, existsSync, writeFileSync } = await import("fs");

  // The caller does not attach a rejection handler, so a failing mkdir must not throw.
  try {
    mkdirSync(ackCacheDir, { recursive: true });
  } catch (e: unknown) {
    log(`voice cache unavailable: ${e instanceof Error ? e.message : String(e)}`);
    return;
  }

  const allPhrases = [...ACK_PHRASES, ...CONFIRM_PHRASES];
  let cached = 0;
  let generated = 0;
  let warnedNoKey = false;

  for (const phrase of allPhrases) {
    // "Got it." → "voice_got_it_.pcm": every non-letter becomes an underscore.
    const safeName = phrase.replace(/[^a-z]/gi, "_").toLowerCase();
    const filePath = join(ackCacheDir, `voice_${safeName}.pcm`);

    if (existsSync(filePath)) {
      ackCache.set(phrase, filePath);
      cached++;
      continue;
    }

    // No key: generation cannot succeed, so skip the request (warn once, not per phrase).
    if (!ttsConfig.apiKey) {
      if (!warnedNoKey) {
        log("voice cache: no OpenAI API key configured, skipping generation");
        warnedNoKey = true;
      }
      continue;
    }

    // Generate and cache
    try {
      const res = await fetch(OPENAI_TTS_URL, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${ttsConfig.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: "tts-1",
          voice: ttsConfig.voice,
          input: phrase,
          response_format: "pcm",
          speed: 1.1,
        }),
      });

      if (!res.ok) {
        log(`cache failed for "${phrase}": ${res.status} ${res.statusText}`);
        continue;
      }

      const buf = Buffer.from(await res.arrayBuffer());
      // Never persist an empty file: it would be treated as a valid cache hit forever.
      if (buf.length === 0) {
        log(`cache failed for "${phrase}": empty audio response`);
        continue;
      }

      writeFileSync(filePath, buf);
      ackCache.set(phrase, filePath);
      generated++;
      log(`cached: "${phrase}"`);
    } catch (e: unknown) {
      log(`cache failed for "${phrase}": ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  log(`voice cache: ${cached} hit, ${generated} generated, ${allPhrases.length} total`);
}
|
|
231
|
+
|
|
232
|
+
/**
 * Play a phrase from the on-disk voice cache through ffplay — no API call needed.
 * A phrase that is not in `ackCache` is spoken via `streamSpeak` instead.
 *
 * @param phrase Exact phrase to play (cache key).
 * @returns Milliseconds elapsed until playback finished. A non-zero ffplay exit code is
 *          logged but still resolves; only a spawn failure rejects.
 */
async function playCached(phrase: string): Promise<number> {
  const startedAt = performance.now();
  const cachedPath = ackCache.get(phrase);

  if (!cachedPath) {
    log(`playCached: cache miss for "${phrase}", falling back to TTS`);
    return streamSpeak(phrase);
  }

  log(`playing cached: "${phrase}"`);

  // Raw PCM has no header, so format, sample rate and channel layout are given explicitly.
  const ffplayArgs = [
    "-nodisp", "-autoexit", "-loglevel", "quiet",
    "-f", "s16le", "-ar", "24000", "-ch_layout", "mono",
    cachedPath,
  ];

  return new Promise((resolve, reject) => {
    const proc = spawn("ffplay", ffplayArgs, { stdio: ["ignore", "ignore", "pipe"] });

    let errText = "";
    proc.stderr?.on("data", (chunk: Buffer) => {
      errText += chunk.toString();
    });

    proc.on("error", (err: Error) => {
      log(`ffplay spawn error: ${err.message}`);
      reject(err);
    });

    proc.on("close", (code: number) => {
      const elapsed = Math.round(performance.now() - startedAt);
      if (code === 0) {
        log(`played "${phrase}" in ${elapsed}ms`);
      } else {
        log(`ffplay error (code ${code}): ${errText.slice(0, 100)}`);
      }
      resolve(elapsed);
    });
  });
}
|
|
266
|
+
|
|
267
|
+
/**
 * Play one acknowledgement phrase, chosen uniformly at random from ACK_PHRASES.
 *
 * @returns The duration reported by `playCached`, in milliseconds.
 */
function playAck(): Promise<number> {
  const pick = Math.floor(Math.random() * ACK_PHRASES.length);
  return playCached(ACK_PHRASES[pick]);
}
|
|
272
|
+
|
|
273
|
+
/**
 * Play the confirmation phrase that matches an action intent.
 *
 * @param intent Action intent name (e.g. "tile_window", "focus").
 * @returns The duration reported by `playCached`, in milliseconds. Unknown intents
 *          play "Done.".
 */
function playConfirm(intent: string): Promise<number> {
  // A Map instead of a plain object: with an object, intents such as "constructor" or
  // "toString" resolve to inherited Object.prototype members and skip the default.
  const phraseByIntent = new Map<string, string>([
    ["tile_window", "Tiled."],
    ["focus", "Focused."],
    ["distribute", "Distributed."],
    ["search", "Searching."],
    ["switch_layer", "Switched."],
    ["create_layer", "Done."],
  ]);
  return playCached(phraseByIntent.get(intent) ?? "Done.");
}
|
|
285
|
+
|
|
286
|
+
// Warm up cache on startup
|
|
287
|
+
ensureVoiceCache().then(() => log("voice cache ready"));
|
|
288
|
+
|
|
289
|
+
log("worker started, streaming TTS ready");
|
|
290
|
+
|
|
291
|
+
// ── Load system prompt once ────────────────────────────────────────
|
|
292
|
+
|
|
293
|
+
const systemPrompt = buildAssistantSystemPrompt();
|
|
294
|
+
log("system prompt loaded");
|
|
295
|
+
log(`voice inference: ${voiceInference.provider}/${voiceInference.model}`);
|
|
296
|
+
|
|
297
|
+
// ── Auto-restart on file changes ───────────────────────────────────
|
|
298
|
+
|
|
299
|
+
const watchFiles = [
|
|
300
|
+
assistantPromptPath,
|
|
301
|
+
join(import.meta.dir, "assistant-intelligence.ts"),
|
|
302
|
+
join(import.meta.dir, "..", ".env"),
|
|
303
|
+
join(import.meta.dir, "..", ".env.local"),
|
|
304
|
+
import.meta.path, // this script itself
|
|
305
|
+
];
|
|
306
|
+
|
|
307
|
+
// Watch each file and exit shortly after the first change so the supervising process
// can start a fresh worker (the original note here says Swift auto-restarts in 2s).
for (const f of watchFiles) {
  const name = f.split("/").pop();
  try {
    const { watch } = await import("fs");
    let debounce: ReturnType<typeof setTimeout> | null = null;

    const watcher = watch(f, () => {
      // Editors often emit several events per save; only the first one arms the exit.
      if (debounce) return;
      debounce = setTimeout(() => {
        log(`file changed: ${name} — exiting for restart`);
        process.exit(0); // Swift auto-restarts in 2s
      }, 500);
    });

    // An FSWatcher emits 'error' (e.g. when the file disappears); without a listener
    // that would surface as an unhandled error event.
    watcher.on("error", (err: Error) => log(`watch error on ${name}: ${err.message}`));

    log(`watching: ${name}`);
  } catch (err: unknown) {
    // Still best effort (optional files like .env may not exist), but say so in the
    // log so it is clear which files will not trigger a restart.
    log(`not watching ${name}: ${err instanceof Error ? err.message : String(err)}`);
  }
}
|
|
321
|
+
|
|
322
|
+
// ── Command loop ───────────────────────────────────────────────────
|
|
323
|
+
|
|
324
|
+
const decoder = new TextDecoder();
|
|
325
|
+
const reader = Bun.stdin.stream().getReader();
|
|
326
|
+
let buffer = "";
|
|
327
|
+
|
|
328
|
+
/**
 * Handle one line of the newline-delimited JSON protocol and write the response through
 * `respond`.
 *
 * Supported commands (`cmd` field): "ping", "speak", "ack", "play_cached", "infer" and
 * "turn". Blank lines are ignored. Malformed JSON, non-object payloads, missing required
 * fields and unknown commands each produce an `{ ok: false, error }` response rather than
 * an exception.
 *
 * @param line One raw line read from stdin (without the trailing newline).
 */
async function processLine(line: string) {
  const trimmed = line.trim();
  if (!trimmed) return;

  let cmd: any;
  try {
    cmd = JSON.parse(trimmed);
  } catch {
    respond({ ok: false, error: "invalid JSON" });
    return;
  }

  // JSON.parse also accepts null and scalars; reading `.cmd` off those would throw.
  if (cmd === null || typeof cmd !== "object") {
    respond({ ok: false, error: "invalid command: expected a JSON object" });
    return;
  }

  const errText = (e: unknown) => (e instanceof Error ? e.message : String(e));
  const hasText = typeof cmd.text === "string" && cmd.text.length > 0;
  const hasTranscript = typeof cmd.transcript === "string";

  // One history normalisation for both "infer" and "turn": non-string content is
  // serialised, and entries without content are dropped.
  const toMessages = (history: unknown) =>
    (Array.isArray(history) ? history : [])
      .map((h: any) => ({
        role: h?.role as "user" | "assistant",
        content: typeof h?.content === "string" ? h.content : JSON.stringify(h?.content),
      }))
      .filter((m: any) => m.content && m.content.length > 0);

  switch (cmd.cmd) {
    case "ping":
      respond({ ok: true, data: { pong: true } });
      break;

    case "speak":
      if (!hasText) {
        respond({ ok: false, error: "speak: missing text" });
        break;
      }
      try {
        const ms = await streamSpeak(cmd.text);
        log(`spoke "${cmd.text.slice(0, 40)}" in ${ms}ms`);
        respond({ ok: true, data: { durationMs: ms } });
      } catch (err: unknown) {
        log(`TTS error: ${errText(err)}`);
        respond({ ok: false, error: errText(err) });
      }
      break;

    case "ack":
      if (!hasText) {
        respond({ ok: false, error: "ack: missing text" });
        break;
      }
      // Fire and forget — respond immediately, speak in background
      respond({ ok: true, data: { queued: true } });
      streamSpeak(cmd.text).catch((e) => log(`ack TTS error: ${errText(e)}`));
      break;

    case "play_cached":
      if (!hasText) {
        respond({ ok: false, error: "play_cached: missing text" });
        break;
      }
      respond({ ok: true, data: { queued: true, cached: true } });
      playCached(cmd.text).catch((e) => log(`play_cached error: ${errText(e)}`));
      break;

    case "infer":
      try {
        if (!hasTranscript) throw new Error("infer: missing transcript");

        // Try the local planner first; it answers without a model call.
        const localPlan = tryLocalAssistantPlan(cmd.transcript, cmd.snapshot ?? {});
        if (localPlan) {
          respond({ ok: true, data: localPlan });
          break;
        }

        const userMessage = buildAssistantContextMessage(cmd.transcript, cmd.snapshot ?? {});
        const messages = toMessages(cmd.history);

        const { data, raw } = await inferSmart(userMessage, {
          provider: voiceInference.provider,
          model: voiceInference.model,
          system: systemPrompt,
          messages,
          temperature: 0.2,
          maxTokens: 512,
          tag: "hands-off",
        });

        const plan = normalizeAssistantPlan(data, cmd.transcript);
        respond({
          ok: true,
          data: {
            ...plan,
            _meta: {
              ...plan._meta,
              provider: raw.provider,
              model: raw.model,
              durationMs: raw.durationMs,
              tokens: raw.usage?.totalTokens,
            },
          },
        });
      } catch (err: unknown) {
        // Failures still carry a speakable fallback plan in `data`.
        respond({
          ok: false,
          error: errText(err),
          data: {
            actions: [],
            spoken: "Sorry, I had trouble processing that.",
          },
        });
      }
      break;

    case "turn": {
      // Full orchestrated turn.
      //
      // Timeline:
      //   t=0      ──┬── cached ack sound (not awaited yet)
      //              └── local planner, else model inference
      //   then     ── wait for the ack to finish (speech must not overlap)
      //   then     ── narrate the plan via TTS
      //   then     ── respond with the plan (actions go out only after narration)
      //   finally  ── play the cached "Done." confirmation (action turns only)
      //
      if (!hasTranscript) {
        respond({ ok: false, error: "turn: missing transcript" });
        break;
      }

      const turnStart = performance.now();
      const transcript: string = cmd.transcript;
      const snap = cmd.snapshot ?? {};

      log(`⏱ turn start: "${transcript.slice(0, 50)}"`);

      // Fire cached ack sound + inference in PARALLEL
      const ackPromise = playAck().catch((e) => log(`ack error: ${errText(e)}`));

      const messages = toMessages(cmd.history);

      let inferResult: any = null;
      const localPlan = tryLocalAssistantPlan(transcript, snap);
      if (localPlan) {
        inferResult = localPlan;
        log("local planner matched");
      } else {
        const userMessage = buildAssistantContextMessage(transcript, snap);
        try {
          const { data, raw } = await inferSmart(userMessage, {
            provider: voiceInference.provider,
            model: voiceInference.model,
            system: systemPrompt,
            messages,
            temperature: 0.2,
            maxTokens: 512,
            tag: "hands-off",
          });
          const plan = normalizeAssistantPlan(data, transcript);
          inferResult = {
            ...plan,
            _meta: {
              ...plan._meta,
              provider: raw.provider,
              model: raw.model,
              durationMs: raw.durationMs,
              tokens: raw.usage?.totalTokens,
            },
          };
          log(`⏱ inference done in ${raw.durationMs}ms`);
        } catch (err: unknown) {
          log(`⏱ inference error: ${errText(err)}`);
          inferResult = {
            actions: [],
            spoken: "Sorry, I had trouble with that.",
            _meta: { error: errText(err) },
          };
        }
      }

      // Wait for ack to finish before narrating (don't overlap speech)
      await ackPromise;

      const hasActions = Array.isArray(inferResult.actions) && inferResult.actions.length > 0;
      // Only a string can be narrated; anything else is treated as "nothing to say".
      const spokenText: string = typeof inferResult.spoken === "string" ? inferResult.spoken : "";

      if (hasActions && spokenText) {
        // SPEAK FIRST — user must hear what's about to happen before windows move
        log(`⏱ narrating: "${spokenText.slice(0, 50)}"`);
        await streamSpeak(spokenText).catch((e) => log(`narrate error: ${errText(e)}`));

        // NOW respond with actions — Swift executes after user heard the plan
        const turnMs = Math.round(performance.now() - turnStart);
        log(`⏱ turn response at ${turnMs}ms — actions sent after narration`);
        respond({ ok: true, data: inferResult, turnMs });

        // Confirm
        await playCached("Done.").catch(() => {});
      } else if (spokenText) {
        // Conversation only — speak and respond
        await streamSpeak(spokenText).catch((e) => log(`speak error: ${errText(e)}`));
        const turnMs = Math.round(performance.now() - turnStart);
        respond({ ok: true, data: inferResult, turnMs });
      } else {
        const turnMs = Math.round(performance.now() - turnStart);
        respond({ ok: true, data: inferResult, turnMs });
      }

      const totalMs = Math.round(performance.now() - turnStart);
      log(`⏱ turn complete: ${totalMs}ms total`);
      break;
    }

    default:
      respond({ ok: false, error: `unknown command: ${cmd.cmd}` });
  }
}
|
|
507
|
+
|
|
508
|
+
// Read stdin line by line and process commands strictly one at a time.
(async () => {
  // One failing command must not end the loop or leave the caller without a response.
  const handle = async (line: string) => {
    try {
      await processLine(line);
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      log(`command failed: ${msg}`);
      respond({ ok: false, error: msg });
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    // `stream: true` keeps multi-byte characters split across chunks intact.
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    // The last element is the (possibly empty) unfinished line; keep it for the next chunk.
    buffer = lines.pop() ?? "";

    for (const line of lines) {
      await handle(line);
    }
  }

  // EOF: flush the decoder and process a final command that had no trailing newline.
  buffer += decoder.decode();
  if (buffer.trim()) {
    const lastLine = buffer;
    buffer = "";
    await handle(lastLine);
  }
})().catch((err: unknown) => {
  log(`stdin loop stopped: ${err instanceof Error ? err.message : String(err)}`);
});
|
|
523
|
+
|
|
524
|
+
/**
 * Emit one protocol response: the object serialised as a single line of JSON on stdout.
 *
 * @param obj Response payload, e.g. `{ ok: true, data }` or `{ ok: false, error }`.
 */
function respond(obj: any) {
  const payload = JSON.stringify(obj);
  console.log(payload);
}
|
|
527
|
+
|
|
528
|
+
/**
 * Write a diagnostic line to stderr, prefixed with the UTC time of day (HH:MM:SS.mmm)
 * and the worker name. stdout is reserved for protocol responses.
 *
 * @param msg Message text.
 */
function log(msg: string) {
  // ISO timestamps look like 2024-01-01T12:34:56.789Z; characters 11..22 are the time.
  const clock = new Date().toISOString().substring(11, 23);
  console.error("[" + clock + "] handsoff-worker: " + msg);
}
|