@lattices/cli 0.4.1 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +3 -0
  2. package/app/Info.plist +2 -2
  3. package/app/Lattices.app/Contents/Info.plist +2 -2
  4. package/app/Lattices.app/Contents/MacOS/Lattices +0 -0
  5. package/app/Package.swift +6 -0
  6. package/app/Sources/ActionRow.swift +43 -26
  7. package/app/Sources/App.swift +10 -0
  8. package/app/Sources/AppDelegate.swift +91 -30
  9. package/app/Sources/AppShellView.swift +2 -0
  10. package/app/Sources/AppTypeClassifier.swift +36 -0
  11. package/app/Sources/AppUpdater.swift +92 -0
  12. package/app/Sources/CheatSheetHUD.swift +1 -0
  13. package/app/Sources/CliActionLauncher.swift +50 -0
  14. package/app/Sources/CommandModeView.swift +4 -24
  15. package/app/Sources/CompanionActivityLog.swift +70 -0
  16. package/app/Sources/CompanionKeyboardController.swift +141 -0
  17. package/app/Sources/DesktopModel.swift +4 -0
  18. package/app/Sources/HandsOffSession.swift +53 -16
  19. package/app/Sources/HomeDashboardView.swift +18 -10
  20. package/app/Sources/HotkeyStore.swift +8 -5
  21. package/app/Sources/IntentEngine.swift +7 -1
  22. package/app/Sources/LatticesApi.swift +125 -4
  23. package/app/Sources/LatticesCompanionBridgeServer.swift +438 -0
  24. package/app/Sources/LatticesCompanionCockpit.swift +555 -0
  25. package/app/Sources/LatticesCompanionSecurityCoordinator.swift +594 -0
  26. package/app/Sources/LatticesCompanionTrackpadController.swift +204 -0
  27. package/app/Sources/LatticesDeckHost.swift +1463 -0
  28. package/app/Sources/LatticesRuntime.swift +61 -0
  29. package/app/Sources/MainView.swift +398 -186
  30. package/app/Sources/MouseFinder.swift +335 -30
  31. package/app/Sources/MouseGestureConfig.swift +364 -0
  32. package/app/Sources/MouseGestureController.swift +1203 -0
  33. package/app/Sources/MouseInputDeviceStore.swift +98 -0
  34. package/app/Sources/MouseInputEventViewer.swift +272 -0
  35. package/app/Sources/MouseShortcutStore.swift +107 -0
  36. package/app/Sources/OmniSearchView.swift +136 -2
  37. package/app/Sources/OmniSearchWindow.swift +65 -5
  38. package/app/Sources/OnboardingView.swift +30 -16
  39. package/app/Sources/PaletteCommand.swift +26 -6
  40. package/app/Sources/PermissionChecker.swift +76 -2
  41. package/app/Sources/PiAuthNextStepCard.swift +148 -0
  42. package/app/Sources/PiAuthPromptCard.swift +90 -0
  43. package/app/Sources/PiChatDock.swift +137 -74
  44. package/app/Sources/PiChatSession.swift +608 -108
  45. package/app/Sources/PiInstallCallout.swift +86 -0
  46. package/app/Sources/PiProviderSetupCallout.swift +99 -0
  47. package/app/Sources/PiWorkspaceView.swift +174 -77
  48. package/app/Sources/Preferences.swift +78 -0
  49. package/app/Sources/ScreenMapState.swift +91 -31
  50. package/app/Sources/ScreenMapView.swift +510 -524
  51. package/app/Sources/ScreenMapWindowController.swift +12 -4
  52. package/app/Sources/SettingsView.swift +869 -152
  53. package/app/Sources/SystemTelemetryMonitor.swift +273 -0
  54. package/app/Sources/VoiceCommandWindow.swift +23 -2
  55. package/app/Sources/WindowDragSnapController.swift +628 -0
  56. package/app/Sources/WindowTiler.swift +328 -65
  57. package/app/Sources/WorkspaceManager.swift +288 -0
  58. package/bin/assistant-intelligence.ts +874 -0
  59. package/bin/handsoff-infer.ts +16 -209
  60. package/bin/handsoff-worker.ts +45 -258
  61. package/bin/lattices-app.ts +65 -1
  62. package/bin/lattices-dev +4 -0
  63. package/bin/lattices.ts +125 -14
  64. package/docs/agents.md +14 -0
  65. package/docs/api.md +55 -0
  66. package/docs/app.md +3 -0
  67. package/docs/companion-deck.md +180 -0
  68. package/docs/config.md +25 -0
  69. package/docs/tiling-reference.md +55 -0
  70. package/docs/voice-error-model.md +73 -0
  71. package/package.json +4 -2
@@ -8,10 +8,13 @@
8
8
  * All logging goes to stderr so it doesn't pollute the JSON output.
9
9
  */
10
10
 
11
+ import {
12
+ buildAssistantContextMessage,
13
+ buildAssistantSystemPrompt,
14
+ normalizeAssistantPlan,
15
+ tryLocalAssistantPlan,
16
+ } from "./assistant-intelligence.ts";
11
17
  import { inferJSON } from "../lib/infer.ts";
12
- import { readFileSync } from "fs";
13
- import { join, dirname } from "path";
14
- import { homedir } from "os";
15
18
 
16
19
  // ── Read input from stdin ──────────────────────────────────────────
17
20
 
@@ -30,213 +33,15 @@ const req = JSON.parse(input) as {
30
33
  history?: Array<{ role: "user" | "assistant"; content: string }>;
31
34
  };
32
35
 
33
- // ── Load system prompt from file ───────────────────────────────────
36
+ const transcript = req.transcript ?? "";
37
+ const systemPrompt = buildAssistantSystemPrompt();
38
+ const userMessage = buildAssistantContextMessage(transcript, req.snapshot ?? {});
34
39
 
35
- const promptDir = join(dirname(import.meta.dir), "docs", "prompts");
36
- let systemPrompt: string;
37
- try {
38
- systemPrompt = readFileSync(join(promptDir, "hands-off-system.md"), "utf-8")
39
- .split("\n")
40
- .filter((l) => !l.startsWith("# "))
41
- .join("\n")
42
- .trim();
43
- } catch {
44
- systemPrompt = "You are a workspace assistant. Respond with JSON: {actions, spoken}.";
45
- }
46
-
47
- // Replace {{intent_catalog}} with the actual tiling reference
48
- const intentCatalog = `
49
- tile_window: Tile a window to a screen position
50
- Slots:
51
- position (required): Named position or grid:CxR:C,R syntax.
52
- Halves: left, right, top, bottom
53
- Quarters (2x2): top-left, top-right, bottom-left, bottom-right
54
- Thirds (3x1): left-third, center-third, right-third
55
- Sixths (3x2): top-left-third, top-center-third, top-right-third, bottom-left-third, bottom-center-third, bottom-right-third
56
- Fourths (4x1): first-fourth, second-fourth, third-fourth, last-fourth
57
- Eighths (4x2): top-first-fourth, top-second-fourth, top-third-fourth, top-last-fourth, bottom-first-fourth, bottom-second-fourth, bottom-third-fourth, bottom-last-fourth
58
- Special: maximize (full screen), center (centered floating)
59
- Grid syntax: grid:CxR:C,R (e.g. grid:5x3:2,1 = center cell of 5x3 grid)
60
- app (optional): Target app name — match loosely (e.g. "chrome" matches "Google Chrome")
61
- wid (optional): Target window ID (from snapshot)
62
- session (optional): Tmux session name
63
- If no app/wid/session given, tiles the frontmost window.
64
- "quarter" = 2x2 cell (top-left etc.), NOT a 4x1 fourth.
65
- "top quarter" = top-left or top-right (2x2). "top third" = top-left-third (3x2).
66
- Examples: "tile chrome left" → {intent:"tile_window", slots:{app:"chrome", position:"left"}}
67
-
68
- focus: Focus a window, app, or session
69
- Slots:
70
- app (optional): App name to focus
71
- session (optional): Session name to focus
72
- wid (optional): Window ID to focus
73
-
74
- distribute: Arrange windows in an even grid — with optional app filter and region constraint
75
- Slots:
76
- app (optional): Filter to windows of this app (e.g. "iTerm2", "Google Chrome"). Without this, distributes ALL visible windows.
77
- region (optional): Constrain the grid to a screen region. Uses the same position names as tile_window:
78
- Halves: left, right, top, bottom
79
- Quarters: top-left, top-right, bottom-left, bottom-right
80
- Thirds: left-third, center-third, right-third
81
- Without this, uses the full screen.
82
- Examples:
83
- "grid the terminals on the right" → {intent:"distribute", slots:{app:"iTerm2", region:"right"}}
84
- "organize my chrome windows in the bottom half" → {intent:"distribute", slots:{app:"Google Chrome", region:"bottom"}}
85
- "spread everything out" → {intent:"distribute", slots:{}}
86
- "tile all terminals" → {intent:"distribute", slots:{app:"iTerm2"}}
87
-
88
- swap: Swap the positions of two windows
89
- Slots:
90
- wid_a (required): Window ID of the first window (from snapshot)
91
- wid_b (required): Window ID of the second window (from snapshot)
92
- Examples:
93
- "swap Chrome and iTerm" → {intent:"swap", slots:{wid_a:12345, wid_b:67890}}
94
-
95
- hide: Hide or minimize a window or app
96
- Slots:
97
- app (optional): App name to hide (hides the entire app)
98
- wid (optional): Window ID to minimize (minimizes just that window)
99
- Use app to hide all windows of an app. Use wid to minimize a single window.
100
- Examples:
101
- "hide Slack" → {intent:"hide", slots:{app:"Slack"}}
102
- "minimize that" → {intent:"hide", slots:{wid:12345}}
103
-
104
- highlight: Flash a window's border to identify it visually
105
- Slots:
106
- wid (optional): Window ID to highlight (from snapshot)
107
- app (optional): App name to highlight
108
- Use when the user asks "which one is that?" or wants to visually identify a window.
109
- Examples:
110
- "show me the lattices terminal" → {intent:"highlight", slots:{wid:12345}}
111
- "which one is Chrome?" → {intent:"highlight", slots:{app:"Google Chrome"}}
112
-
113
- move_to_display: Move a window to another monitor/display
114
- Slots:
115
- display (required): Target display index (0 = main/primary, 1 = second, etc.)
116
- wid (optional): Window ID to move (from snapshot)
117
- app (optional): App name to move
118
- position (optional): Tile position on the target display (e.g. "left", "maximize")
119
- If no wid/app given, moves the frontmost window.
120
- Examples:
121
- "put this on my second monitor" → {intent:"move_to_display", slots:{wid:12345, display:1}}
122
- "move Chrome to the main screen" → {intent:"move_to_display", slots:{app:"Google Chrome", display:0}}
123
- "send iTerm to the other monitor, left half" → {intent:"move_to_display", slots:{app:"iTerm2", display:1, position:"left"}}
124
-
125
- undo: Undo the last window move — restore windows to their previous positions
126
- No slots needed.
127
- Examples:
128
- "put it back" → {intent:"undo"}
129
- "undo that" → {intent:"undo"}
130
-
131
- search: Search windows by text
132
- Slots:
133
- query (required): Search text
134
- Examples:
135
- "find the error message" → {intent:"search", slots:{query:"error"}}
136
- "find all terminal windows" → {intent:"search", slots:{query:"terminal"}}
137
-
138
- list_windows: List all visible windows
139
- No slots needed. Use when the user asks "what's on screen?" or "what windows do I have?"
140
-
141
- list_sessions: List active terminal sessions
142
- No slots needed. Use when the user asks "what sessions are running?" or "show my projects."
143
-
144
- switch_layer: Switch to a workspace layer
145
- Slots:
146
- layer (required): Layer name or index
147
- Examples:
148
- "switch to the web layer" → {intent:"switch_layer", slots:{layer:"web"}}
149
- "go to layer 2" → {intent:"switch_layer", slots:{layer:"2"}}
150
-
151
- create_layer: Save current window arrangement as a named layer
152
- Slots:
153
- name (required): Layer name
154
- Examples:
155
- "save this layout as review" → {intent:"create_layer", slots:{name:"review"}}
156
-
157
- launch: Launch a project session
158
- Slots:
159
- project (required): Project name or path
160
- Examples:
161
- "open my frontend project" → {intent:"launch", slots:{project:"frontend"}}
162
- "start working on lattices" → {intent:"launch", slots:{project:"lattices"}}
163
-
164
- kill: Kill a terminal session
165
- Slots:
166
- session (required): Session name or project name
167
- Examples:
168
- "stop the frontend session" → {intent:"kill", slots:{session:"frontend"}}
169
-
170
- scan: Trigger an immediate screen text scan (OCR)
171
- No slots needed. Use when the user asks you to read or scan screen content.
172
-
173
- CHOOSING THE RIGHT INTENT:
174
- Positioning:
175
- tile_window = position ONE specific window at a specific spot. Use for 1-6 named windows.
176
- distribute = auto-grid MANY windows. Use when the user says "all", "my terminals", "everything", or names more windows than the 6-action limit.
177
- distribute with app+region is the most powerful combo: "grid my terminals on the right" → distribute(app:"iTerm2", region:"right")
178
- Rearranging:
179
- swap = exchange positions of exactly two windows. "swap Chrome and iTerm"
180
- move_to_display = move a window to a different monitor. "put this on my other screen"
181
- Visibility:
182
- hide = hide an app or minimize a window. "hide Slack", "minimize that"
183
- highlight = flash a window's border to identify it. "which one is the lattices terminal?"
184
- focus = bring a window to the front. "focus Slack", "show me Chrome"
185
- Recovery:
186
- undo = restore previous positions after a move. "put it back", "undo that"
187
- Information:
188
- list_windows, list_sessions, search = answer questions about the desktop. NO actions needed for pure questions.
189
- Session lifecycle:
190
- launch = start a project session. "open my frontend project"
191
- kill = stop a session. "kill the API"
192
-
193
- TILING PRESETS (use multiple tile_window actions):
194
- "split screen" / "side by side" → left + right
195
- "thirds" → left-third, center-third, right-third
196
- "main + sidebar" → main app left (or maximize), others stacked right
197
- "stack" → top + bottom
198
- "corners" / "quadrants" → top-left, top-right, bottom-left, bottom-right
199
- "six-up" / "3 by 2" → 3x2 grid using sixth positions
200
- "eight-up" / "4 by 2" → 4x2 grid using eighth positions
201
-
202
- TILING PRESETS (use distribute intent):
203
- "mosaic" / "grid" / "spread out" → distribute (all windows, full screen)
204
- "grid the terminals" → distribute with app:"iTerm2"
205
- "terminals on the right" → distribute with app:"iTerm2", region:"right"
206
- "organize chrome on the left" → distribute with app:"Google Chrome", region:"left"
207
- `;
208
-
209
- systemPrompt = systemPrompt.replace("{{intent_catalog}}", intentCatalog);
210
-
211
- // ── Build the per-turn message ─────────────────────────────────────
212
-
213
- let userMessage = `USER: "${req.transcript}"\n\n`;
214
- userMessage += "--- DESKTOP SNAPSHOT ---\n";
215
-
216
- const snap = req.snapshot;
217
- if (snap.stageManager) {
218
- userMessage += `Stage Manager: ON (grouping: ${snap.smGrouping ?? "all-at-once"})\n\n`;
219
- userMessage += `Active stage (${snap.activeStage?.length ?? 0} windows):\n`;
220
- for (const w of snap.activeStage ?? []) {
221
- userMessage += ` [${w.wid}] ${w.app}: "${w.title}" — ${w.frame}\n`;
222
- }
223
- userMessage += `\nStrip: ${snap.stripApps?.join(", ") ?? "none"}\n`;
224
- userMessage += `Other stages: ${snap.hiddenApps?.join(", ") ?? "none"}\n`;
225
- } else {
226
- userMessage += "Stage Manager: OFF\n";
227
- userMessage += `Visible windows (${snap.activeStage?.length ?? 0}):\n`;
228
- for (const w of snap.activeStage ?? []) {
229
- userMessage += ` [${w.wid}] ${w.app}: "${w.title}" — ${w.frame}\n`;
230
- }
231
- }
232
-
233
- if (snap.currentLayer) {
234
- userMessage += `\nCurrent layer: ${snap.currentLayer}\n`;
235
- }
236
- if (snap.screen) {
237
- userMessage += `Screen: ${snap.screen}\n`;
40
+ const localPlan = tryLocalAssistantPlan(transcript, req.snapshot ?? {});
41
+ if (localPlan) {
42
+ console.log(JSON.stringify(localPlan));
43
+ process.exit(0);
238
44
  }
239
- userMessage += "--- END SNAPSHOT ---\n";
240
45
 
241
46
  // ── Call inference ──────────────────────────────────────────────────
242
47
 
@@ -257,9 +62,11 @@ try {
257
62
  });
258
63
 
259
64
  // Output result as JSON to stdout
65
+ const plan = normalizeAssistantPlan(data, transcript);
260
66
  const output = {
261
- ...data,
67
+ ...plan,
262
68
  _meta: {
69
+ ...plan._meta,
263
70
  provider: raw.provider,
264
71
  model: raw.model,
265
72
  durationMs: raw.durationMs,
@@ -16,7 +16,14 @@
16
16
  * {"ok":false,"error":"..."}
17
17
  */
18
18
 
19
- import { infer, inferJSON } from "../lib/infer.ts";
19
+ import {
20
+ assistantPromptPath,
21
+ buildAssistantContextMessage,
22
+ buildAssistantSystemPrompt,
23
+ normalizeAssistantPlan,
24
+ tryLocalAssistantPlan,
25
+ } from "./assistant-intelligence.ts";
26
+ import { infer } from "../lib/infer.ts";
20
27
 
21
28
  const INFER_TIMEOUT_MS = 15_000;
22
29
 
@@ -55,7 +62,7 @@ async function inferSmart(prompt: string, options: any): Promise<{ data: any; ra
55
62
  };
56
63
  }
57
64
  import { readFileSync } from "fs";
58
- import { join, dirname } from "path";
65
+ import { join } from "path";
59
66
  import { spawn } from "child_process";
60
67
 
61
68
  // ── Streaming TTS via OpenAI API → ffplay ──────────────────────────
@@ -275,100 +282,6 @@ function playConfirm(intent: string): Promise<number> {
275
282
  return playCached(map[intent] ?? "Done.");
276
283
  }
277
284
 
278
- // ── Fast path: local intent matching (no LLM needed) ──────────────
279
-
280
- interface FastMatch {
281
- actions: Array<{ intent: string; slots: Record<string, string> }>;
282
- confirm: string; // which confirmation to play
283
- }
284
-
285
- function tryFastMatch(transcript: string, snapshot: any): FastMatch | null {
286
- const t = transcript.toLowerCase().trim();
287
- const activeApps = (snapshot.activeStage ?? []).map((w: any) => ({
288
- app: w.app as string,
289
- wid: w.wid as number,
290
- }));
291
-
292
- // Tile patterns
293
- const tileMatch = t.match(
294
- /(?:tile|snap|put|move)\s+(\w+)\s+(?:to\s+)?(?:the\s+)?(left|right|top|bottom|maximize|center|top.?left|top.?right|bottom.?left|bottom.?right|left.?third|center.?third|right.?third)/
295
- );
296
- if (tileMatch) {
297
- const app = tileMatch[1];
298
- const pos = tileMatch[2].replace(/\s+/g, "-");
299
- return {
300
- actions: [{ intent: "tile_window", slots: { app, position: pos } }],
301
- confirm: "tile_window",
302
- };
303
- }
304
-
305
- // Split screen: "split X and Y" or "X left Y right"
306
- const splitMatch = t.match(/split\s+(\w+)\s+(?:and|&)\s+(\w+)/);
307
- if (splitMatch) {
308
- return {
309
- actions: [
310
- { intent: "tile_window", slots: { app: splitMatch[1], position: "left" } },
311
- { intent: "tile_window", slots: { app: splitMatch[2], position: "right" } },
312
- ],
313
- confirm: "tile_window",
314
- };
315
- }
316
-
317
- // Focus: "focus X" / "focus on X" / "switch to X" / "go to X"
318
- const focusMatch = t.match(/(?:focus(?:\s+on)?|switch\s+to|go\s+to|show)\s+(?:the\s+)?(?:on\s+)?(\w+)/);
319
- if (focusMatch && !t.includes("tile") && !t.includes("split")) {
320
- const app = focusMatch[1];
321
- if (app && app !== "on" && app !== "the") {
322
- return {
323
- actions: [{ intent: "focus", slots: { app } }],
324
- confirm: "focus",
325
- };
326
- }
327
- }
328
-
329
- // Maximize: "maximize" / "full screen" / "make it big"
330
- if (/maximize|full\s*screen|make\s+it\s+big/.test(t)) {
331
- return {
332
- actions: [{ intent: "tile_window", slots: { position: "maximize" } }],
333
- confirm: "tile_window",
334
- };
335
- }
336
-
337
- // Distribute: "grid" / "mosaic" / "distribute" / "even"
338
- if (/grid|mosaic|distribute|even\s+(?:out|grid)|arrange/.test(t)) {
339
- return {
340
- actions: [{ intent: "distribute", slots: {} }],
341
- confirm: "distribute",
342
- };
343
- }
344
-
345
- // Corners: "quadrants" / "four corners"
346
- if (/quadrants?|four\s+corners?|corners/.test(t) && activeApps.length >= 4) {
347
- const positions = ["top-left", "top-right", "bottom-left", "bottom-right"];
348
- return {
349
- actions: activeApps.slice(0, 4).map((a: any, i: number) => ({
350
- intent: "tile_window",
351
- slots: { app: a.app, position: positions[i] },
352
- })),
353
- confirm: "tile_window",
354
- };
355
- }
356
-
357
- // Thirds: "thirds"
358
- if (/thirds/.test(t) && activeApps.length >= 3) {
359
- const positions = ["left-third", "center-third", "right-third"];
360
- return {
361
- actions: activeApps.slice(0, 3).map((a: any, i: number) => ({
362
- intent: "tile_window",
363
- slots: { app: a.app, position: positions[i] },
364
- })),
365
- confirm: "tile_window",
366
- };
367
- }
368
-
369
- return null; // No fast match — fall through to LLM
370
- }
371
-
372
285
  // Warm up cache on startup
373
286
  ensureVoiceCache().then(() => log("voice cache ready"));
374
287
 
@@ -376,70 +289,14 @@ log("worker started, streaming TTS ready");
376
289
 
377
290
  // ── Load system prompt once ────────────────────────────────────────
378
291
 
379
- const promptDir = join(dirname(import.meta.dir), "docs", "prompts");
380
- let systemPrompt: string;
381
- try {
382
- systemPrompt = readFileSync(join(promptDir, "hands-off-system.md"), "utf-8")
383
- .split("\n")
384
- .filter((l) => !l.startsWith("# "))
385
- .join("\n")
386
- .trim();
387
- } catch {
388
- systemPrompt = "You are a workspace assistant. Respond with JSON: {actions, spoken}.";
389
- }
390
-
391
- const intentCatalog = `
392
- tile_window: Tile a window to a screen position
393
- Slots:
394
- position (required): Named position or grid:CxR:C,R syntax.
395
- Halves: left, right, top, bottom
396
- Quarters (2x2): top-left, top-right, bottom-left, bottom-right
397
- Thirds (3x1): left-third, center-third, right-third
398
- Sixths (3x2): top-left-third, top-center-third, top-right-third, bottom-left-third, bottom-center-third, bottom-right-third
399
- Fourths (4x1): first-fourth, second-fourth, third-fourth, last-fourth
400
- Eighths (4x2): top-first-fourth, top-second-fourth, top-third-fourth, top-last-fourth, bottom-first-fourth, bottom-second-fourth, bottom-third-fourth, bottom-last-fourth
401
- Special: maximize (full screen), center (centered floating)
402
- Grid syntax: grid:CxR:C,R (e.g. grid:5x3:2,1 = center cell of 5x3 grid)
403
- app (optional): Target app name — match loosely (e.g. "chrome" matches "Google Chrome")
404
- wid (optional): Target window ID (from snapshot)
405
- session (optional): Tmux session name
406
- If no app/wid/session given, tiles the frontmost window.
407
- "quarter" = 2x2 cell (top-left etc.), NOT a 4x1 fourth.
408
- "top quarter" = top-left or top-right (2x2). "top third" = top-left-third (3x2).
409
-
410
- focus: Focus a window, app, or session
411
- Slots: app, session, or wid (at least one)
412
-
413
- distribute: Arrange all visible windows in an even grid. No slots.
414
-
415
- search: Search windows by text
416
- Slots: query (required)
417
-
418
- list_windows: List all visible windows. No slots.
419
-
420
- switch_layer: Switch to a workspace layer
421
- Slots: layer (required) — name or index
422
-
423
- create_layer: Save current arrangement as a named layer
424
- Slots: name (required)
425
-
426
- TILING PRESETS (use multiple tile_window actions):
427
- "split screen" → left + right
428
- "thirds" → left-third, center-third, right-third
429
- "mosaic"/"grid" → use distribute
430
- "corners"/"quadrants" → top-left, top-right, bottom-left, bottom-right
431
- "stack" → top + bottom
432
- "six-up"/"3 by 2" → 3x2 grid using the sixth positions
433
- "eight-up"/"4 by 2" → 4x2 grid using the eighth positions
434
- `;
435
-
436
- systemPrompt = systemPrompt.replace("{{intent_catalog}}", intentCatalog);
292
+ const systemPrompt = buildAssistantSystemPrompt();
437
293
  log("system prompt loaded");
438
294
 
439
295
  // ── Auto-restart on file changes ───────────────────────────────────
440
296
 
441
297
  const watchFiles = [
442
- join(promptDir, "hands-off-system.md"),
298
+ assistantPromptPath,
299
+ join(import.meta.dir, "assistant-intelligence.ts"),
443
300
  import.meta.path, // this script itself
444
301
  ];
445
302
 
@@ -458,89 +315,6 @@ for (const f of watchFiles) {
458
315
  } catch {}
459
316
  }
460
317
 
461
- // ── Build context message from snapshot ─────────────────────────────
462
-
463
- function buildContextMessage(transcript: string, snap: any): string {
464
- let msg = `USER: "${transcript}"\n\n`;
465
- msg += "--- DESKTOP SNAPSHOT ---\n";
466
-
467
- // Screens
468
- const screens = snap.screens ?? [];
469
- if (screens.length > 1) {
470
- msg += `Displays: ${screens.map((s: any) => `${s.width}x${s.height}${s.isMain ? " (main)" : ""}`).join(", ")}\n`;
471
- } else if (screens.length === 1) {
472
- msg += `Screen: ${screens[0].width}x${screens[0].height}\n`;
473
- }
474
-
475
- // Stage Manager
476
- if (snap.stageManager) {
477
- msg += `Stage Manager: ON (grouping: ${snap.smGrouping ?? "all-at-once"})\n`;
478
- }
479
-
480
- // All windows — full inventory, ordered front-to-back (zIndex 0 = frontmost)
481
- const windows = snap.windows ?? snap.activeStage ?? [];
482
- const onScreen = windows.filter((w: any) => w.onScreen !== false);
483
- const offScreen = windows.filter((w: any) => w.onScreen === false);
484
-
485
- msg += `\nVisible windows (${onScreen.length}, front-to-back order):\n`;
486
- for (const w of onScreen) {
487
- const flags: string[] = [];
488
- if (w.zIndex === 0) flags.push("FRONTMOST");
489
- if (w.session) flags.push(`session:${w.session}`);
490
- const flagStr = flags.length ? ` [${flags.join(", ")}]` : "";
491
- msg += ` wid:${w.wid} ${w.app}: "${w.title}" — ${w.frame}${flagStr}\n`;
492
- }
493
-
494
- if (offScreen.length > 0) {
495
- // Summarize hidden windows by app instead of listing all
496
- const hiddenByApp: Record<string, number> = {};
497
- for (const w of offScreen) {
498
- const app = w.app;
499
- hiddenByApp[app] = (hiddenByApp[app] || 0) + 1;
500
- }
501
- const summary = Object.entries(hiddenByApp)
502
- .filter(([app]) => !["WindowManager", "Spotlight", "CursorUIViewService", "AutoFill", "coreautha", "loginwindow", "Open and Save Panel Service"].includes(app))
503
- .map(([app, count]) => `${app}(${count})`)
504
- .join(", ");
505
- if (summary) {
506
- msg += `\nHidden windows: ${summary}\n`;
507
- }
508
- }
509
-
510
- // Terminals — cwd, running commands, claude, tmux
511
- const terminals = snap.terminals ?? [];
512
- if (terminals.length > 0) {
513
- msg += `\nTerminal tabs (${terminals.length}):\n`;
514
- for (const t of terminals) {
515
- const flags: string[] = [];
516
- if (t.hasClaude) flags.push("Claude Code");
517
- if (t.tmuxSession) flags.push(`tmux:${t.tmuxSession}`);
518
- if (!t.isActiveTab) flags.push("background tab");
519
- const flagStr = flags.length ? ` [${flags.join(", ")}]` : "";
520
- const cwd = t.cwd ? ` cwd:${t.cwd.replace(/^\/Users\/\w+\//, "~/")}` : "";
521
- const cmds = (t.runningCommands ?? []).map((c: any) => c.command).join(", ");
522
- const cmdStr = cmds ? ` running:${cmds}` : "";
523
- msg += ` ${t.displayName}${cwd}${cmdStr}${flagStr}`;
524
- if (t.windowId) msg += ` (wid:${t.windowId})`;
525
- msg += "\n";
526
- }
527
- }
528
-
529
- // Tmux sessions
530
- const tmux = snap.tmuxSessions ?? [];
531
- if (tmux.length > 0) {
532
- msg += `\nTmux sessions: ${tmux.map((s: any) => `${s.name} (${s.windows} windows${s.attached ? ", attached" : ""})`).join(", ")}\n`;
533
- }
534
-
535
- // Layer
536
- if (snap.currentLayer) {
537
- msg += `\nCurrent layer: ${snap.currentLayer.name} (index: ${snap.currentLayer.index})\n`;
538
- }
539
-
540
- msg += "--- END SNAPSHOT ---\n";
541
- return msg;
542
- }
543
-
544
318
  // ── Command loop ───────────────────────────────────────────────────
545
319
 
546
320
  const decoder = new TextDecoder();
@@ -588,7 +362,13 @@ async function processLine(line: string) {
588
362
 
589
363
  case "infer":
590
364
  try {
591
- const userMessage = buildContextMessage(cmd.transcript, cmd.snapshot ?? {});
365
+ const localPlan = tryLocalAssistantPlan(cmd.transcript, cmd.snapshot ?? {});
366
+ if (localPlan) {
367
+ respond({ ok: true, data: localPlan });
368
+ break;
369
+ }
370
+
371
+ const userMessage = buildAssistantContextMessage(cmd.transcript, cmd.snapshot ?? {});
592
372
 
593
373
  const messages = (cmd.history ?? []).map((h: any) => ({
594
374
  role: h.role as "user" | "assistant",
@@ -605,11 +385,13 @@ async function processLine(line: string) {
605
385
  tag: "hands-off",
606
386
  });
607
387
 
388
+ const plan = normalizeAssistantPlan(data, cmd.transcript);
608
389
  respond({
609
390
  ok: true,
610
391
  data: {
611
- ...data,
392
+ ...plan,
612
393
  _meta: {
394
+ ...plan._meta,
613
395
  provider: raw.provider,
614
396
  model: raw.model,
615
397
  durationMs: raw.durationMs,
@@ -649,30 +431,35 @@ async function processLine(line: string) {
649
431
  // Fire cached ack sound + inference in PARALLEL
650
432
  const ackPromise = playAck().catch((e) => log(`ack error: ${e.message}`));
651
433
 
652
- // Build full context message from snapshot
653
- const userMessage = buildContextMessage(transcript, snap);
654
-
655
434
  const messages = history.map((h: any) => ({
656
435
  role: h.role as "user" | "assistant",
657
436
  content: typeof h.content === "string" ? h.content : JSON.stringify(h.content),
658
437
  })).filter((m: any) => m.content && m.content.length > 0);
659
438
 
660
439
  let inferResult: any = null;
661
- try {
662
- const { data, raw } = await inferSmart(userMessage, {
663
- provider: "xai",
664
- model: "grok-4.20-beta-0309-non-reasoning",
665
- system: systemPrompt,
666
- messages,
667
- temperature: 0.2,
668
- maxTokens: 512,
669
- tag: "hands-off",
670
- });
671
- inferResult = { ...data, _meta: { provider: raw.provider, model: raw.model, durationMs: raw.durationMs, tokens: raw.usage?.totalTokens } };
672
- log(`⏱ inference done in ${raw.durationMs}ms`);
673
- } catch (err: any) {
674
- log(`⏱ inference error: ${err.message}`);
675
- inferResult = { actions: [], spoken: "Sorry, I had trouble with that.", _meta: { error: err.message } };
440
+ const localPlan = tryLocalAssistantPlan(transcript, snap);
441
+ if (localPlan) {
442
+ inferResult = localPlan;
443
+ log("local planner matched");
444
+ } else {
445
+ const userMessage = buildAssistantContextMessage(transcript, snap);
446
+ try {
447
+ const { data, raw } = await inferSmart(userMessage, {
448
+ provider: "xai",
449
+ model: "grok-4.20-beta-0309-non-reasoning",
450
+ system: systemPrompt,
451
+ messages,
452
+ temperature: 0.2,
453
+ maxTokens: 512,
454
+ tag: "hands-off",
455
+ });
456
+ const plan = normalizeAssistantPlan(data, transcript);
457
+ inferResult = { ...plan, _meta: { ...plan._meta, provider: raw.provider, model: raw.model, durationMs: raw.durationMs, tokens: raw.usage?.totalTokens } };
458
+ log(`⏱ inference done in ${raw.durationMs}ms`);
459
+ } catch (err: any) {
460
+ log(`⏱ inference error: ${err.message}`);
461
+ inferResult = { actions: [], spoken: "Sorry, I had trouble with that.", _meta: { error: err.message } };
462
+ }
676
463
  }
677
464
 
678
465
  // Wait for ack to finish before narrating (don't overlap speech)