@lattices/cli 0.4.1 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/app/Info.plist +2 -2
- package/app/Lattices.app/Contents/Info.plist +2 -2
- package/app/Lattices.app/Contents/MacOS/Lattices +0 -0
- package/app/Package.swift +6 -0
- package/app/Sources/ActionRow.swift +43 -26
- package/app/Sources/App.swift +10 -0
- package/app/Sources/AppDelegate.swift +91 -30
- package/app/Sources/AppShellView.swift +2 -0
- package/app/Sources/AppTypeClassifier.swift +36 -0
- package/app/Sources/AppUpdater.swift +92 -0
- package/app/Sources/CheatSheetHUD.swift +1 -0
- package/app/Sources/CliActionLauncher.swift +50 -0
- package/app/Sources/CommandModeView.swift +4 -24
- package/app/Sources/CompanionActivityLog.swift +70 -0
- package/app/Sources/CompanionKeyboardController.swift +141 -0
- package/app/Sources/DesktopModel.swift +4 -0
- package/app/Sources/HandsOffSession.swift +53 -16
- package/app/Sources/HomeDashboardView.swift +18 -10
- package/app/Sources/HotkeyStore.swift +8 -5
- package/app/Sources/IntentEngine.swift +7 -1
- package/app/Sources/LatticesApi.swift +125 -4
- package/app/Sources/LatticesCompanionBridgeServer.swift +438 -0
- package/app/Sources/LatticesCompanionCockpit.swift +555 -0
- package/app/Sources/LatticesCompanionSecurityCoordinator.swift +594 -0
- package/app/Sources/LatticesCompanionTrackpadController.swift +204 -0
- package/app/Sources/LatticesDeckHost.swift +1463 -0
- package/app/Sources/LatticesRuntime.swift +61 -0
- package/app/Sources/MainView.swift +398 -186
- package/app/Sources/MouseFinder.swift +335 -30
- package/app/Sources/MouseGestureConfig.swift +364 -0
- package/app/Sources/MouseGestureController.swift +1203 -0
- package/app/Sources/MouseInputDeviceStore.swift +98 -0
- package/app/Sources/MouseInputEventViewer.swift +272 -0
- package/app/Sources/MouseShortcutStore.swift +107 -0
- package/app/Sources/OmniSearchView.swift +136 -2
- package/app/Sources/OmniSearchWindow.swift +65 -5
- package/app/Sources/OnboardingView.swift +30 -16
- package/app/Sources/PaletteCommand.swift +26 -6
- package/app/Sources/PermissionChecker.swift +76 -2
- package/app/Sources/PiAuthNextStepCard.swift +148 -0
- package/app/Sources/PiAuthPromptCard.swift +90 -0
- package/app/Sources/PiChatDock.swift +137 -74
- package/app/Sources/PiChatSession.swift +608 -108
- package/app/Sources/PiInstallCallout.swift +86 -0
- package/app/Sources/PiProviderSetupCallout.swift +99 -0
- package/app/Sources/PiWorkspaceView.swift +174 -77
- package/app/Sources/Preferences.swift +78 -0
- package/app/Sources/ScreenMapState.swift +91 -31
- package/app/Sources/ScreenMapView.swift +510 -524
- package/app/Sources/ScreenMapWindowController.swift +12 -4
- package/app/Sources/SettingsView.swift +869 -152
- package/app/Sources/SystemTelemetryMonitor.swift +273 -0
- package/app/Sources/VoiceCommandWindow.swift +23 -2
- package/app/Sources/WindowDragSnapController.swift +628 -0
- package/app/Sources/WindowTiler.swift +328 -65
- package/app/Sources/WorkspaceManager.swift +288 -0
- package/bin/assistant-intelligence.ts +874 -0
- package/bin/handsoff-infer.ts +16 -209
- package/bin/handsoff-worker.ts +45 -258
- package/bin/lattices-app.ts +65 -1
- package/bin/lattices-dev +4 -0
- package/bin/lattices.ts +125 -14
- package/docs/agents.md +14 -0
- package/docs/api.md +55 -0
- package/docs/app.md +3 -0
- package/docs/companion-deck.md +180 -0
- package/docs/config.md +25 -0
- package/docs/tiling-reference.md +55 -0
- package/docs/voice-error-model.md +73 -0
- package/package.json +4 -2
package/bin/handsoff-infer.ts
CHANGED
|
@@ -8,10 +8,13 @@
|
|
|
8
8
|
* All logging goes to stderr so it doesn't pollute the JSON output.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
+
import {
|
|
12
|
+
buildAssistantContextMessage,
|
|
13
|
+
buildAssistantSystemPrompt,
|
|
14
|
+
normalizeAssistantPlan,
|
|
15
|
+
tryLocalAssistantPlan,
|
|
16
|
+
} from "./assistant-intelligence.ts";
|
|
11
17
|
import { inferJSON } from "../lib/infer.ts";
|
|
12
|
-
import { readFileSync } from "fs";
|
|
13
|
-
import { join, dirname } from "path";
|
|
14
|
-
import { homedir } from "os";
|
|
15
18
|
|
|
16
19
|
// ── Read input from stdin ──────────────────────────────────────────
|
|
17
20
|
|
|
@@ -30,213 +33,15 @@ const req = JSON.parse(input) as {
|
|
|
30
33
|
history?: Array<{ role: "user" | "assistant"; content: string }>;
|
|
31
34
|
};
|
|
32
35
|
|
|
33
|
-
|
|
36
|
+
const transcript = req.transcript ?? "";
|
|
37
|
+
const systemPrompt = buildAssistantSystemPrompt();
|
|
38
|
+
const userMessage = buildAssistantContextMessage(transcript, req.snapshot ?? {});
|
|
34
39
|
|
|
35
|
-
const
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
.split("\n")
|
|
40
|
-
.filter((l) => !l.startsWith("# "))
|
|
41
|
-
.join("\n")
|
|
42
|
-
.trim();
|
|
43
|
-
} catch {
|
|
44
|
-
systemPrompt = "You are a workspace assistant. Respond with JSON: {actions, spoken}.";
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
// Replace {{intent_catalog}} with the actual tiling reference
|
|
48
|
-
const intentCatalog = `
|
|
49
|
-
tile_window: Tile a window to a screen position
|
|
50
|
-
Slots:
|
|
51
|
-
position (required): Named position or grid:CxR:C,R syntax.
|
|
52
|
-
Halves: left, right, top, bottom
|
|
53
|
-
Quarters (2x2): top-left, top-right, bottom-left, bottom-right
|
|
54
|
-
Thirds (3x1): left-third, center-third, right-third
|
|
55
|
-
Sixths (3x2): top-left-third, top-center-third, top-right-third, bottom-left-third, bottom-center-third, bottom-right-third
|
|
56
|
-
Fourths (4x1): first-fourth, second-fourth, third-fourth, last-fourth
|
|
57
|
-
Eighths (4x2): top-first-fourth, top-second-fourth, top-third-fourth, top-last-fourth, bottom-first-fourth, bottom-second-fourth, bottom-third-fourth, bottom-last-fourth
|
|
58
|
-
Special: maximize (full screen), center (centered floating)
|
|
59
|
-
Grid syntax: grid:CxR:C,R (e.g. grid:5x3:2,1 = center cell of 5x3 grid)
|
|
60
|
-
app (optional): Target app name — match loosely (e.g. "chrome" matches "Google Chrome")
|
|
61
|
-
wid (optional): Target window ID (from snapshot)
|
|
62
|
-
session (optional): Tmux session name
|
|
63
|
-
If no app/wid/session given, tiles the frontmost window.
|
|
64
|
-
"quarter" = 2x2 cell (top-left etc.), NOT a 4x1 fourth.
|
|
65
|
-
"top quarter" = top-left or top-right (2x2). "top third" = top-left-third (3x2).
|
|
66
|
-
Examples: "tile chrome left" → {intent:"tile_window", slots:{app:"chrome", position:"left"}}
|
|
67
|
-
|
|
68
|
-
focus: Focus a window, app, or session
|
|
69
|
-
Slots:
|
|
70
|
-
app (optional): App name to focus
|
|
71
|
-
session (optional): Session name to focus
|
|
72
|
-
wid (optional): Window ID to focus
|
|
73
|
-
|
|
74
|
-
distribute: Arrange windows in an even grid — with optional app filter and region constraint
|
|
75
|
-
Slots:
|
|
76
|
-
app (optional): Filter to windows of this app (e.g. "iTerm2", "Google Chrome"). Without this, distributes ALL visible windows.
|
|
77
|
-
region (optional): Constrain the grid to a screen region. Uses the same position names as tile_window:
|
|
78
|
-
Halves: left, right, top, bottom
|
|
79
|
-
Quarters: top-left, top-right, bottom-left, bottom-right
|
|
80
|
-
Thirds: left-third, center-third, right-third
|
|
81
|
-
Without this, uses the full screen.
|
|
82
|
-
Examples:
|
|
83
|
-
"grid the terminals on the right" → {intent:"distribute", slots:{app:"iTerm2", region:"right"}}
|
|
84
|
-
"organize my chrome windows in the bottom half" → {intent:"distribute", slots:{app:"Google Chrome", region:"bottom"}}
|
|
85
|
-
"spread everything out" → {intent:"distribute", slots:{}}
|
|
86
|
-
"tile all terminals" → {intent:"distribute", slots:{app:"iTerm2"}}
|
|
87
|
-
|
|
88
|
-
swap: Swap the positions of two windows
|
|
89
|
-
Slots:
|
|
90
|
-
wid_a (required): Window ID of the first window (from snapshot)
|
|
91
|
-
wid_b (required): Window ID of the second window (from snapshot)
|
|
92
|
-
Examples:
|
|
93
|
-
"swap Chrome and iTerm" → {intent:"swap", slots:{wid_a:12345, wid_b:67890}}
|
|
94
|
-
|
|
95
|
-
hide: Hide or minimize a window or app
|
|
96
|
-
Slots:
|
|
97
|
-
app (optional): App name to hide (hides the entire app)
|
|
98
|
-
wid (optional): Window ID to minimize (minimizes just that window)
|
|
99
|
-
Use app to hide all windows of an app. Use wid to minimize a single window.
|
|
100
|
-
Examples:
|
|
101
|
-
"hide Slack" → {intent:"hide", slots:{app:"Slack"}}
|
|
102
|
-
"minimize that" → {intent:"hide", slots:{wid:12345}}
|
|
103
|
-
|
|
104
|
-
highlight: Flash a window's border to identify it visually
|
|
105
|
-
Slots:
|
|
106
|
-
wid (optional): Window ID to highlight (from snapshot)
|
|
107
|
-
app (optional): App name to highlight
|
|
108
|
-
Use when the user asks "which one is that?" or wants to visually identify a window.
|
|
109
|
-
Examples:
|
|
110
|
-
"show me the lattices terminal" → {intent:"highlight", slots:{wid:12345}}
|
|
111
|
-
"which one is Chrome?" → {intent:"highlight", slots:{app:"Google Chrome"}}
|
|
112
|
-
|
|
113
|
-
move_to_display: Move a window to another monitor/display
|
|
114
|
-
Slots:
|
|
115
|
-
display (required): Target display index (0 = main/primary, 1 = second, etc.)
|
|
116
|
-
wid (optional): Window ID to move (from snapshot)
|
|
117
|
-
app (optional): App name to move
|
|
118
|
-
position (optional): Tile position on the target display (e.g. "left", "maximize")
|
|
119
|
-
If no wid/app given, moves the frontmost window.
|
|
120
|
-
Examples:
|
|
121
|
-
"put this on my second monitor" → {intent:"move_to_display", slots:{wid:12345, display:1}}
|
|
122
|
-
"move Chrome to the main screen" → {intent:"move_to_display", slots:{app:"Google Chrome", display:0}}
|
|
123
|
-
"send iTerm to the other monitor, left half" → {intent:"move_to_display", slots:{app:"iTerm2", display:1, position:"left"}}
|
|
124
|
-
|
|
125
|
-
undo: Undo the last window move — restore windows to their previous positions
|
|
126
|
-
No slots needed.
|
|
127
|
-
Examples:
|
|
128
|
-
"put it back" → {intent:"undo"}
|
|
129
|
-
"undo that" → {intent:"undo"}
|
|
130
|
-
|
|
131
|
-
search: Search windows by text
|
|
132
|
-
Slots:
|
|
133
|
-
query (required): Search text
|
|
134
|
-
Examples:
|
|
135
|
-
"find the error message" → {intent:"search", slots:{query:"error"}}
|
|
136
|
-
"find all terminal windows" → {intent:"search", slots:{query:"terminal"}}
|
|
137
|
-
|
|
138
|
-
list_windows: List all visible windows
|
|
139
|
-
No slots needed. Use when the user asks "what's on screen?" or "what windows do I have?"
|
|
140
|
-
|
|
141
|
-
list_sessions: List active terminal sessions
|
|
142
|
-
No slots needed. Use when the user asks "what sessions are running?" or "show my projects."
|
|
143
|
-
|
|
144
|
-
switch_layer: Switch to a workspace layer
|
|
145
|
-
Slots:
|
|
146
|
-
layer (required): Layer name or index
|
|
147
|
-
Examples:
|
|
148
|
-
"switch to the web layer" → {intent:"switch_layer", slots:{layer:"web"}}
|
|
149
|
-
"go to layer 2" → {intent:"switch_layer", slots:{layer:"2"}}
|
|
150
|
-
|
|
151
|
-
create_layer: Save current window arrangement as a named layer
|
|
152
|
-
Slots:
|
|
153
|
-
name (required): Layer name
|
|
154
|
-
Examples:
|
|
155
|
-
"save this layout as review" → {intent:"create_layer", slots:{name:"review"}}
|
|
156
|
-
|
|
157
|
-
launch: Launch a project session
|
|
158
|
-
Slots:
|
|
159
|
-
project (required): Project name or path
|
|
160
|
-
Examples:
|
|
161
|
-
"open my frontend project" → {intent:"launch", slots:{project:"frontend"}}
|
|
162
|
-
"start working on lattices" → {intent:"launch", slots:{project:"lattices"}}
|
|
163
|
-
|
|
164
|
-
kill: Kill a terminal session
|
|
165
|
-
Slots:
|
|
166
|
-
session (required): Session name or project name
|
|
167
|
-
Examples:
|
|
168
|
-
"stop the frontend session" → {intent:"kill", slots:{session:"frontend"}}
|
|
169
|
-
|
|
170
|
-
scan: Trigger an immediate screen text scan (OCR)
|
|
171
|
-
No slots needed. Use when the user asks you to read or scan screen content.
|
|
172
|
-
|
|
173
|
-
CHOOSING THE RIGHT INTENT:
|
|
174
|
-
Positioning:
|
|
175
|
-
tile_window = position ONE specific window at a specific spot. Use for 1-6 named windows.
|
|
176
|
-
distribute = auto-grid MANY windows. Use when the user says "all", "my terminals", "everything", or names more windows than the 6-action limit.
|
|
177
|
-
distribute with app+region is the most powerful combo: "grid my terminals on the right" → distribute(app:"iTerm2", region:"right")
|
|
178
|
-
Rearranging:
|
|
179
|
-
swap = exchange positions of exactly two windows. "swap Chrome and iTerm"
|
|
180
|
-
move_to_display = move a window to a different monitor. "put this on my other screen"
|
|
181
|
-
Visibility:
|
|
182
|
-
hide = hide an app or minimize a window. "hide Slack", "minimize that"
|
|
183
|
-
highlight = flash a window's border to identify it. "which one is the lattices terminal?"
|
|
184
|
-
focus = bring a window to the front. "focus Slack", "show me Chrome"
|
|
185
|
-
Recovery:
|
|
186
|
-
undo = restore previous positions after a move. "put it back", "undo that"
|
|
187
|
-
Information:
|
|
188
|
-
list_windows, list_sessions, search = answer questions about the desktop. NO actions needed for pure questions.
|
|
189
|
-
Session lifecycle:
|
|
190
|
-
launch = start a project session. "open my frontend project"
|
|
191
|
-
kill = stop a session. "kill the API"
|
|
192
|
-
|
|
193
|
-
TILING PRESETS (use multiple tile_window actions):
|
|
194
|
-
"split screen" / "side by side" → left + right
|
|
195
|
-
"thirds" → left-third, center-third, right-third
|
|
196
|
-
"main + sidebar" → main app left (or maximize), others stacked right
|
|
197
|
-
"stack" → top + bottom
|
|
198
|
-
"corners" / "quadrants" → top-left, top-right, bottom-left, bottom-right
|
|
199
|
-
"six-up" / "3 by 2" → 3x2 grid using sixth positions
|
|
200
|
-
"eight-up" / "4 by 2" → 4x2 grid using eighth positions
|
|
201
|
-
|
|
202
|
-
TILING PRESETS (use distribute intent):
|
|
203
|
-
"mosaic" / "grid" / "spread out" → distribute (all windows, full screen)
|
|
204
|
-
"grid the terminals" → distribute with app:"iTerm2"
|
|
205
|
-
"terminals on the right" → distribute with app:"iTerm2", region:"right"
|
|
206
|
-
"organize chrome on the left" → distribute with app:"Google Chrome", region:"left"
|
|
207
|
-
`;
|
|
208
|
-
|
|
209
|
-
systemPrompt = systemPrompt.replace("{{intent_catalog}}", intentCatalog);
|
|
210
|
-
|
|
211
|
-
// ── Build the per-turn message ─────────────────────────────────────
|
|
212
|
-
|
|
213
|
-
let userMessage = `USER: "${req.transcript}"\n\n`;
|
|
214
|
-
userMessage += "--- DESKTOP SNAPSHOT ---\n";
|
|
215
|
-
|
|
216
|
-
const snap = req.snapshot;
|
|
217
|
-
if (snap.stageManager) {
|
|
218
|
-
userMessage += `Stage Manager: ON (grouping: ${snap.smGrouping ?? "all-at-once"})\n\n`;
|
|
219
|
-
userMessage += `Active stage (${snap.activeStage?.length ?? 0} windows):\n`;
|
|
220
|
-
for (const w of snap.activeStage ?? []) {
|
|
221
|
-
userMessage += ` [${w.wid}] ${w.app}: "${w.title}" — ${w.frame}\n`;
|
|
222
|
-
}
|
|
223
|
-
userMessage += `\nStrip: ${snap.stripApps?.join(", ") ?? "none"}\n`;
|
|
224
|
-
userMessage += `Other stages: ${snap.hiddenApps?.join(", ") ?? "none"}\n`;
|
|
225
|
-
} else {
|
|
226
|
-
userMessage += "Stage Manager: OFF\n";
|
|
227
|
-
userMessage += `Visible windows (${snap.activeStage?.length ?? 0}):\n`;
|
|
228
|
-
for (const w of snap.activeStage ?? []) {
|
|
229
|
-
userMessage += ` [${w.wid}] ${w.app}: "${w.title}" — ${w.frame}\n`;
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
if (snap.currentLayer) {
|
|
234
|
-
userMessage += `\nCurrent layer: ${snap.currentLayer}\n`;
|
|
235
|
-
}
|
|
236
|
-
if (snap.screen) {
|
|
237
|
-
userMessage += `Screen: ${snap.screen}\n`;
|
|
40
|
+
const localPlan = tryLocalAssistantPlan(transcript, req.snapshot ?? {});
|
|
41
|
+
if (localPlan) {
|
|
42
|
+
console.log(JSON.stringify(localPlan));
|
|
43
|
+
process.exit(0);
|
|
238
44
|
}
|
|
239
|
-
userMessage += "--- END SNAPSHOT ---\n";
|
|
240
45
|
|
|
241
46
|
// ── Call inference ──────────────────────────────────────────────────
|
|
242
47
|
|
|
@@ -257,9 +62,11 @@ try {
|
|
|
257
62
|
});
|
|
258
63
|
|
|
259
64
|
// Output result as JSON to stdout
|
|
65
|
+
const plan = normalizeAssistantPlan(data, transcript);
|
|
260
66
|
const output = {
|
|
261
|
-
...
|
|
67
|
+
...plan,
|
|
262
68
|
_meta: {
|
|
69
|
+
...plan._meta,
|
|
263
70
|
provider: raw.provider,
|
|
264
71
|
model: raw.model,
|
|
265
72
|
durationMs: raw.durationMs,
|
package/bin/handsoff-worker.ts
CHANGED
|
@@ -16,7 +16,14 @@
|
|
|
16
16
|
* {"ok":false,"error":"..."}
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
|
-
import {
|
|
19
|
+
import {
|
|
20
|
+
assistantPromptPath,
|
|
21
|
+
buildAssistantContextMessage,
|
|
22
|
+
buildAssistantSystemPrompt,
|
|
23
|
+
normalizeAssistantPlan,
|
|
24
|
+
tryLocalAssistantPlan,
|
|
25
|
+
} from "./assistant-intelligence.ts";
|
|
26
|
+
import { infer } from "../lib/infer.ts";
|
|
20
27
|
|
|
21
28
|
const INFER_TIMEOUT_MS = 15_000;
|
|
22
29
|
|
|
@@ -55,7 +62,7 @@ async function inferSmart(prompt: string, options: any): Promise<{ data: any; ra
|
|
|
55
62
|
};
|
|
56
63
|
}
|
|
57
64
|
import { readFileSync } from "fs";
|
|
58
|
-
import { join
|
|
65
|
+
import { join } from "path";
|
|
59
66
|
import { spawn } from "child_process";
|
|
60
67
|
|
|
61
68
|
// ── Streaming TTS via OpenAI API → ffplay ──────────────────────────
|
|
@@ -275,100 +282,6 @@ function playConfirm(intent: string): Promise<number> {
|
|
|
275
282
|
return playCached(map[intent] ?? "Done.");
|
|
276
283
|
}
|
|
277
284
|
|
|
278
|
-
// ── Fast path: local intent matching (no LLM needed) ──────────────
|
|
279
|
-
|
|
280
|
-
interface FastMatch {
|
|
281
|
-
actions: Array<{ intent: string; slots: Record<string, string> }>;
|
|
282
|
-
confirm: string; // which confirmation to play
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
function tryFastMatch(transcript: string, snapshot: any): FastMatch | null {
|
|
286
|
-
const t = transcript.toLowerCase().trim();
|
|
287
|
-
const activeApps = (snapshot.activeStage ?? []).map((w: any) => ({
|
|
288
|
-
app: w.app as string,
|
|
289
|
-
wid: w.wid as number,
|
|
290
|
-
}));
|
|
291
|
-
|
|
292
|
-
// Tile patterns
|
|
293
|
-
const tileMatch = t.match(
|
|
294
|
-
/(?:tile|snap|put|move)\s+(\w+)\s+(?:to\s+)?(?:the\s+)?(left|right|top|bottom|maximize|center|top.?left|top.?right|bottom.?left|bottom.?right|left.?third|center.?third|right.?third)/
|
|
295
|
-
);
|
|
296
|
-
if (tileMatch) {
|
|
297
|
-
const app = tileMatch[1];
|
|
298
|
-
const pos = tileMatch[2].replace(/\s+/g, "-");
|
|
299
|
-
return {
|
|
300
|
-
actions: [{ intent: "tile_window", slots: { app, position: pos } }],
|
|
301
|
-
confirm: "tile_window",
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
// Split screen: "split X and Y" or "X left Y right"
|
|
306
|
-
const splitMatch = t.match(/split\s+(\w+)\s+(?:and|&)\s+(\w+)/);
|
|
307
|
-
if (splitMatch) {
|
|
308
|
-
return {
|
|
309
|
-
actions: [
|
|
310
|
-
{ intent: "tile_window", slots: { app: splitMatch[1], position: "left" } },
|
|
311
|
-
{ intent: "tile_window", slots: { app: splitMatch[2], position: "right" } },
|
|
312
|
-
],
|
|
313
|
-
confirm: "tile_window",
|
|
314
|
-
};
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
// Focus: "focus X" / "focus on X" / "switch to X" / "go to X"
|
|
318
|
-
const focusMatch = t.match(/(?:focus(?:\s+on)?|switch\s+to|go\s+to|show)\s+(?:the\s+)?(?:on\s+)?(\w+)/);
|
|
319
|
-
if (focusMatch && !t.includes("tile") && !t.includes("split")) {
|
|
320
|
-
const app = focusMatch[1];
|
|
321
|
-
if (app && app !== "on" && app !== "the") {
|
|
322
|
-
return {
|
|
323
|
-
actions: [{ intent: "focus", slots: { app } }],
|
|
324
|
-
confirm: "focus",
|
|
325
|
-
};
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
// Maximize: "maximize" / "full screen" / "make it big"
|
|
330
|
-
if (/maximize|full\s*screen|make\s+it\s+big/.test(t)) {
|
|
331
|
-
return {
|
|
332
|
-
actions: [{ intent: "tile_window", slots: { position: "maximize" } }],
|
|
333
|
-
confirm: "tile_window",
|
|
334
|
-
};
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
// Distribute: "grid" / "mosaic" / "distribute" / "even"
|
|
338
|
-
if (/grid|mosaic|distribute|even\s+(?:out|grid)|arrange/.test(t)) {
|
|
339
|
-
return {
|
|
340
|
-
actions: [{ intent: "distribute", slots: {} }],
|
|
341
|
-
confirm: "distribute",
|
|
342
|
-
};
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
// Corners: "quadrants" / "four corners"
|
|
346
|
-
if (/quadrants?|four\s+corners?|corners/.test(t) && activeApps.length >= 4) {
|
|
347
|
-
const positions = ["top-left", "top-right", "bottom-left", "bottom-right"];
|
|
348
|
-
return {
|
|
349
|
-
actions: activeApps.slice(0, 4).map((a: any, i: number) => ({
|
|
350
|
-
intent: "tile_window",
|
|
351
|
-
slots: { app: a.app, position: positions[i] },
|
|
352
|
-
})),
|
|
353
|
-
confirm: "tile_window",
|
|
354
|
-
};
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
// Thirds: "thirds"
|
|
358
|
-
if (/thirds/.test(t) && activeApps.length >= 3) {
|
|
359
|
-
const positions = ["left-third", "center-third", "right-third"];
|
|
360
|
-
return {
|
|
361
|
-
actions: activeApps.slice(0, 3).map((a: any, i: number) => ({
|
|
362
|
-
intent: "tile_window",
|
|
363
|
-
slots: { app: a.app, position: positions[i] },
|
|
364
|
-
})),
|
|
365
|
-
confirm: "tile_window",
|
|
366
|
-
};
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
return null; // No fast match — fall through to LLM
|
|
370
|
-
}
|
|
371
|
-
|
|
372
285
|
// Warm up cache on startup
|
|
373
286
|
ensureVoiceCache().then(() => log("voice cache ready"));
|
|
374
287
|
|
|
@@ -376,70 +289,14 @@ log("worker started, streaming TTS ready");
|
|
|
376
289
|
|
|
377
290
|
// ── Load system prompt once ────────────────────────────────────────
|
|
378
291
|
|
|
379
|
-
const
|
|
380
|
-
let systemPrompt: string;
|
|
381
|
-
try {
|
|
382
|
-
systemPrompt = readFileSync(join(promptDir, "hands-off-system.md"), "utf-8")
|
|
383
|
-
.split("\n")
|
|
384
|
-
.filter((l) => !l.startsWith("# "))
|
|
385
|
-
.join("\n")
|
|
386
|
-
.trim();
|
|
387
|
-
} catch {
|
|
388
|
-
systemPrompt = "You are a workspace assistant. Respond with JSON: {actions, spoken}.";
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
const intentCatalog = `
|
|
392
|
-
tile_window: Tile a window to a screen position
|
|
393
|
-
Slots:
|
|
394
|
-
position (required): Named position or grid:CxR:C,R syntax.
|
|
395
|
-
Halves: left, right, top, bottom
|
|
396
|
-
Quarters (2x2): top-left, top-right, bottom-left, bottom-right
|
|
397
|
-
Thirds (3x1): left-third, center-third, right-third
|
|
398
|
-
Sixths (3x2): top-left-third, top-center-third, top-right-third, bottom-left-third, bottom-center-third, bottom-right-third
|
|
399
|
-
Fourths (4x1): first-fourth, second-fourth, third-fourth, last-fourth
|
|
400
|
-
Eighths (4x2): top-first-fourth, top-second-fourth, top-third-fourth, top-last-fourth, bottom-first-fourth, bottom-second-fourth, bottom-third-fourth, bottom-last-fourth
|
|
401
|
-
Special: maximize (full screen), center (centered floating)
|
|
402
|
-
Grid syntax: grid:CxR:C,R (e.g. grid:5x3:2,1 = center cell of 5x3 grid)
|
|
403
|
-
app (optional): Target app name — match loosely (e.g. "chrome" matches "Google Chrome")
|
|
404
|
-
wid (optional): Target window ID (from snapshot)
|
|
405
|
-
session (optional): Tmux session name
|
|
406
|
-
If no app/wid/session given, tiles the frontmost window.
|
|
407
|
-
"quarter" = 2x2 cell (top-left etc.), NOT a 4x1 fourth.
|
|
408
|
-
"top quarter" = top-left or top-right (2x2). "top third" = top-left-third (3x2).
|
|
409
|
-
|
|
410
|
-
focus: Focus a window, app, or session
|
|
411
|
-
Slots: app, session, or wid (at least one)
|
|
412
|
-
|
|
413
|
-
distribute: Arrange all visible windows in an even grid. No slots.
|
|
414
|
-
|
|
415
|
-
search: Search windows by text
|
|
416
|
-
Slots: query (required)
|
|
417
|
-
|
|
418
|
-
list_windows: List all visible windows. No slots.
|
|
419
|
-
|
|
420
|
-
switch_layer: Switch to a workspace layer
|
|
421
|
-
Slots: layer (required) — name or index
|
|
422
|
-
|
|
423
|
-
create_layer: Save current arrangement as a named layer
|
|
424
|
-
Slots: name (required)
|
|
425
|
-
|
|
426
|
-
TILING PRESETS (use multiple tile_window actions):
|
|
427
|
-
"split screen" → left + right
|
|
428
|
-
"thirds" → left-third, center-third, right-third
|
|
429
|
-
"mosaic"/"grid" → use distribute
|
|
430
|
-
"corners"/"quadrants" → top-left, top-right, bottom-left, bottom-right
|
|
431
|
-
"stack" → top + bottom
|
|
432
|
-
"six-up"/"3 by 2" → 3x2 grid using the sixth positions
|
|
433
|
-
"eight-up"/"4 by 2" → 4x2 grid using the eighth positions
|
|
434
|
-
`;
|
|
435
|
-
|
|
436
|
-
systemPrompt = systemPrompt.replace("{{intent_catalog}}", intentCatalog);
|
|
292
|
+
const systemPrompt = buildAssistantSystemPrompt();
|
|
437
293
|
log("system prompt loaded");
|
|
438
294
|
|
|
439
295
|
// ── Auto-restart on file changes ───────────────────────────────────
|
|
440
296
|
|
|
441
297
|
const watchFiles = [
|
|
442
|
-
|
|
298
|
+
assistantPromptPath,
|
|
299
|
+
join(import.meta.dir, "assistant-intelligence.ts"),
|
|
443
300
|
import.meta.path, // this script itself
|
|
444
301
|
];
|
|
445
302
|
|
|
@@ -458,89 +315,6 @@ for (const f of watchFiles) {
|
|
|
458
315
|
} catch {}
|
|
459
316
|
}
|
|
460
317
|
|
|
461
|
-
// ── Build context message from snapshot ─────────────────────────────
|
|
462
|
-
|
|
463
|
-
function buildContextMessage(transcript: string, snap: any): string {
|
|
464
|
-
let msg = `USER: "${transcript}"\n\n`;
|
|
465
|
-
msg += "--- DESKTOP SNAPSHOT ---\n";
|
|
466
|
-
|
|
467
|
-
// Screens
|
|
468
|
-
const screens = snap.screens ?? [];
|
|
469
|
-
if (screens.length > 1) {
|
|
470
|
-
msg += `Displays: ${screens.map((s: any) => `${s.width}x${s.height}${s.isMain ? " (main)" : ""}`).join(", ")}\n`;
|
|
471
|
-
} else if (screens.length === 1) {
|
|
472
|
-
msg += `Screen: ${screens[0].width}x${screens[0].height}\n`;
|
|
473
|
-
}
|
|
474
|
-
|
|
475
|
-
// Stage Manager
|
|
476
|
-
if (snap.stageManager) {
|
|
477
|
-
msg += `Stage Manager: ON (grouping: ${snap.smGrouping ?? "all-at-once"})\n`;
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
// All windows — full inventory, ordered front-to-back (zIndex 0 = frontmost)
|
|
481
|
-
const windows = snap.windows ?? snap.activeStage ?? [];
|
|
482
|
-
const onScreen = windows.filter((w: any) => w.onScreen !== false);
|
|
483
|
-
const offScreen = windows.filter((w: any) => w.onScreen === false);
|
|
484
|
-
|
|
485
|
-
msg += `\nVisible windows (${onScreen.length}, front-to-back order):\n`;
|
|
486
|
-
for (const w of onScreen) {
|
|
487
|
-
const flags: string[] = [];
|
|
488
|
-
if (w.zIndex === 0) flags.push("FRONTMOST");
|
|
489
|
-
if (w.session) flags.push(`session:${w.session}`);
|
|
490
|
-
const flagStr = flags.length ? ` [${flags.join(", ")}]` : "";
|
|
491
|
-
msg += ` wid:${w.wid} ${w.app}: "${w.title}" — ${w.frame}${flagStr}\n`;
|
|
492
|
-
}
|
|
493
|
-
|
|
494
|
-
if (offScreen.length > 0) {
|
|
495
|
-
// Summarize hidden windows by app instead of listing all
|
|
496
|
-
const hiddenByApp: Record<string, number> = {};
|
|
497
|
-
for (const w of offScreen) {
|
|
498
|
-
const app = w.app;
|
|
499
|
-
hiddenByApp[app] = (hiddenByApp[app] || 0) + 1;
|
|
500
|
-
}
|
|
501
|
-
const summary = Object.entries(hiddenByApp)
|
|
502
|
-
.filter(([app]) => !["WindowManager", "Spotlight", "CursorUIViewService", "AutoFill", "coreautha", "loginwindow", "Open and Save Panel Service"].includes(app))
|
|
503
|
-
.map(([app, count]) => `${app}(${count})`)
|
|
504
|
-
.join(", ");
|
|
505
|
-
if (summary) {
|
|
506
|
-
msg += `\nHidden windows: ${summary}\n`;
|
|
507
|
-
}
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
// Terminals — cwd, running commands, claude, tmux
|
|
511
|
-
const terminals = snap.terminals ?? [];
|
|
512
|
-
if (terminals.length > 0) {
|
|
513
|
-
msg += `\nTerminal tabs (${terminals.length}):\n`;
|
|
514
|
-
for (const t of terminals) {
|
|
515
|
-
const flags: string[] = [];
|
|
516
|
-
if (t.hasClaude) flags.push("Claude Code");
|
|
517
|
-
if (t.tmuxSession) flags.push(`tmux:${t.tmuxSession}`);
|
|
518
|
-
if (!t.isActiveTab) flags.push("background tab");
|
|
519
|
-
const flagStr = flags.length ? ` [${flags.join(", ")}]` : "";
|
|
520
|
-
const cwd = t.cwd ? ` cwd:${t.cwd.replace(/^\/Users\/\w+\//, "~/")}` : "";
|
|
521
|
-
const cmds = (t.runningCommands ?? []).map((c: any) => c.command).join(", ");
|
|
522
|
-
const cmdStr = cmds ? ` running:${cmds}` : "";
|
|
523
|
-
msg += ` ${t.displayName}${cwd}${cmdStr}${flagStr}`;
|
|
524
|
-
if (t.windowId) msg += ` (wid:${t.windowId})`;
|
|
525
|
-
msg += "\n";
|
|
526
|
-
}
|
|
527
|
-
}
|
|
528
|
-
|
|
529
|
-
// Tmux sessions
|
|
530
|
-
const tmux = snap.tmuxSessions ?? [];
|
|
531
|
-
if (tmux.length > 0) {
|
|
532
|
-
msg += `\nTmux sessions: ${tmux.map((s: any) => `${s.name} (${s.windows} windows${s.attached ? ", attached" : ""})`).join(", ")}\n`;
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
// Layer
|
|
536
|
-
if (snap.currentLayer) {
|
|
537
|
-
msg += `\nCurrent layer: ${snap.currentLayer.name} (index: ${snap.currentLayer.index})\n`;
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
msg += "--- END SNAPSHOT ---\n";
|
|
541
|
-
return msg;
|
|
542
|
-
}
|
|
543
|
-
|
|
544
318
|
// ── Command loop ───────────────────────────────────────────────────
|
|
545
319
|
|
|
546
320
|
const decoder = new TextDecoder();
|
|
@@ -588,7 +362,13 @@ async function processLine(line: string) {
|
|
|
588
362
|
|
|
589
363
|
case "infer":
|
|
590
364
|
try {
|
|
591
|
-
const
|
|
365
|
+
const localPlan = tryLocalAssistantPlan(cmd.transcript, cmd.snapshot ?? {});
|
|
366
|
+
if (localPlan) {
|
|
367
|
+
respond({ ok: true, data: localPlan });
|
|
368
|
+
break;
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
const userMessage = buildAssistantContextMessage(cmd.transcript, cmd.snapshot ?? {});
|
|
592
372
|
|
|
593
373
|
const messages = (cmd.history ?? []).map((h: any) => ({
|
|
594
374
|
role: h.role as "user" | "assistant",
|
|
@@ -605,11 +385,13 @@ async function processLine(line: string) {
|
|
|
605
385
|
tag: "hands-off",
|
|
606
386
|
});
|
|
607
387
|
|
|
388
|
+
const plan = normalizeAssistantPlan(data, cmd.transcript);
|
|
608
389
|
respond({
|
|
609
390
|
ok: true,
|
|
610
391
|
data: {
|
|
611
|
-
...
|
|
392
|
+
...plan,
|
|
612
393
|
_meta: {
|
|
394
|
+
...plan._meta,
|
|
613
395
|
provider: raw.provider,
|
|
614
396
|
model: raw.model,
|
|
615
397
|
durationMs: raw.durationMs,
|
|
@@ -649,30 +431,35 @@ async function processLine(line: string) {
|
|
|
649
431
|
// Fire cached ack sound + inference in PARALLEL
|
|
650
432
|
const ackPromise = playAck().catch((e) => log(`ack error: ${e.message}`));
|
|
651
433
|
|
|
652
|
-
// Build full context message from snapshot
|
|
653
|
-
const userMessage = buildContextMessage(transcript, snap);
|
|
654
|
-
|
|
655
434
|
const messages = history.map((h: any) => ({
|
|
656
435
|
role: h.role as "user" | "assistant",
|
|
657
436
|
content: typeof h.content === "string" ? h.content : JSON.stringify(h.content),
|
|
658
437
|
})).filter((m: any) => m.content && m.content.length > 0);
|
|
659
438
|
|
|
660
439
|
let inferResult: any = null;
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
440
|
+
const localPlan = tryLocalAssistantPlan(transcript, snap);
|
|
441
|
+
if (localPlan) {
|
|
442
|
+
inferResult = localPlan;
|
|
443
|
+
log("local planner matched");
|
|
444
|
+
} else {
|
|
445
|
+
const userMessage = buildAssistantContextMessage(transcript, snap);
|
|
446
|
+
try {
|
|
447
|
+
const { data, raw } = await inferSmart(userMessage, {
|
|
448
|
+
provider: "xai",
|
|
449
|
+
model: "grok-4.20-beta-0309-non-reasoning",
|
|
450
|
+
system: systemPrompt,
|
|
451
|
+
messages,
|
|
452
|
+
temperature: 0.2,
|
|
453
|
+
maxTokens: 512,
|
|
454
|
+
tag: "hands-off",
|
|
455
|
+
});
|
|
456
|
+
const plan = normalizeAssistantPlan(data, transcript);
|
|
457
|
+
inferResult = { ...plan, _meta: { ...plan._meta, provider: raw.provider, model: raw.model, durationMs: raw.durationMs, tokens: raw.usage?.totalTokens } };
|
|
458
|
+
log(`⏱ inference done in ${raw.durationMs}ms`);
|
|
459
|
+
} catch (err: any) {
|
|
460
|
+
log(`⏱ inference error: ${err.message}`);
|
|
461
|
+
inferResult = { actions: [], spoken: "Sorry, I had trouble with that.", _meta: { error: err.message } };
|
|
462
|
+
}
|
|
676
463
|
}
|
|
677
464
|
|
|
678
465
|
// Wait for ack to finish before narrating (don't overlap speech)
|