@lattices/cli 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/README.md +85 -9
  2. package/app/Info.plist +30 -0
  3. package/app/Lattices.app/Contents/Info.plist +8 -2
  4. package/app/Lattices.app/Contents/MacOS/Lattices +0 -0
  5. package/app/Lattices.app/Contents/Resources/AppIcon.icns +0 -0
  6. package/app/Lattices.app/Contents/Resources/tap.wav +0 -0
  7. package/app/Lattices.app/Contents/_CodeSignature/CodeResources +139 -0
  8. package/app/Lattices.entitlements +15 -0
  9. package/app/Package.swift +8 -1
  10. package/app/Resources/tap.wav +0 -0
  11. package/app/Sources/AdvisorLearningStore.swift +90 -0
  12. package/app/Sources/AgentSession.swift +377 -0
  13. package/app/Sources/AppDelegate.swift +45 -12
  14. package/app/Sources/AppShellView.swift +81 -8
  15. package/app/Sources/AudioProvider.swift +386 -0
  16. package/app/Sources/CheatSheetHUD.swift +261 -19
  17. package/app/Sources/DaemonProtocol.swift +13 -0
  18. package/app/Sources/DaemonServer.swift +8 -0
  19. package/app/Sources/DesktopModel.swift +189 -6
  20. package/app/Sources/DesktopModelTypes.swift +2 -0
  21. package/app/Sources/DiagnosticLog.swift +104 -2
  22. package/app/Sources/EventBus.swift +1 -0
  23. package/app/Sources/HUDBottomBar.swift +279 -0
  24. package/app/Sources/HUDController.swift +1158 -0
  25. package/app/Sources/HUDLeftBar.swift +849 -0
  26. package/app/Sources/HUDMinimap.swift +179 -0
  27. package/app/Sources/HUDRightBar.swift +774 -0
  28. package/app/Sources/HUDState.swift +367 -0
  29. package/app/Sources/HUDTopBar.swift +243 -0
  30. package/app/Sources/HandsOffSession.swift +802 -0
  31. package/app/Sources/HomeDashboardView.swift +125 -0
  32. package/app/Sources/HotkeyManager.swift +2 -0
  33. package/app/Sources/HotkeyStore.swift +49 -9
  34. package/app/Sources/IntentEngine.swift +962 -0
  35. package/app/Sources/Intents/CreateLayerIntent.swift +54 -0
  36. package/app/Sources/Intents/DistributeIntent.swift +56 -0
  37. package/app/Sources/Intents/FocusIntent.swift +69 -0
  38. package/app/Sources/Intents/HelpIntent.swift +41 -0
  39. package/app/Sources/Intents/KillIntent.swift +47 -0
  40. package/app/Sources/Intents/LatticeIntent.swift +78 -0
  41. package/app/Sources/Intents/LaunchIntent.swift +67 -0
  42. package/app/Sources/Intents/ListSessionsIntent.swift +32 -0
  43. package/app/Sources/Intents/ListWindowsIntent.swift +30 -0
  44. package/app/Sources/Intents/ScanIntent.swift +52 -0
  45. package/app/Sources/Intents/SearchIntent.swift +190 -0
  46. package/app/Sources/Intents/SwitchLayerIntent.swift +50 -0
  47. package/app/Sources/Intents/TileIntent.swift +61 -0
  48. package/app/Sources/LatticesApi.swift +1275 -30
  49. package/app/Sources/LauncherHUD.swift +348 -0
  50. package/app/Sources/MainView.swift +147 -44
  51. package/app/Sources/MouseFinder.swift +222 -0
  52. package/app/Sources/OcrModel.swift +34 -1
  53. package/app/Sources/OmniSearchState.swift +99 -102
  54. package/app/Sources/OnboardingView.swift +457 -0
  55. package/app/Sources/PermissionChecker.swift +2 -12
  56. package/app/Sources/PiChatDock.swift +454 -0
  57. package/app/Sources/PiChatSession.swift +815 -0
  58. package/app/Sources/PiWorkspaceView.swift +364 -0
  59. package/app/Sources/PlacementSpec.swift +195 -0
  60. package/app/Sources/Preferences.swift +59 -0
  61. package/app/Sources/ProjectScanner.swift +58 -45
  62. package/app/Sources/ScreenMapState.swift +701 -55
  63. package/app/Sources/ScreenMapView.swift +843 -103
  64. package/app/Sources/ScreenMapWindowController.swift +22 -0
  65. package/app/Sources/SessionLayerStore.swift +285 -0
  66. package/app/Sources/SessionManager.swift +4 -1
  67. package/app/Sources/SettingsView.swift +186 -3
  68. package/app/Sources/Theme.swift +9 -8
  69. package/app/Sources/TmuxModel.swift +7 -0
  70. package/app/Sources/TmuxQuery.swift +27 -3
  71. package/app/Sources/VoiceChatView.swift +192 -0
  72. package/app/Sources/VoiceCommandWindow.swift +1594 -0
  73. package/app/Sources/VoiceIntentResolver.swift +671 -0
  74. package/app/Sources/VoxClient.swift +454 -0
  75. package/app/Sources/WindowTiler.swift +348 -87
  76. package/app/Sources/WorkspaceManager.swift +127 -18
  77. package/app/Tests/StageDragTests.swift +333 -0
  78. package/app/Tests/StageJoinTests.swift +313 -0
  79. package/app/Tests/StageManagerTests.swift +280 -0
  80. package/app/Tests/StageTileTests.swift +353 -0
  81. package/assets/AppIcon.icns +0 -0
  82. package/bin/client.ts +16 -0
  83. package/bin/{daemon-client.js → daemon-client.ts} +49 -30
  84. package/bin/handsoff-infer.ts +280 -0
  85. package/bin/handsoff-worker.ts +740 -0
  86. package/bin/lattices-app.ts +338 -0
  87. package/bin/lattices-dev +208 -0
  88. package/bin/{lattices.js → lattices.ts} +777 -140
  89. package/bin/project-twin.ts +645 -0
  90. package/docs/agent-execution-plan.md +562 -0
  91. package/docs/agent-layer-guide.md +207 -0
  92. package/docs/agents.md +142 -0
  93. package/docs/api.md +153 -34
  94. package/docs/app.md +29 -1
  95. package/docs/config.md +5 -1
  96. package/docs/handsoff-test-scenarios.md +84 -0
  97. package/docs/layers.md +20 -20
  98. package/docs/ocr.md +14 -5
  99. package/docs/overview.md +5 -1
  100. package/docs/presentation-execution-review.md +491 -0
  101. package/docs/prompts/hands-off-system.md +374 -0
  102. package/docs/prompts/hands-off-turn.md +30 -0
  103. package/docs/prompts/voice-advisor.md +31 -0
  104. package/docs/prompts/voice-fallback.md +23 -0
  105. package/docs/tiling-reference.md +167 -0
  106. package/docs/twins.md +138 -0
  107. package/docs/voice-command-protocol.md +278 -0
  108. package/docs/voice.md +219 -0
  109. package/package.json +29 -11
  110. package/bin/client.js +0 -4
  111. package/bin/lattices-app.js +0 -221
package/docs/twins.md ADDED
@@ -0,0 +1,138 @@
1
+ ---
2
+ title: Project Twins
3
+ description: Pi-backed project twins for mediated, persistent agent execution
4
+ order: 3
5
+ ---
6
+
7
+ A project twin is a persistent software counterpart to a codebase.
8
+
9
+ It is not the primary agent. It is the project-native runtime that sits
10
+ between a general-purpose caller and the project's execution protocol.
11
+
12
+ ## Why a twin exists
13
+
14
+ General-purpose agents are interchangeable. Project protocols are not.
15
+
16
+ If every primary agent has to learn the project's tool surface, memory
17
+ policy, protocol semantics, and context conventions from scratch, the
18
+ integration becomes brittle. A twin fixes that by becoming the stable
19
+ project-facing runtime:
20
+
21
+ - The **primary agent** asks for work
22
+ - The **twin** resumes with the right context and memory
23
+ - The **protocol** stays behind the twin boundary
24
+
25
+ ```text
26
+ primary agent -> project twin -> project protocol / harness
27
+ ```
28
+
29
+ The twin is the client of record for the project.
30
+
31
+ ## Responsibilities
32
+
33
+ A project twin owns:
34
+
35
+ - Project-scoped identity
36
+ - Persistent session continuity
37
+ - Memory compaction and continuation
38
+ - Tool policy and allowed capabilities
39
+ - Protocol knowledge
40
+ - Project context assembly
41
+ - Caller-facing summaries and handoffs
42
+
43
+ A primary agent should not speak the project protocol directly. It should
44
+ invoke the twin.
45
+
46
+ ## Pi-backed runtime
47
+
48
+ Pi is a good fit for the twin runtime because it already provides:
49
+
50
+ - Persistent sessions
51
+ - RPC mode for long-running subprocess integration
52
+ - Tool calling with an explicit harness
53
+ - Compaction and summarization hooks
54
+ - Context files, prompt templates, and extension loading
55
+
56
+ That makes the split:
57
+
58
+ - **Twin**: product concept and policy boundary
59
+ - **Pi**: reasoning and session runtime
60
+ - **Host system**: orchestration, durable memory, and protocol adapters
61
+
62
+ Pi powers the twin. It does not define the twin.
63
+
64
+ ## Invocation model
65
+
66
+ The primary agent makes a single mediated call into the twin:
67
+
68
+ 1. Resume the twin session
69
+ 2. Inject caller context, project memory, and protocol state
70
+ 3. Let the twin do project-local work inside the harness
71
+ 4. Return a concise result to the caller
72
+
73
+ The caller should see a stable capability surface such as:
74
+
75
+ - `status`
76
+ - `inspect`
77
+ - `plan`
78
+ - `execute`
79
+ - `summarize`
80
+ - `handoff`
81
+
82
+ It should not see raw protocol-shaped operations unless that protocol is
83
+ itself the public product surface.
84
+
85
+ ## Implementation in this repo
86
+
87
+ This repo now includes a Pi-backed runtime in
88
+ [`bin/project-twin.ts`](../bin/project-twin.ts).
89
+
90
+ The runtime:
91
+
92
+ - Spawns `pi --mode rpc` as a persistent subprocess
93
+ - Stores project-local session state under `.openscout/twins/<name>/sessions`
94
+ - Exposes a stable `invoke()` API for callers
95
+ - Optionally injects OpenScout relay context if `.openscout/relay*` exists
96
+
97
+ The default harness is intentionally narrow:
98
+
99
+ - Built-in Pi tools are explicitly pinned to `read,bash,edit,write`
100
+ - Extension, skill, and prompt-template discovery are disabled by default
101
+ - Project instructions still come from `AGENTS.md` and related context files
102
+
103
+ This keeps the twin deterministic unless the host explicitly widens the
104
+ surface.
105
+
106
+ ## Example
107
+
108
+ ```ts
109
+ import { ProjectTwin } from "@lattices/cli/project-twin"
110
+
111
+ const twin = new ProjectTwin({
112
+ cwd: "/Users/you/dev/my-project",
113
+ name: "my-project",
114
+ model: "anthropic/claude-sonnet-4-5",
115
+ })
116
+
117
+ await twin.start()
118
+
119
+ const result = await twin.invoke({
120
+ caller: "primary-agent",
121
+ protocol: "openscout-relay",
122
+ memory: "The caller is debugging relay enrollment and wants the next safe action.",
123
+ task: "Inspect the available project context and summarize what the caller should do next.",
124
+ })
125
+
126
+ console.log(result.text)
127
+
128
+ await twin.stop()
129
+ ```
130
+
131
+ ## Design rule
132
+
133
+ All project-specific protocol semantics should live behind the twin
134
+ boundary.
135
+
136
+ The primary agent should invoke the twin as a skill-like capability.
137
+ The twin should own context assembly, protocol interaction, and the final
138
+ handoff back to the caller.
package/docs/voice-command-protocol.md ADDED
@@ -0,0 +1,278 @@
1
+ # Voice Command Protocol — Lattices ↔ Vox
2
+
3
+ ## Overview
4
+
5
+ Lattices delegates all audio capture and transcription to Vox via WebSocket JSON-RPC. Lattices never accesses the microphone directly — it borrows Vox's mic and transcription pipeline, receives English text back, and routes it through its own intent engine.
6
+
7
+ These dictations are **ephemeral** — Vox does not persist them as memos, sync them, or add them to Vox's history. Lattices is just using Vox as a transcription pipe.
8
+
9
+ ## Vox Process Model
10
+
11
+ Vox consists of three independent processes:
12
+
13
+ | Process | Role | Relevance to Lattices |
14
+ |---|---|---|
15
+ | **Vox.app** | Main UI — menu bar, notch visualization, memo history | None |
16
+ | **Vox** | Background service — mic access, recording, hotkeys, orchestrates transcription, state notifications | **This is what Lattices connects to** |
17
+ | **VoxEngine** | Transcription engine — runs Whisper models, called by Vox internally | Indirect — Vox delegates to it |
18
+
19
+ Vox is the right target because:
20
+ - It owns the mic and recording lifecycle
21
+ - It's the long-running background process (always up when Vox is installed)
22
+ - It already orchestrates the record → transcribe → result pipeline
23
+ - It's easy to discover via its existing DistributedNotification
24
+
25
+ ## Service Discovery
26
+
27
+ Lattices never hardcodes ports. Discovery uses two mechanisms, followed by a health check:
28
+
29
+ ### 1. Well-known file (at rest)
30
+
31
+ Vox writes its service configuration on startup:
32
+
33
+ ```
34
+ ~/.vox/services.json
35
+ ```
36
+
37
+ ```json
38
+ {
39
+ "agent": {"port": 19823, "pid": 48209},
40
+ "engine": {"port": 19821, "pid": 48210},
41
+ "sync": {"port": 19820, "pid": 48208},
42
+ "inference": {"port": 19822, "pid": 48212}
43
+ }
44
+ ```
45
+
46
+ Lattices reads `agent.port` from this file. If the file doesn't exist, Vox is either not installed or has not been started yet (see "When Vox is not running" below).
47
+
48
+ ### 2. DistributedNotification (live discovery)
49
+
50
+ Vox posts when it comes online:
51
+
52
+ ```
53
+ Notification: com.jdi.vox.agent.live.ready
54
+ UserInfo: {"agentPort": 19823, "pid": 48209}
55
+ ```
56
+
57
+ Lattices subscribes to this notification on startup. This handles:
58
+ - **Vox launches after Lattices** — Lattices picks up the port dynamically
59
+ - **Vox restarts** — Lattices reconnects with the new port
60
+ - **Port changes** — no stale config
61
+
62
+ ### 3. Health check
63
+
64
+ After discovering a port, Lattices confirms Vox is alive:
65
+
66
+ ```json
67
+ → {"id": "hc", "method": "ping"}
68
+ ← {"id": "hc", "result": {"pong": true}}
69
+ ```
70
+
71
+ If ping fails, Lattices marks voice as unavailable and retries on the next `live.ready` or after ~30 seconds.
72
+
73
+ ### When Vox is not running
74
+
75
+ Three possible states:
76
+
77
+ | State | How detected | Lattices behavior |
78
+ |---|---|---|
79
+ | **Not installed** | `/Applications/Vox.app` doesn't exist and no `~/.vox/` dir | Footer: `[Space] Voice (unavailable)` — no recovery action |
80
+ | **Installed but not running** | App bundle exists, but `services.json` missing/stale or ping fails | Footer: `[Space] Voice (start Vox)` — pressing Space runs `open /Applications/Vox.app`, which also brings up the Vox background service as a side effect |
81
+ | **Running** | Ping succeeds | Normal operation |
82
+
83
+ Launch-on-demand flow:
84
+ 1. User presses Space while Vox is down but Vox is installed
85
+ 2. Lattices runs `NSWorkspace.shared.open(URL(fileURLWithPath: "/Applications/Vox.app"))`
86
+ 3. Feedback strip shows "Starting Vox..."
87
+ 4. Lattices waits for `live.ready` notification (timeout: 10s)
88
+ 5. On `live.ready`, connects and proceeds with `startDictation`
89
+ 6. On timeout, shows "Couldn't reach Vox — try opening it manually"
90
+
91
+ Passive behavior (no user action):
92
+ - No log spam — just a quiet unavailable state
93
+ - Lattices keeps listening for `live.ready` and re-checks `services.json` periodically (~30s)
94
+ - The moment Vox comes online, voice becomes available — no restart needed
95
+
96
+ ## Protocol
97
+
98
+ ### Wire Format
99
+
100
+ Uses Vox's JSON-RPC format over WebSocket:
101
+
102
+ ```
103
+ Request: {"id": "...", "method": "...", "params": {...}}
104
+ Response: {"id": "...", "result": {...}} or {"id": "...", "error": "..."}
105
+ Event: {"event": "...", "data": {...}} (server push, no id)
106
+ ```
107
+
108
+ ### Methods (Lattices → Vox)
109
+
110
+ **`startDictation`** — Start recording from the mic.
111
+
112
+ ```json
113
+ {"id": "1", "method": "startDictation", "params": {
114
+ "source": "lattices",
115
+ "persist": false
116
+ }}
117
+ ```
118
+
119
+ - `source` — identifies the caller (for Vox's logging/UI)
120
+ - `persist: false` — do not save as a memo, do not sync, do not show in Vox history
121
+
122
+ Response (immediate ack):
123
+ ```json
124
+ {"id": "1", "result": {"ok": true}}
125
+ ```
126
+
127
+ Error responses:
128
+ ```json
129
+ {"id": "1", "error": "Microphone access denied"}
130
+ {"id": "1", "error": "No model loaded"}
131
+ {"id": "1", "error": "mic_busy", "owner": "vox"}
132
+ ```
133
+
134
+ The `mic_busy` error means another consumer (Vox's own memo recording, or another client) already has an active dictation. The `owner` field identifies who holds the mic. Lattices shows: "Mic in use by Vox — finish your memo first".
135
+
136
+ The reverse case (user hits Vox hotkey while Lattices has the mic) is handled on Vox's side — it should reject its own recording with an equivalent busy state. Vox is the single owner of mic arbitration.
137
+
138
+ **`stopDictation`** — Stop recording and return the transcript.
139
+
140
+ ```json
141
+ {"id": "2", "method": "stopDictation"}
142
+ ```
143
+
144
+ Response (after transcription completes):
145
+ ```json
146
+ {"id": "2", "result": {
147
+ "transcript": "tile this left",
148
+ "confidence": 0.94,
149
+ "durationMs": 1820
150
+ }}
151
+ ```
152
+
153
+ **`cancelDictation`** — Abort without transcribing.
154
+
155
+ ```json
156
+ {"id": "3", "method": "cancelDictation"}
157
+ ```
158
+
159
+ ```json
160
+ {"id": "3", "result": {"ok": true}}
161
+ ```
162
+
163
+ ### Events (Vox → Lattices)
164
+
165
+ Pushed over the WebSocket connection during an active dictation.
166
+
167
+ | Event | When | Data |
168
+ |---|---|---|
169
+ | `dictation.started` | Mic is hot, recording has begun | `{"source": "lattices"}` |
170
+ | `dictation.transcribing` | Recording stopped, model is running | `{}` |
171
+ | `dictation.result` | Transcription complete | `{"transcript": "...", "confidence": 0.94, "durationMs": 1820}` |
172
+ | `dictation.error` | Something failed during recording or transcription | `{"message": "..."}` |
173
+
174
+ ## Disconnect Contract
175
+
176
+ If the WebSocket connection drops mid-dictation (Lattices crashes, user quits, network hiccup), Vox **must** auto-cancel the in-flight dictation:
177
+
178
+ 1. Stop recording immediately
179
+ 2. Discard any captured audio — do not transcribe
180
+ 3. Release the mic so Vox's own UI or a reconnecting client can use it
181
+ 4. Log the orphaned dictation for diagnostics: `[dictation] orphaned session from lattices — connection dropped, auto-cancelled`
182
+
183
+ Vox treats a closed WebSocket as an implicit `cancelDictation`. No grace period, no buffering — if the consumer is gone, the recording is worthless.
184
+
185
+ On the Lattices side, if the connection drops while in `listening` or `transcribing` state:
186
+ - Feedback strip: "Connection lost" (red)
187
+ - Attempt reconnect via normal discovery (ping → `services.json` → wait for `live.ready`)
188
+ - Do not auto-retry the dictation — the user needs to press Space again
189
+
190
+ ## End-to-End Lifecycle
191
+
192
+ ```mermaid
193
+ sequenceDiagram
194
+ participant U as User
195
+ participant L as Lattices UI
196
+ participant TA as Vox
197
+ participant IE as Intent Engine
198
+
199
+ U->>L: Press Space (in cheat sheet)
200
+ L->>TA: startDictation (persist: false)
201
+
202
+ alt Error
203
+ TA-->>L: error (mic denied / no model)
204
+ L->>U: Red text in feedback strip
205
+ else OK
206
+ TA-->>L: {ok: true}
207
+ TA-->>L: dictation.started
208
+ L->>U: Green dot (pulsing) + "Listening..."
209
+
210
+ Note over U,TA: User speaks...
211
+
212
+ U->>L: Press Space again
213
+ L->>TA: stopDictation
214
+ TA-->>L: dictation.transcribing
215
+ L->>U: "Transcribing..."
216
+
217
+ TA-->>L: {transcript: "tile this left", confidence: 0.94}
218
+ L->>U: Show transcript
219
+ end
220
+
221
+ L->>IE: Classify via NLEmbedding
222
+ IE-->>L: intent: tile_window, slots: {position: left}, confidence: 0.95
223
+ L->>U: Show intent + slots
224
+
225
+ L->>IE: Execute
226
+ IE-->>L: result
227
+ L->>U: "Done" or error
228
+
229
+ Note over L: Log entry written
230
+ ```
231
+
232
+ ## UI States
233
+
234
+ | State | Feedback strip | Footer |
235
+ |---|---|---|
236
+ | **Idle** | Hidden | `[Space] Voice [ESC] Dismiss` |
237
+ | **Not installed** | Hidden | `[Space] Voice (unavailable) [ESC] Dismiss` |
238
+ | **Installed, not running** | Hidden | `[Space] Voice (start Vox) [ESC] Dismiss` |
239
+ | **Starting** | "Starting Vox..." | `[ESC] Cancel` |
240
+ | **Error** | Red: "Mic access denied" or "Mic in use by Vox" | `[ESC] Dismiss` |
241
+ | **Disconnected** | Red: "Connection lost" | `[ESC] Dismiss` |
242
+ | **Listening** | Green dot + "Listening..." | `[Space] Stop [ESC] Cancel` |
243
+ | **Transcribing** | "Transcribing..." | `[ESC] Cancel` |
244
+ | **Result** | `"tile this left"` → `tile window · position: left` → `Done` | `[Space] New [ESC] Dismiss` |
245
+
246
+ ## Logging
247
+
248
+ Every voice command produces a diagnostic log entry:
249
+
250
+ ```
251
+ [voice] "tile this left" → tile_window(position: left) → ok (conf=0.95, 1820ms)
252
+ [voice] "organize my stuff" → distribute() → ok (conf=0.79, 2100ms)
253
+ [voice] "do something weird" → (no match, conf=0.41, 900ms)
254
+ [voice] error: Vox not running
255
+ [voice] error: mic_busy (owner: vox)
256
+ [voice] error: connection dropped mid-dictation
257
+ [voice] launched Vox, connected in 2.1s
258
+ ```
259
+
260
+ ## Implementation Scope
261
+
262
+ ### Lattices side
263
+ - Use `@vox/client` SDK (`VoxClient` with `service: "agent"`, `clientId: "lattices"`, `capabilities: ["dictation"]`) — see `vox/sdk/SDK.md` for full reference
264
+ - Replace `AVAudioRecorder` in `VoxAudioProvider` with `createDictationSession().start({ persist: false })`
265
+ - Remove mic entitlement and `NSMicrophoneUsageDescription` (Lattices never touches the mic)
266
+ - Service discovery, auto-reconnect, and auth are handled by the SDK
267
+ - Map `DictationSession` events (`stateChange`, `partialTranscript`, `finalTranscript`, `error`) to cheat sheet UI states
268
+ - Handle `MicBusyError` — show `"Mic in use by ${error.owner}"`
269
+
270
+ ### Vox side (separate repo)
271
+ - Expose a WebSocket bridge (or add methods to existing bridge)
272
+ - Add `startDictation`, `stopDictation`, `cancelDictation` handlers
273
+ - Emit `dictation.started`, `dictation.transcribing`, `dictation.result`, `dictation.error` events
274
+ - Honor `persist: false` — skip memo creation and sync
275
+ - Write `~/.vox/services.json` on startup (all service ports)
276
+ - Include `agentPort` in `live.ready` notification userInfo
277
+ - Return `mic_busy` error with `owner` field when another consumer holds the mic
278
+ - Auto-cancel dictation on WebSocket disconnect (closed socket = implicit cancel)
package/docs/voice.md ADDED
@@ -0,0 +1,219 @@
1
+ ---
2
+ title: Voice Commands
3
+ description: Natural language voice control for window management
4
+ order: 7
5
+ ---
6
+
7
+ Voice commands let you control Lattices by speaking. Press **Hyper+3**
8
+ to open the voice command window, hold **Option** to speak, release to
9
+ stop. Lattices transcribes your speech via Vox,
10
+ matches it to an intent, and executes it.
11
+
12
+ ## Quick start
13
+
14
+ 1. Install Vox (provides mic + transcription)
15
+ 2. Install the [Claude Code](https://claude.ai/code) CLI (optional — provides the AI advisor)
16
+ 3. Press **Hyper+3** to open the voice command window
17
+ 4. Hold **Option** and speak a command
18
+ 5. Release **Option** — Lattices transcribes and executes
19
+
20
+ ## Keyboard shortcuts
21
+
22
+ | Key | Action |
23
+ |-----|--------|
24
+ | **Hyper+3** | Open/close voice command window |
25
+ | **⌥ (hold)** | Push-to-talk — hold to record, release to stop |
26
+ | **Tab** | Arm/disarm the mic |
27
+ | **Escape** | Cancel recording or dismiss window |
28
+
29
+ ## Built-in commands
30
+
31
+ ### Search
32
+
33
+ Find windows by app name, title, content, or category.
34
+
35
+ ```
36
+ "Find all vox windows"
37
+ "Find terminals" → expands to iTerm, Terminal, Warp, etc.
38
+ "Show me all browsers" → expands to Safari, Chrome, Firefox, Arc, etc.
39
+ "Where is my editor?" → expands to VS Code, Cursor, Xcode, etc.
40
+ ```
41
+
42
+ Category synonyms are built in — saying "terminals", "browsers", "editors",
43
+ "chat", "music", "mail", or "notes" automatically expands to search for
44
+ the actual app names.
45
+
46
+ ### Tile
47
+
48
+ Move windows to screen positions.
49
+
50
+ ```
51
+ "Tile this left"
52
+ "Snap to the right half"
53
+ "Maximize the window"
54
+ "Put this in the top right corner"
55
+ ```
56
+
57
+ Voice tiling should resolve into the same canonical daemon mutation used
58
+ by other agent surfaces: `window.place`.
59
+
60
+ ### Focus
61
+
62
+ Bring a window or app to the front.
63
+
64
+ ```
65
+ "Focus Safari"
66
+ "Switch to Slack"
67
+ "Go to the lattices window"
68
+ ```
69
+
70
+ ### Open / Launch
71
+
72
+ Open applications or project workspaces.
73
+
74
+ ```
75
+ "Open Spotify"
76
+ "Launch the vox project"
77
+ ```
78
+
79
+ ### Kill
80
+
81
+ Close windows or quit applications.
82
+
83
+ ```
84
+ "Kill this window"
85
+ "Close Safari"
86
+ "Quit Spotify"
87
+ ```
88
+
89
+ ### Scan
90
+
91
+ Trigger an OCR scan of visible windows.
92
+
93
+ ```
94
+ "Scan the screen"
95
+ "Read what's on screen"
96
+ ```
97
+
98
+ ### Other
99
+
100
+ ```
101
+ "List all windows"
102
+ "Show my sessions"
103
+ "Switch to layer 2"
104
+ "Help"
105
+ ```
106
+
107
+ ## AI advisor
108
+
109
+ Every voice command fires a Claude advisor (Haiku by default) in parallel. The
110
+ advisor provides commentary and follow-up suggestions in the **AI
111
+ corner** (bottom-right of the voice command window).
112
+
113
+ When local matching handles the command well, the AI corner shows
114
+ "no AI needed" with an optional "ask AI" button. When the advisor
115
+ has something useful, it shows a one-line comment and an actionable
116
+ suggestion button.
117
+
118
+ ### How it works
119
+
120
+ 1. You speak a command
121
+ 2. Local intent matching runs immediately (fast, free)
122
+ 3. Haiku advisor runs in parallel (takes ~2-5 seconds)
123
+ 4. If the advisor suggests something, a button appears in the AI corner
124
+ 5. Click the suggestion to execute it
125
+ 6. If you engage with a suggestion that the local matcher missed,
126
+ it's recorded in `~/.lattices/advisor-learning.jsonl` for future
127
+ improvement
128
+
129
+ ### Session persistence
130
+
131
+ The advisor maintains a conversation session across voice commands.
132
+ It remembers what you've asked and what worked. When the context
133
+ reaches 75% of the model's limit, the session auto-resets.
134
+
135
+ Context usage and session cost are shown in the AI corner header.
136
+
137
+ ## Configuration
138
+
139
+ Open **Settings > AI** to configure:
140
+
141
+ | Setting | Default | Description |
142
+ |---------|---------|-------------|
143
+ | Claude CLI path | Auto-detected | Path to the `claude` binary. Checks `~/.local/bin/claude`, `/usr/local/bin/claude`, `/opt/homebrew/bin/claude`, then `which claude`. |
144
+ | Advisor model | Haiku | `haiku` (fast, cheap) or `sonnet` (smarter, slower) |
145
+ | Budget per session | $0.50 | Maximum spend per Claude CLI invocation |
146
+
147
+ ### Installing Claude Code
148
+
149
+ ```bash
150
+ npm install -g @anthropic-ai/claude-code
151
+ ```
152
+
153
+ Or see [claude.ai/code](https://claude.ai/code) for other install methods.
154
+
155
+ ## Layout
156
+
157
+ The voice command window has four sections:
158
+
159
+ | Section | Position | Content |
160
+ |---------|----------|---------|
161
+ | **History** | Left column | Past commands with expandable details |
162
+ | **Voice Command** | Center column | Current transcript, matched intent, results |
163
+ | **Log** | Top-right | Rolling diagnostic log (last 12 entries) |
164
+ | **AI Corner** | Bottom-right | Advisor commentary, suggestions, session stats |
165
+
166
+ ## Search architecture
167
+
168
+ Voice search uses the same backend as `lattices search`:
169
+
170
+ 1. **Quick search** — window titles, app names, session tags (instant)
171
+ 2. **Complete search** — adds terminal cwd/processes + OCR content
172
+ 3. **Synonym expansion** — category terms like "terminals" expand to
173
+ actual app names before searching
174
+ 4. **Query cleanup** — strips natural language qualifiers ("and sort by...",
175
+ "please", "for me") before searching
176
+
177
+ ## Processing resilience
178
+
179
+ - **15-second timeout** — if processing doesn't complete within 15 seconds, Lattices returns to idle
180
+ - **Cancellation on dismiss** — closing the window cancels in-flight work
181
+ - **Double-execution prevention** — streaming and stop callbacks can't
182
+ both fire the intent
183
+
184
+ ## Advisor learning
185
+
186
+ When the local matcher fails but the AI advisor suggests something that
187
+ you engage with, the interaction is recorded:
188
+
189
+ ```
190
+ ~/.lattices/advisor-learning.jsonl
191
+ ```
192
+
193
+ Each line is a JSON object:
194
+
195
+ ```json
196
+ {
197
+ "timestamp": "2026-03-15T18:30:00.000Z",
198
+ "transcript": "find all terminals",
199
+ "localIntent": "search",
200
+ "localSlots": {"query": "terminals"},
201
+ "localResultCount": 0,
202
+ "advisorIntent": "search",
203
+ "advisorSlots": {"query": "iterm"},
204
+ "advisorLabel": "Search iTerm"
205
+ }
206
+ ```
207
+
208
+ This dataset captures where the local system falls short and what the
209
+ right answer was. Future work can mine it for automatic synonym
210
+ mappings and phrase pattern improvements.
211
+
212
+ ## Requirements
213
+
214
+ - **Vox** — provides microphone access and
215
+ speech-to-text transcription
216
+ - **[Claude Code](https://claude.ai/code)** CLI — provides the AI advisor
217
+ (optional — voice commands work without it, but with no AI suggestions)
218
+ - **Accessibility** permission — for window tiling and focus
219
+ - **Screen Recording** permission — for window discovery
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@lattices/cli",
3
- "version": "0.3.0",
3
+ "version": "0.4.1",
4
4
  "description": "Agentic window manager for macOS — programmable workspace, smart layouts, managed tmux sessions, and a 35+-method agent API",
5
5
  "bin": {
6
- "lattices": "./bin/lattices.js",
7
- "lattices-app": "./bin/lattices-app.js"
6
+ "lattices": "./bin/lattices.ts",
7
+ "lattices-app": "./bin/lattices-app.ts"
8
8
  },
9
9
  "keywords": [
10
10
  "tmux",
@@ -21,22 +21,40 @@
21
21
  },
22
22
  "license": "MIT",
23
23
  "exports": {
24
- ".": "./bin/client.js",
25
- "./daemon-client": "./bin/daemon-client.js"
24
+ ".": "./bin/client.ts",
25
+ "./daemon-client": "./bin/daemon-client.ts",
26
+ "./project-twin": "./bin/project-twin.ts"
26
27
  },
27
28
  "scripts": {
28
- "dev": "bun --cwd docs-site dev"
29
+ "dev": "bun --cwd docs-site dev",
30
+ "typecheck": "tsc --noEmit",
31
+ "build:app-bundle": "bash ./bin/lattices-dev build",
32
+ "prepack": "bash ./bin/lattices-dev build"
29
33
  },
30
34
  "type": "module",
31
- "engines": {
32
- "node": ">=18"
33
- },
34
35
  "os": ["darwin"],
35
36
  "files": [
36
37
  "bin",
38
+ "app/Info.plist",
39
+ "app/Lattices.app",
40
+ "app/Lattices.entitlements",
37
41
  "app/Package.swift",
42
+ "app/Resources",
38
43
  "app/Sources",
39
- "app/Lattices.app/Contents/Info.plist",
44
+ "app/Tests",
45
+ "assets/AppIcon.icns",
40
46
  "docs"
41
- ]
47
+ ],
48
+ "devDependencies": {
49
+ "bun-types": "^1.3.10",
50
+ "typescript": "^5.9.3"
51
+ },
52
+ "dependencies": {
53
+ "@ai-sdk/anthropic": "^3.0.58",
54
+ "@ai-sdk/google": "^3.0.43",
55
+ "@ai-sdk/openai": "^3.0.41",
56
+ "@ai-sdk/xai": "^3.0.67",
57
+ "@arach/speakeasy": "^0.2.8",
58
+ "ai": "^6.0.116"
59
+ }
42
60
  }
package/bin/client.js DELETED
@@ -1,4 +0,0 @@
1
- // Public API — re-exports from daemon-client for a cleaner import path.
2
- // Usage: import { daemonCall, isDaemonRunning } from '@lattices/cli'
3
-
4
- export { daemonCall, isDaemonRunning } from "./daemon-client.js";