@lattices/cli 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -9
- package/app/Info.plist +30 -0
- package/app/Lattices.app/Contents/Info.plist +8 -2
- package/app/Lattices.app/Contents/MacOS/Lattices +0 -0
- package/app/Lattices.app/Contents/Resources/AppIcon.icns +0 -0
- package/app/Lattices.app/Contents/Resources/tap.wav +0 -0
- package/app/Lattices.app/Contents/_CodeSignature/CodeResources +139 -0
- package/app/Lattices.entitlements +15 -0
- package/app/Package.swift +8 -1
- package/app/Resources/tap.wav +0 -0
- package/app/Sources/AdvisorLearningStore.swift +90 -0
- package/app/Sources/AgentSession.swift +377 -0
- package/app/Sources/AppDelegate.swift +45 -12
- package/app/Sources/AppShellView.swift +81 -8
- package/app/Sources/AudioProvider.swift +386 -0
- package/app/Sources/CheatSheetHUD.swift +261 -19
- package/app/Sources/DaemonProtocol.swift +13 -0
- package/app/Sources/DaemonServer.swift +8 -0
- package/app/Sources/DesktopModel.swift +189 -6
- package/app/Sources/DesktopModelTypes.swift +2 -0
- package/app/Sources/DiagnosticLog.swift +104 -2
- package/app/Sources/EventBus.swift +1 -0
- package/app/Sources/HUDBottomBar.swift +279 -0
- package/app/Sources/HUDController.swift +1158 -0
- package/app/Sources/HUDLeftBar.swift +849 -0
- package/app/Sources/HUDMinimap.swift +179 -0
- package/app/Sources/HUDRightBar.swift +774 -0
- package/app/Sources/HUDState.swift +367 -0
- package/app/Sources/HUDTopBar.swift +243 -0
- package/app/Sources/HandsOffSession.swift +802 -0
- package/app/Sources/HomeDashboardView.swift +125 -0
- package/app/Sources/HotkeyManager.swift +2 -0
- package/app/Sources/HotkeyStore.swift +49 -9
- package/app/Sources/IntentEngine.swift +962 -0
- package/app/Sources/Intents/CreateLayerIntent.swift +54 -0
- package/app/Sources/Intents/DistributeIntent.swift +56 -0
- package/app/Sources/Intents/FocusIntent.swift +69 -0
- package/app/Sources/Intents/HelpIntent.swift +41 -0
- package/app/Sources/Intents/KillIntent.swift +47 -0
- package/app/Sources/Intents/LatticeIntent.swift +78 -0
- package/app/Sources/Intents/LaunchIntent.swift +67 -0
- package/app/Sources/Intents/ListSessionsIntent.swift +32 -0
- package/app/Sources/Intents/ListWindowsIntent.swift +30 -0
- package/app/Sources/Intents/ScanIntent.swift +52 -0
- package/app/Sources/Intents/SearchIntent.swift +190 -0
- package/app/Sources/Intents/SwitchLayerIntent.swift +50 -0
- package/app/Sources/Intents/TileIntent.swift +61 -0
- package/app/Sources/LatticesApi.swift +1275 -30
- package/app/Sources/LauncherHUD.swift +348 -0
- package/app/Sources/MainView.swift +147 -44
- package/app/Sources/MouseFinder.swift +222 -0
- package/app/Sources/OcrModel.swift +34 -1
- package/app/Sources/OmniSearchState.swift +99 -102
- package/app/Sources/OnboardingView.swift +457 -0
- package/app/Sources/PermissionChecker.swift +2 -12
- package/app/Sources/PiChatDock.swift +454 -0
- package/app/Sources/PiChatSession.swift +815 -0
- package/app/Sources/PiWorkspaceView.swift +364 -0
- package/app/Sources/PlacementSpec.swift +195 -0
- package/app/Sources/Preferences.swift +59 -0
- package/app/Sources/ProjectScanner.swift +58 -45
- package/app/Sources/ScreenMapState.swift +701 -55
- package/app/Sources/ScreenMapView.swift +843 -103
- package/app/Sources/ScreenMapWindowController.swift +22 -0
- package/app/Sources/SessionLayerStore.swift +285 -0
- package/app/Sources/SessionManager.swift +4 -1
- package/app/Sources/SettingsView.swift +186 -3
- package/app/Sources/Theme.swift +9 -8
- package/app/Sources/TmuxModel.swift +7 -0
- package/app/Sources/TmuxQuery.swift +27 -3
- package/app/Sources/VoiceChatView.swift +192 -0
- package/app/Sources/VoiceCommandWindow.swift +1594 -0
- package/app/Sources/VoiceIntentResolver.swift +671 -0
- package/app/Sources/VoxClient.swift +454 -0
- package/app/Sources/WindowTiler.swift +348 -87
- package/app/Sources/WorkspaceManager.swift +127 -18
- package/app/Tests/StageDragTests.swift +333 -0
- package/app/Tests/StageJoinTests.swift +313 -0
- package/app/Tests/StageManagerTests.swift +280 -0
- package/app/Tests/StageTileTests.swift +353 -0
- package/assets/AppIcon.icns +0 -0
- package/bin/client.ts +16 -0
- package/bin/{daemon-client.js → daemon-client.ts} +49 -30
- package/bin/handsoff-infer.ts +280 -0
- package/bin/handsoff-worker.ts +740 -0
- package/bin/lattices-app.ts +338 -0
- package/bin/lattices-dev +208 -0
- package/bin/{lattices.js → lattices.ts} +777 -140
- package/bin/project-twin.ts +645 -0
- package/docs/agent-execution-plan.md +562 -0
- package/docs/agent-layer-guide.md +207 -0
- package/docs/agents.md +142 -0
- package/docs/api.md +153 -34
- package/docs/app.md +29 -1
- package/docs/config.md +5 -1
- package/docs/handsoff-test-scenarios.md +84 -0
- package/docs/layers.md +20 -20
- package/docs/ocr.md +14 -5
- package/docs/overview.md +5 -1
- package/docs/presentation-execution-review.md +491 -0
- package/docs/prompts/hands-off-system.md +374 -0
- package/docs/prompts/hands-off-turn.md +30 -0
- package/docs/prompts/voice-advisor.md +31 -0
- package/docs/prompts/voice-fallback.md +23 -0
- package/docs/tiling-reference.md +167 -0
- package/docs/twins.md +138 -0
- package/docs/voice-command-protocol.md +278 -0
- package/docs/voice.md +219 -0
- package/package.json +29 -11
- package/bin/client.js +0 -4
- package/bin/lattices-app.js +0 -221
package/docs/twins.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Project Twins
|
|
3
|
+
description: Pi-backed project twins for mediated, persistent agent execution
|
|
4
|
+
order: 3
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
A project twin is a persistent software counterpart to a codebase.
|
|
8
|
+
|
|
9
|
+
It is not the primary agent. It is the project-native runtime that sits
|
|
10
|
+
between a general-purpose caller and the project's execution protocol.
|
|
11
|
+
|
|
12
|
+
## Why a twin exists
|
|
13
|
+
|
|
14
|
+
General-purpose agents are interchangeable. Project protocols are not.
|
|
15
|
+
|
|
16
|
+
If every primary agent has to learn the project's tool surface, memory
|
|
17
|
+
policy, protocol semantics, and context conventions from scratch, the
|
|
18
|
+
integration becomes brittle. A twin fixes that by becoming the stable
|
|
19
|
+
project-facing runtime:
|
|
20
|
+
|
|
21
|
+
- The **primary agent** asks for work
|
|
22
|
+
- The **twin** resumes with the right context and memory
|
|
23
|
+
- The **protocol** stays behind the twin boundary
|
|
24
|
+
|
|
25
|
+
```text
|
|
26
|
+
primary agent -> project twin -> project protocol / harness
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
The twin is the client of record for the project.
|
|
30
|
+
|
|
31
|
+
## Responsibilities
|
|
32
|
+
|
|
33
|
+
A project twin owns:
|
|
34
|
+
|
|
35
|
+
- Project-scoped identity
|
|
36
|
+
- Persistent session continuity
|
|
37
|
+
- Memory compaction and continuation
|
|
38
|
+
- Tool policy and allowed capabilities
|
|
39
|
+
- Protocol knowledge
|
|
40
|
+
- Project context assembly
|
|
41
|
+
- Caller-facing summaries and handoffs
|
|
42
|
+
|
|
43
|
+
A primary agent should not speak the project protocol directly. It should
|
|
44
|
+
invoke the twin.
|
|
45
|
+
|
|
46
|
+
## Pi-backed runtime
|
|
47
|
+
|
|
48
|
+
Pi is a good fit for the twin runtime because it already provides:
|
|
49
|
+
|
|
50
|
+
- Persistent sessions
|
|
51
|
+
- RPC mode for long-running subprocess integration
|
|
52
|
+
- Tool calling with an explicit harness
|
|
53
|
+
- Compaction and summarization hooks
|
|
54
|
+
- Context files, prompt templates, and extension loading
|
|
55
|
+
|
|
56
|
+
That makes the split:
|
|
57
|
+
|
|
58
|
+
- **Twin**: product concept and policy boundary
|
|
59
|
+
- **Pi**: reasoning and session runtime
|
|
60
|
+
- **Host system**: orchestration, durable memory, and protocol adapters
|
|
61
|
+
|
|
62
|
+
Pi powers the twin. It does not define the twin.
|
|
63
|
+
|
|
64
|
+
## Invocation model
|
|
65
|
+
|
|
66
|
+
The primary agent makes a single mediated call into the twin:
|
|
67
|
+
|
|
68
|
+
1. Resume the twin session
|
|
69
|
+
2. Inject caller context, project memory, and protocol state
|
|
70
|
+
3. Let the twin do project-local work inside the harness
|
|
71
|
+
4. Return a concise result to the caller
|
|
72
|
+
|
|
73
|
+
The caller should see a stable capability surface such as:
|
|
74
|
+
|
|
75
|
+
- `status`
|
|
76
|
+
- `inspect`
|
|
77
|
+
- `plan`
|
|
78
|
+
- `execute`
|
|
79
|
+
- `summarize`
|
|
80
|
+
- `handoff`
|
|
81
|
+
|
|
82
|
+
It should not see raw protocol-shaped operations unless that protocol is
|
|
83
|
+
itself the public product surface.
|
|
84
|
+
|
|
85
|
+
## Implementation in this repo
|
|
86
|
+
|
|
87
|
+
This repo now includes a Pi-backed runtime in
|
|
88
|
+
[`bin/project-twin.ts`](../bin/project-twin.ts).
|
|
89
|
+
|
|
90
|
+
The runtime:
|
|
91
|
+
|
|
92
|
+
- Spawns `pi --mode rpc` as a persistent subprocess
|
|
93
|
+
- Stores project-local session state under `.openscout/twins/<name>/sessions`
|
|
94
|
+
- Exposes a stable `invoke()` API for callers
|
|
95
|
+
- Optionally injects OpenScout relay context if `.openscout/relay*` exists
|
|
96
|
+
|
|
97
|
+
The default harness is intentionally narrow:
|
|
98
|
+
|
|
99
|
+
- Built-in Pi tools are explicitly pinned to `read,bash,edit,write`
|
|
100
|
+
- Extension, skill, and prompt-template discovery are disabled by default
|
|
101
|
+
- Project instructions still come from `AGENTS.md` and related context files
|
|
102
|
+
|
|
103
|
+
This keeps the twin's behavior predictable unless the host explicitly widens the
|
|
104
|
+
surface.
|
|
105
|
+
|
|
106
|
+
## Example
|
|
107
|
+
|
|
108
|
+
```ts
|
|
109
|
+
import { ProjectTwin } from "@lattices/cli"
|
|
110
|
+
|
|
111
|
+
const twin = new ProjectTwin({
|
|
112
|
+
cwd: "/Users/you/dev/my-project",
|
|
113
|
+
name: "my-project",
|
|
114
|
+
model: "anthropic/claude-sonnet-4-5",
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
await twin.start()
|
|
118
|
+
|
|
119
|
+
const result = await twin.invoke({
|
|
120
|
+
caller: "primary-agent",
|
|
121
|
+
protocol: "openscout-relay",
|
|
122
|
+
memory: "The caller is debugging relay enrollment and wants the next safe action.",
|
|
123
|
+
task: "Inspect the available project context and summarize what the caller should do next.",
|
|
124
|
+
})
|
|
125
|
+
|
|
126
|
+
console.log(result.text)
|
|
127
|
+
|
|
128
|
+
await twin.stop()
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Design rule
|
|
132
|
+
|
|
133
|
+
All project-specific protocol semantics should live behind the twin
|
|
134
|
+
boundary.
|
|
135
|
+
|
|
136
|
+
The primary agent should invoke the twin as a skill-like capability.
|
|
137
|
+
The twin should own context assembly, protocol interaction, and the final
|
|
138
|
+
handoff back to the caller.
|
package/docs/voice-command-protocol.md
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
# Voice Command Protocol — Lattices ↔ Vox
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Lattices delegates all audio capture and transcription to Vox via WebSocket JSON-RPC. Lattices never accesses the microphone directly — it borrows Vox's mic and transcription pipeline, receives English text back, and routes it through its own intent engine.
|
|
6
|
+
|
|
7
|
+
These dictations are **ephemeral** — Vox does not persist them as memos, sync them, or add them to Vox's history. Lattices is just using Vox as a transcription pipe.
|
|
8
|
+
|
|
9
|
+
## Vox Process Model
|
|
10
|
+
|
|
11
|
+
Vox consists of three independent processes:
|
|
12
|
+
|
|
13
|
+
| Process | Role | Relevance to Lattices |
|
|
14
|
+
|---|---|---|
|
|
15
|
+
| **Vox.app** | Main UI — menu bar, notch visualization, memo history | None |
|
|
16
|
+
| **Vox** | Background service — mic access, recording, hotkeys, orchestrates transcription, state notifications | **This is what Lattices connects to** |
|
|
17
|
+
| **VoxEngine** | Transcription engine — runs Whisper models, called by Vox internally | Indirect — Vox delegates to it |
|
|
18
|
+
|
|
19
|
+
Vox is the right target because:
|
|
20
|
+
- It owns the mic and recording lifecycle
|
|
21
|
+
- It's the long-running background process (normally up whenever Vox is installed; see "When Vox is not running" for the exception)
|
|
22
|
+
- It already orchestrates the record → transcribe → result pipeline
|
|
23
|
+
- It's easy to discover via its existing DistributedNotification
|
|
24
|
+
|
|
25
|
+
## Service Discovery
|
|
26
|
+
|
|
27
|
+
Lattices never hardcodes ports. Discovery uses two mechanisms, followed by a health check:
|
|
28
|
+
|
|
29
|
+
### 1. Well-known file (at rest)
|
|
30
|
+
|
|
31
|
+
Vox writes its service configuration on startup:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
~/.vox/services.json
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"agent": {"port": 19823, "pid": 48209},
|
|
40
|
+
"engine": {"port": 19821, "pid": 48210},
|
|
41
|
+
"sync": {"port": 19820, "pid": 48208},
|
|
42
|
+
"inference": {"port": 19822, "pid": 48212}
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Lattices reads `agent.port` from this file. If the file doesn't exist, Vox is either not installed or has not been started yet (see "When Vox is not running" below).
|
|
47
|
+
|
|
48
|
+
### 2. DistributedNotification (live discovery)
|
|
49
|
+
|
|
50
|
+
Vox posts when it comes online:
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
Notification: com.jdi.vox.agent.live.ready
|
|
54
|
+
UserInfo: {"agentPort": 19823, "pid": 48209}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Lattices subscribes to this on startup. This handles:
|
|
58
|
+
- **Vox launches after Lattices** — Lattices picks up the port dynamically
|
|
59
|
+
- **Vox restarts** — Lattices reconnects with the new port
|
|
60
|
+
- **Port changes** — no stale config
|
|
61
|
+
|
|
62
|
+
### 3. Health check
|
|
63
|
+
|
|
64
|
+
After discovering a port, Lattices confirms Vox is alive:
|
|
65
|
+
|
|
66
|
+
```json
|
|
67
|
+
→ {"id": "hc", "method": "ping"}
|
|
68
|
+
← {"id": "hc", "result": {"pong": true}}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
If ping fails, Lattices marks voice as unavailable and retries on the next `live.ready` or after ~30 seconds.
|
|
72
|
+
|
|
73
|
+
### When Vox is not running
|
|
74
|
+
|
|
75
|
+
Three possible states:
|
|
76
|
+
|
|
77
|
+
| State | How detected | Lattices behavior |
|
|
78
|
+
|---|---|---|
|
|
79
|
+
| **Not installed** | `/Applications/Vox.app` doesn't exist and no `~/.vox/` dir | Footer: `[Space] Voice (unavailable)` — no recovery action |
|
|
80
|
+
| **Installed but not running** | App bundle exists, but `services.json` missing/stale or ping fails | Footer: `[Space] Voice (start Vox)` — pressing Space runs `open /Applications/Vox.app`, which brings up Vox as a side effect |
|
|
81
|
+
| **Running** | Ping succeeds | Normal operation |
|
|
82
|
+
|
|
83
|
+
Launch-on-demand flow:
|
|
84
|
+
1. User presses Space while Vox is down but Vox is installed
|
|
85
|
+
2. Lattices runs `NSWorkspace.shared.open(URL(fileURLWithPath: "/Applications/Vox.app"))`
|
|
86
|
+
3. Feedback strip shows "Starting Vox..."
|
|
87
|
+
4. Lattices waits for `live.ready` notification (timeout: 10s)
|
|
88
|
+
5. On `live.ready`, connects and proceeds with `startDictation`
|
|
89
|
+
6. On timeout, shows "Couldn't reach Vox — try opening it manually"
|
|
90
|
+
|
|
91
|
+
Passive behavior (no user action):
|
|
92
|
+
- No log spam — just a quiet unavailable state
|
|
93
|
+
- Lattices keeps listening for `live.ready` and re-checks `services.json` periodically (~30s)
|
|
94
|
+
- The moment Vox comes online, voice becomes available — no restart needed
|
|
95
|
+
|
|
96
|
+
## Protocol
|
|
97
|
+
|
|
98
|
+
### Wire Format
|
|
99
|
+
|
|
100
|
+
Uses Vox's JSON-RPC format over WebSocket:
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
Request: {"id": "...", "method": "...", "params": {...}}
|
|
104
|
+
Response: {"id": "...", "result": {...}} or {"id": "...", "error": "..."}
|
|
105
|
+
Event: {"event": "...", "data": {...}} (server push, no id)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Methods (Lattices → Vox)
|
|
109
|
+
|
|
110
|
+
**`startDictation`** — Start recording from the mic.
|
|
111
|
+
|
|
112
|
+
```json
|
|
113
|
+
{"id": "1", "method": "startDictation", "params": {
|
|
114
|
+
"source": "lattices",
|
|
115
|
+
"persist": false
|
|
116
|
+
}}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
- `source` — identifies the caller (for Vox's logging/UI)
|
|
120
|
+
- `persist: false` — do not save as a memo, do not sync, do not show in Vox history
|
|
121
|
+
|
|
122
|
+
Response (immediate ack):
|
|
123
|
+
```json
|
|
124
|
+
{"id": "1", "result": {"ok": true}}
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Error responses:
|
|
128
|
+
```json
|
|
129
|
+
{"id": "1", "error": "Microphone access denied"}
|
|
130
|
+
{"id": "1", "error": "No model loaded"}
|
|
131
|
+
{"id": "1", "error": "mic_busy", "owner": "vox"}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
The `mic_busy` error means another consumer (Vox's own memo recording, or another client) already has an active dictation. The `owner` field identifies who holds the mic. Lattices shows: "Mic in use by Vox — finish your memo first".
|
|
135
|
+
|
|
136
|
+
The reverse case (user hits Vox hotkey while Lattices has the mic) is handled on Vox's side — it should reject its own recording with an equivalent busy state. Vox is the single owner of mic arbitration.
|
|
137
|
+
|
|
138
|
+
**`stopDictation`** — Stop recording and return the transcript.
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{"id": "2", "method": "stopDictation"}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Response (after transcription completes):
|
|
145
|
+
```json
|
|
146
|
+
{"id": "2", "result": {
|
|
147
|
+
"transcript": "tile this left",
|
|
148
|
+
"confidence": 0.94,
|
|
149
|
+
"durationMs": 1820
|
|
150
|
+
}}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**`cancelDictation`** — Abort without transcribing.
|
|
154
|
+
|
|
155
|
+
```json
|
|
156
|
+
{"id": "3", "method": "cancelDictation"}
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
```json
|
|
160
|
+
{"id": "3", "result": {"ok": true}}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Events (Vox → Lattices)
|
|
164
|
+
|
|
165
|
+
Pushed over the WebSocket connection during an active dictation.
|
|
166
|
+
|
|
167
|
+
| Event | When | Data |
|
|
168
|
+
|---|---|---|
|
|
169
|
+
| `dictation.started` | Mic is hot, recording has begun | `{"source": "lattices"}` |
|
|
170
|
+
| `dictation.transcribing` | Recording stopped, model is running | `{}` |
|
|
171
|
+
| `dictation.result` | Transcription complete | `{"transcript": "...", "confidence": 0.94, "durationMs": 1820}` |
|
|
172
|
+
| `dictation.error` | Something failed during recording or transcription | `{"message": "..."}` |
|
|
173
|
+
|
|
174
|
+
## Disconnect Contract
|
|
175
|
+
|
|
176
|
+
If the WebSocket connection drops mid-dictation (Lattices crashes, user quits, network hiccup), Vox **must** auto-cancel the in-flight dictation:
|
|
177
|
+
|
|
178
|
+
1. Stop recording immediately
|
|
179
|
+
2. Discard any captured audio — do not transcribe
|
|
180
|
+
3. Release the mic so Vox's own UI or a reconnecting client can use it
|
|
181
|
+
4. Log the orphaned dictation for diagnostics: `[dictation] orphaned session from lattices — connection dropped, auto-cancelled`
|
|
182
|
+
|
|
183
|
+
Vox treats a closed WebSocket as an implicit `cancelDictation`. No grace period, no buffering — if the consumer is gone, the recording is worthless.
|
|
184
|
+
|
|
185
|
+
On the Lattices side, if the connection drops while in `listening` or `transcribing` state:
|
|
186
|
+
- Feedback strip: "Connection lost" (red)
|
|
187
|
+
- Attempt reconnect via normal discovery (ping → `services.json` → wait for `live.ready`)
|
|
188
|
+
- Do not auto-retry the dictation — the user needs to press Space again
|
|
189
|
+
|
|
190
|
+
## End-to-End Lifecycle
|
|
191
|
+
|
|
192
|
+
```mermaid
|
|
193
|
+
sequenceDiagram
|
|
194
|
+
participant U as User
|
|
195
|
+
participant L as Lattices UI
|
|
196
|
+
participant TA as Vox
|
|
197
|
+
participant IE as Intent Engine
|
|
198
|
+
|
|
199
|
+
U->>L: Press Space (in cheat sheet)
|
|
200
|
+
L->>TA: startDictation (persist: false)
|
|
201
|
+
|
|
202
|
+
alt Error
|
|
203
|
+
TA-->>L: error (mic denied / no model)
|
|
204
|
+
L->>U: Red text in feedback strip
|
|
205
|
+
else OK
|
|
206
|
+
TA-->>L: {ok: true}
|
|
207
|
+
TA-->>L: dictation.started
|
|
208
|
+
L->>U: Green dot (pulsing) + "Listening..."
|
|
209
|
+
|
|
210
|
+
Note over U,TA: User speaks...
|
|
211
|
+
|
|
212
|
+
U->>L: Press Space again
|
|
213
|
+
L->>TA: stopDictation
|
|
214
|
+
TA-->>L: dictation.transcribing
|
|
215
|
+
L->>U: "Transcribing..."
|
|
216
|
+
|
|
217
|
+
TA-->>L: {transcript: "tile this left", confidence: 0.94}
|
|
218
|
+
L->>U: Show transcript
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
L->>IE: Classify via NLEmbedding
|
|
222
|
+
IE-->>L: intent: tile_window, slots: {position: left}, confidence: 0.95
|
|
223
|
+
L->>U: Show intent + slots
|
|
224
|
+
|
|
225
|
+
L->>IE: Execute
|
|
226
|
+
IE-->>L: result
|
|
227
|
+
L->>U: "Done" or error
|
|
228
|
+
|
|
229
|
+
Note over L: Log entry written
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## UI States
|
|
233
|
+
|
|
234
|
+
| State | Feedback strip | Footer |
|
|
235
|
+
|---|---|---|
|
|
236
|
+
| **Idle** | Hidden | `[Space] Voice [ESC] Dismiss` |
|
|
237
|
+
| **Not installed** | Hidden | `[Space] Voice (unavailable) [ESC] Dismiss` |
|
|
238
|
+
| **Installed, not running** | Hidden | `[Space] Voice (start Vox) [ESC] Dismiss` |
|
|
239
|
+
| **Starting** | "Starting Vox..." | `[ESC] Cancel` |
|
|
240
|
+
| **Error** | Red: "Mic access denied" or "Mic in use by Vox" | `[ESC] Dismiss` |
|
|
241
|
+
| **Disconnected** | Red: "Connection lost" | `[ESC] Dismiss` |
|
|
242
|
+
| **Listening** | Green dot + "Listening..." | `[Space] Stop [ESC] Cancel` |
|
|
243
|
+
| **Transcribing** | "Transcribing..." | `[ESC] Cancel` |
|
|
244
|
+
| **Result** | `"tile this left"` → `tile window · position: left` → `Done` | `[Space] New [ESC] Dismiss` |
|
|
245
|
+
|
|
246
|
+
## Logging
|
|
247
|
+
|
|
248
|
+
Every voice command produces a diagnostic log entry:
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
[voice] "tile this left" → tile_window(position: left) → ok (conf=0.95, 1820ms)
|
|
252
|
+
[voice] "organize my stuff" → distribute() → ok (conf=0.79, 2100ms)
|
|
253
|
+
[voice] "do something weird" → (no match, conf=0.41, 900ms)
|
|
254
|
+
[voice] error: Vox not running
|
|
255
|
+
[voice] error: mic_busy (owner: vox)
|
|
256
|
+
[voice] error: connection dropped mid-dictation
|
|
257
|
+
[voice] launched Vox, connected in 2.1s
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Implementation Scope
|
|
261
|
+
|
|
262
|
+
### Lattices side
|
|
263
|
+
- Use `@vox/client` SDK (`VoxClient` with `service: "agent"`, `clientId: "lattices"`, `capabilities: ["dictation"]`) — see `vox/sdk/SDK.md` for full reference
|
|
264
|
+
- Replace `AVAudioRecorder` in `VoxAudioProvider` with `createDictationSession().start({ persist: false })`
|
|
265
|
+
- Remove mic entitlement and `NSMicrophoneUsageDescription` (Lattices never touches the mic)
|
|
266
|
+
- Service discovery, auto-reconnect, and auth are handled by the SDK
|
|
267
|
+
- Map `DictationSession` events (`stateChange`, `partialTranscript`, `finalTranscript`, `error`) to cheat sheet UI states
|
|
268
|
+
- Handle `MicBusyError` — show `"Mic in use by ${error.owner}"`
|
|
269
|
+
|
|
270
|
+
### Vox side (separate repo)
|
|
271
|
+
- Expose a WebSocket bridge (or add methods to existing bridge)
|
|
272
|
+
- Add `startDictation`, `stopDictation`, `cancelDictation` handlers
|
|
273
|
+
- Emit `dictation.started`, `dictation.transcribing`, `dictation.result`, `dictation.error` events
|
|
274
|
+
- Honor `persist: false` — skip memo creation and sync
|
|
275
|
+
- Write `~/.vox/services.json` on startup (all service ports)
|
|
276
|
+
- Include `agentPort` in `live.ready` notification userInfo
|
|
277
|
+
- Return `mic_busy` error with `owner` field when another consumer holds the mic
|
|
278
|
+
- Auto-cancel dictation on WebSocket disconnect (closed socket = implicit cancel)
|
package/docs/voice.md
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Voice Commands
|
|
3
|
+
description: Natural language voice control for window management
|
|
4
|
+
order: 7
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Voice commands let you control Lattices by speaking. Press **Hyper+3**
|
|
8
|
+
to open the voice command window, hold **Option** to speak, release to
|
|
9
|
+
stop. Lattices transcribes your speech via Vox,
|
|
10
|
+
matches it to an intent, and executes it.
|
|
11
|
+
|
|
12
|
+
## Quick start
|
|
13
|
+
|
|
14
|
+
1. Install Vox (provides mic + transcription)
|
|
15
|
+
2. Install [Claude Code](https://claude.ai/code) CLI (optional — provides the AI advisor)
|
|
16
|
+
3. Press **Hyper+3** to open the voice command window
|
|
17
|
+
4. Hold **Option** and speak a command
|
|
18
|
+
5. Release **Option** — Lattices transcribes and executes
|
|
19
|
+
|
|
20
|
+
## Keyboard shortcuts
|
|
21
|
+
|
|
22
|
+
| Key | Action |
|
|
23
|
+
|-----|--------|
|
|
24
|
+
| **Hyper+3** | Open/close voice command window |
|
|
25
|
+
| **⌥ (hold)** | Push-to-talk — hold to record, release to stop |
|
|
26
|
+
| **Tab** | Arm/disarm the mic |
|
|
27
|
+
| **Escape** | Cancel recording or dismiss window |
|
|
28
|
+
|
|
29
|
+
## Built-in commands
|
|
30
|
+
|
|
31
|
+
### Search
|
|
32
|
+
|
|
33
|
+
Find windows by app name, title, content, or category.
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
"Find all vox windows"
|
|
37
|
+
"Find terminals" → expands to iTerm, Terminal, Warp, etc.
|
|
38
|
+
"Show me all browsers" → expands to Safari, Chrome, Firefox, Arc, etc.
|
|
39
|
+
"Where is my editor?" → expands to VS Code, Cursor, Xcode, etc.
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Category synonyms are built in — saying "terminals", "browsers", "editors",
|
|
43
|
+
"chat", "music", "mail", or "notes" automatically expands to search for
|
|
44
|
+
the actual app names.
|
|
45
|
+
|
|
46
|
+
### Tile
|
|
47
|
+
|
|
48
|
+
Move windows to screen positions.
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
"Tile this left"
|
|
52
|
+
"Snap to the right half"
|
|
53
|
+
"Maximize the window"
|
|
54
|
+
"Put this in the top right corner"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Voice tiling should resolve into the same canonical daemon mutation used
|
|
58
|
+
by other agent surfaces: `window.place`.
|
|
59
|
+
|
|
60
|
+
### Focus
|
|
61
|
+
|
|
62
|
+
Bring a window or app to the front.
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
"Focus Safari"
|
|
66
|
+
"Switch to Slack"
|
|
67
|
+
"Go to the lattices window"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Open / Launch
|
|
71
|
+
|
|
72
|
+
Open applications or project workspaces.
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
"Open Spotify"
|
|
76
|
+
"Launch the vox project"
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Kill
|
|
80
|
+
|
|
81
|
+
Close windows or quit applications.
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
"Kill this window"
|
|
85
|
+
"Close Safari"
|
|
86
|
+
"Quit Spotify"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Scan
|
|
90
|
+
|
|
91
|
+
Trigger an OCR scan of visible windows.
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
"Scan the screen"
|
|
95
|
+
"Read what's on screen"
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Other
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
"List all windows"
|
|
102
|
+
"Show my sessions"
|
|
103
|
+
"Switch to layer 2"
|
|
104
|
+
"Help"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## AI advisor
|
|
108
|
+
|
|
109
|
+
Every voice command fires a Claude advisor (Haiku by default) in parallel. The
|
|
110
|
+
advisor provides commentary and follow-up suggestions in the **AI
|
|
111
|
+
corner** (bottom-right of the voice command window).
|
|
112
|
+
|
|
113
|
+
When local matching handles the command well, the AI corner shows
|
|
114
|
+
"no AI needed" with an optional "ask AI" button. When the advisor
|
|
115
|
+
has something useful, it shows a one-line comment and an actionable
|
|
116
|
+
suggestion button.
|
|
117
|
+
|
|
118
|
+
### How it works
|
|
119
|
+
|
|
120
|
+
1. You speak a command
|
|
121
|
+
2. Local intent matching runs immediately (fast, free)
|
|
122
|
+
3. Haiku advisor runs in parallel (takes ~2-5 seconds)
|
|
123
|
+
4. If the advisor suggests something, a button appears in the AI corner
|
|
124
|
+
5. Click the suggestion to execute it
|
|
125
|
+
6. If you engage with a suggestion that the local matcher missed,
|
|
126
|
+
it's recorded in `~/.lattices/advisor-learning.jsonl` for future
|
|
127
|
+
improvement
|
|
128
|
+
|
|
129
|
+
### Session persistence
|
|
130
|
+
|
|
131
|
+
The advisor maintains a conversation session across voice commands.
|
|
132
|
+
It remembers what you've asked and what worked. When the context
|
|
133
|
+
reaches 75% of the model's limit, the session auto-resets.
|
|
134
|
+
|
|
135
|
+
Context usage and session cost are shown in the AI corner header.
|
|
136
|
+
|
|
137
|
+
## Configuration
|
|
138
|
+
|
|
139
|
+
Open **Settings > AI** to configure:
|
|
140
|
+
|
|
141
|
+
| Setting | Default | Description |
|
|
142
|
+
|---------|---------|-------------|
|
|
143
|
+
| Claude CLI path | Auto-detected | Path to the `claude` binary. Checks `~/.local/bin/claude`, `/usr/local/bin/claude`, `/opt/homebrew/bin/claude`, then `which claude`. |
|
|
144
|
+
| Advisor model | Haiku | `haiku` (fast, cheap) or `sonnet` (smarter, slower) |
|
|
145
|
+
| Budget per session | $0.50 | Maximum spend per Claude CLI invocation |
|
|
146
|
+
|
|
147
|
+
### Installing Claude Code
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
npm install -g @anthropic-ai/claude-code
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Or see [claude.ai/code](https://claude.ai/code) for other install methods.
|
|
154
|
+
|
|
155
|
+
## Layout
|
|
156
|
+
|
|
157
|
+
The voice command window has four sections:
|
|
158
|
+
|
|
159
|
+
| Section | Position | Content |
|
|
160
|
+
|---------|----------|---------|
|
|
161
|
+
| **History** | Left column | Past commands with expandable details |
|
|
162
|
+
| **Voice Command** | Center column | Current transcript, matched intent, results |
|
|
163
|
+
| **Log** | Top-right | Rolling diagnostic log (last 12 entries) |
|
|
164
|
+
| **AI Corner** | Bottom-right | Advisor commentary, suggestions, session stats |
|
|
165
|
+
|
|
166
|
+
## Search architecture
|
|
167
|
+
|
|
168
|
+
Voice search uses the same backend as `lattices search`:
|
|
169
|
+
|
|
170
|
+
1. **Quick search** — window titles, app names, session tags (instant)
|
|
171
|
+
2. **Complete search** — adds terminal cwd/processes + OCR content
|
|
172
|
+
3. **Synonym expansion** — category terms like "terminals" expand to
|
|
173
|
+
actual app names before searching
|
|
174
|
+
4. **Query cleanup** — strips natural language qualifiers ("and sort by...",
|
|
175
|
+
"please", "for me") before searching
|
|
176
|
+
|
|
177
|
+
## Processing resilience
|
|
178
|
+
|
|
179
|
+
- **15-second timeout** — if processing doesn't complete, returns to idle
|
|
180
|
+
- **Cancellation on dismiss** — closing the window cancels in-flight work
|
|
181
|
+
- **Double-execution prevention** — streaming and stop callbacks can't
|
|
182
|
+
both fire the intent
|
|
183
|
+
|
|
184
|
+
## Advisor learning
|
|
185
|
+
|
|
186
|
+
When the local matcher fails but the AI advisor suggests something that
|
|
187
|
+
you engage with, the interaction is recorded:
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
~/.lattices/advisor-learning.jsonl
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Each line is a JSON object:
|
|
194
|
+
|
|
195
|
+
```json
|
|
196
|
+
{
|
|
197
|
+
"timestamp": "2026-03-15T18:30:00.000Z",
|
|
198
|
+
"transcript": "find all terminals",
|
|
199
|
+
"localIntent": "search",
|
|
200
|
+
"localSlots": {"query": "terminals"},
|
|
201
|
+
"localResultCount": 0,
|
|
202
|
+
"advisorIntent": "search",
|
|
203
|
+
"advisorSlots": {"query": "iterm"},
|
|
204
|
+
"advisorLabel": "Search iTerm"
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
This dataset captures where the local system falls short and what the
|
|
209
|
+
right answer was. Future work can mine it for automatic synonym
|
|
210
|
+
mappings and phrase pattern improvements.
|
|
211
|
+
|
|
212
|
+
## Requirements
|
|
213
|
+
|
|
214
|
+
- **Vox** — provides microphone access and
|
|
215
|
+
speech-to-text transcription
|
|
216
|
+
- **[Claude Code](https://claude.ai/code)** CLI — provides the AI advisor
|
|
217
|
+
(optional, voice commands work without it but no AI suggestions)
|
|
218
|
+
- **Accessibility** permission — for window tiling and focus
|
|
219
|
+
- **Screen Recording** permission — for window discovery
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lattices/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"description": "Agentic window manager for macOS — programmable workspace, smart layouts, managed tmux sessions, and a 35+-method agent API",
|
|
5
5
|
"bin": {
|
|
6
|
-
"lattices": "./bin/lattices.
|
|
7
|
-
"lattices-app": "./bin/lattices-app.
|
|
6
|
+
"lattices": "./bin/lattices.ts",
|
|
7
|
+
"lattices-app": "./bin/lattices-app.ts"
|
|
8
8
|
},
|
|
9
9
|
"keywords": [
|
|
10
10
|
"tmux",
|
|
@@ -21,22 +21,40 @@
|
|
|
21
21
|
},
|
|
22
22
|
"license": "MIT",
|
|
23
23
|
"exports": {
|
|
24
|
-
".": "./bin/client.
|
|
25
|
-
"./daemon-client": "./bin/daemon-client.
|
|
24
|
+
".": "./bin/client.ts",
|
|
25
|
+
"./daemon-client": "./bin/daemon-client.ts",
|
|
26
|
+
"./project-twin": "./bin/project-twin.ts"
|
|
26
27
|
},
|
|
27
28
|
"scripts": {
|
|
28
|
-
"dev": "bun --cwd docs-site dev"
|
|
29
|
+
"dev": "bun --cwd docs-site dev",
|
|
30
|
+
"typecheck": "tsc --noEmit",
|
|
31
|
+
"build:app-bundle": "bash ./bin/lattices-dev build",
|
|
32
|
+
"prepack": "bash ./bin/lattices-dev build"
|
|
29
33
|
},
|
|
30
34
|
"type": "module",
|
|
31
|
-
"engines": {
|
|
32
|
-
"node": ">=18"
|
|
33
|
-
},
|
|
34
35
|
"os": ["darwin"],
|
|
35
36
|
"files": [
|
|
36
37
|
"bin",
|
|
38
|
+
"app/Info.plist",
|
|
39
|
+
"app/Lattices.app",
|
|
40
|
+
"app/Lattices.entitlements",
|
|
37
41
|
"app/Package.swift",
|
|
42
|
+
"app/Resources",
|
|
38
43
|
"app/Sources",
|
|
39
|
-
"app/
|
|
44
|
+
"app/Tests",
|
|
45
|
+
"assets/AppIcon.icns",
|
|
40
46
|
"docs"
|
|
41
|
-
]
|
|
47
|
+
],
|
|
48
|
+
"devDependencies": {
|
|
49
|
+
"bun-types": "^1.3.10",
|
|
50
|
+
"typescript": "^5.9.3"
|
|
51
|
+
},
|
|
52
|
+
"dependencies": {
|
|
53
|
+
"@ai-sdk/anthropic": "^3.0.58",
|
|
54
|
+
"@ai-sdk/google": "^3.0.43",
|
|
55
|
+
"@ai-sdk/openai": "^3.0.41",
|
|
56
|
+
"@ai-sdk/xai": "^3.0.67",
|
|
57
|
+
"@arach/speakeasy": "^0.2.8",
|
|
58
|
+
"ai": "^6.0.116"
|
|
59
|
+
}
|
|
42
60
|
}
|
package/bin/client.js
DELETED