@lattices/cli 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -9
- package/app/Package.swift +8 -1
- package/app/Sources/AdvisorLearningStore.swift +90 -0
- package/app/Sources/AgentSession.swift +377 -0
- package/app/Sources/AppDelegate.swift +44 -12
- package/app/Sources/AppShellView.swift +81 -8
- package/app/Sources/AudioProvider.swift +386 -0
- package/app/Sources/CheatSheetHUD.swift +261 -19
- package/app/Sources/DaemonProtocol.swift +13 -0
- package/app/Sources/DaemonServer.swift +8 -0
- package/app/Sources/DesktopModel.swift +164 -5
- package/app/Sources/DesktopModelTypes.swift +2 -0
- package/app/Sources/DiagnosticLog.swift +104 -2
- package/app/Sources/EventBus.swift +1 -0
- package/app/Sources/HUDBottomBar.swift +279 -0
- package/app/Sources/HUDController.swift +1158 -0
- package/app/Sources/HUDLeftBar.swift +849 -0
- package/app/Sources/HUDMinimap.swift +179 -0
- package/app/Sources/HUDRightBar.swift +774 -0
- package/app/Sources/HUDState.swift +367 -0
- package/app/Sources/HUDTopBar.swift +243 -0
- package/app/Sources/HandsOffSession.swift +733 -0
- package/app/Sources/HomeDashboardView.swift +125 -0
- package/app/Sources/HotkeyManager.swift +2 -0
- package/app/Sources/HotkeyStore.swift +45 -9
- package/app/Sources/IntentEngine.swift +925 -0
- package/app/Sources/Intents/CreateLayerIntent.swift +54 -0
- package/app/Sources/Intents/DistributeIntent.swift +56 -0
- package/app/Sources/Intents/FocusIntent.swift +69 -0
- package/app/Sources/Intents/HelpIntent.swift +41 -0
- package/app/Sources/Intents/KillIntent.swift +47 -0
- package/app/Sources/Intents/LatticeIntent.swift +78 -0
- package/app/Sources/Intents/LaunchIntent.swift +67 -0
- package/app/Sources/Intents/ListSessionsIntent.swift +32 -0
- package/app/Sources/Intents/ListWindowsIntent.swift +30 -0
- package/app/Sources/Intents/ScanIntent.swift +52 -0
- package/app/Sources/Intents/SearchIntent.swift +190 -0
- package/app/Sources/Intents/SwitchLayerIntent.swift +50 -0
- package/app/Sources/Intents/TileIntent.swift +61 -0
- package/app/Sources/LatticesApi.swift +1235 -30
- package/app/Sources/LauncherHUD.swift +348 -0
- package/app/Sources/MainView.swift +147 -44
- package/app/Sources/OcrModel.swift +34 -1
- package/app/Sources/OmniSearchState.swift +99 -102
- package/app/Sources/OnboardingView.swift +457 -0
- package/app/Sources/PermissionChecker.swift +2 -12
- package/app/Sources/PiChatDock.swift +454 -0
- package/app/Sources/PiChatSession.swift +815 -0
- package/app/Sources/PiWorkspaceView.swift +364 -0
- package/app/Sources/PlacementSpec.swift +195 -0
- package/app/Sources/Preferences.swift +59 -0
- package/app/Sources/ProjectScanner.swift +1 -1
- package/app/Sources/ScreenMapState.swift +701 -55
- package/app/Sources/ScreenMapView.swift +843 -103
- package/app/Sources/ScreenMapWindowController.swift +22 -0
- package/app/Sources/SessionLayerStore.swift +285 -0
- package/app/Sources/SessionManager.swift +4 -1
- package/app/Sources/SettingsView.swift +186 -3
- package/app/Sources/Theme.swift +9 -8
- package/app/Sources/TmuxModel.swift +7 -0
- package/app/Sources/TmuxQuery.swift +27 -3
- package/app/Sources/VoiceChatView.swift +192 -0
- package/app/Sources/VoiceCommandWindow.swift +1594 -0
- package/app/Sources/VoiceIntentResolver.swift +671 -0
- package/app/Sources/VoxClient.swift +454 -0
- package/app/Sources/WindowTiler.swift +348 -87
- package/app/Sources/WorkspaceManager.swift +127 -18
- package/bin/client.ts +16 -0
- package/bin/{daemon-client.js → daemon-client.ts} +49 -30
- package/bin/handsoff-infer.ts +280 -0
- package/bin/handsoff-worker.ts +731 -0
- package/bin/{lattices-app.js → lattices-app.ts} +67 -32
- package/bin/lattices-dev +160 -0
- package/bin/{lattices.js → lattices.ts} +600 -137
- package/bin/project-twin.ts +645 -0
- package/docs/agent-execution-plan.md +562 -0
- package/docs/agents.md +142 -0
- package/docs/api.md +153 -34
- package/docs/app.md +29 -1
- package/docs/config.md +5 -1
- package/docs/handsoff-test-scenarios.md +84 -0
- package/docs/layers.md +20 -20
- package/docs/ocr.md +14 -5
- package/docs/overview.md +5 -1
- package/docs/presentation-execution-review.md +491 -0
- package/docs/prompts/hands-off-system.md +374 -0
- package/docs/prompts/hands-off-turn.md +30 -0
- package/docs/prompts/voice-advisor.md +31 -0
- package/docs/prompts/voice-fallback.md +23 -0
- package/docs/tiling-reference.md +167 -0
- package/docs/twins.md +138 -0
- package/docs/voice-command-protocol.md +278 -0
- package/docs/voice.md +219 -0
- package/package.json +21 -10
- package/bin/client.js +0 -4
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
import AppKit
|
|
2
|
+
|
|
3
|
+
/// Hands-off voice mode: hotkey → listen → worker handles everything.
|
|
4
|
+
///
|
|
5
|
+
/// Architecture:
|
|
6
|
+
/// - Swift owns: hotkey, Vox dictation, action execution
|
|
7
|
+
/// - Worker owns: inference (Groq), TTS (streaming OpenAI), fast path matching, audio caching
|
|
8
|
+
/// - Worker is a long-running bun process, started once, communicates via JSON lines over stdio
|
|
9
|
+
///
|
|
10
|
+
/// The worker handles the full turn orchestration in parallel:
|
|
11
|
+
/// - Fast path: local match → cached ack + execute + cached confirm (~300ms)
|
|
12
|
+
/// - Slow path: cached ack ∥ Groq inference → streaming TTS ∥ execute (~2s)
|
|
13
|
+
|
|
14
|
+
// MARK: - Chat Log Entry
|
|
15
|
+
|
|
16
|
+
/// One row of the voice chat log.
///
/// Every entry gets a fresh `UUID` at creation; equality compares that
/// identifier only, so two entries with the same text are still distinct.
struct VoiceChatEntry: Identifiable, Equatable {
    /// Who produced the entry.
    enum Role: String, Equatable {
        /// What the user said.
        case user
        /// Spoken response.
        case assistant
        /// Silent info (actions executed, search results, etc.).
        case system
    }

    let id = UUID()
    let timestamp: Date
    let role: Role
    let text: String
    /// Optional structured data — actions taken, search results, etc.
    /// Displayable in the chat log but not spoken.
    let detail: String?

    /// Identity-based equality: entries are equal only when their `id`s match.
    static func == (lhs: VoiceChatEntry, rhs: VoiceChatEntry) -> Bool {
        return lhs.id == rhs.id
    }
}
|
|
35
|
+
|
|
36
|
+
final class HandsOffSession: ObservableObject {
|
|
37
|
+
/// Process-wide instance; the initializer is private, so this is the only one.
static let shared = HandsOffSession()

/// Phase of the hands-off interaction, as switched on by `toggle()`.
enum State: Equatable {
    case idle, connecting, listening, thinking
}
|
|
45
|
+
|
|
46
|
+
/// Current phase; `toggle()` decides what to do based on this value.
@Published var state: State = .idle
/// Most recent transcript text received from the dictation session.
@Published var lastTranscript: String? = nil
/// Most recent `spoken` text returned by the worker.
@Published var lastResponse: String? = nil
/// When `false`, `playCachedCue(_:)` returns without doing anything.
@Published var audibleFeedbackEnabled = false

/// Recently executed actions — shown as playback in the HUD bottom bar
@Published var recentActions: [[String: Any]] = []
|
|
53
|
+
|
|
54
|
+
/// Frame history for undo — one record per window touched by the last turn,
/// holding the frame the window had before it was moved.
struct FrameSnapshot {
    /// Window number the frame belongs to.
    let wid: UInt32
    /// `pid` of the matching `DesktopModel` window entry.
    let pid: Int32
    /// Frame captured before the move.
    let frame: WindowFrame
}

/// Snapshots written by `snapshotFrames(wids:)`; readable from outside, mutated only here.
private(set) var frameHistory = [FrameSnapshot]()
|
|
61
|
+
|
|
62
|
+
/// Snapshot current frames for all windows that are about to be moved.
/// Stores frames in CG/AX coordinates (top-left origin) for direct use with batchRestoreWindows.
///
/// Any previously stored history is discarded first. A window is skipped when it
/// is unknown to `DesktopModel`, is missing from the CoreGraphics window list, or
/// its bounds dictionary cannot be converted to a rect.
///
/// - Parameter wids: Window numbers to capture.
func snapshotFrames(wids: [UInt32]) {
    frameHistory.removeAll()

    let listOptions: CGWindowListOption = [.optionAll, .excludeDesktopElements]
    guard let windowInfos = CGWindowListCopyWindowInfo(listOptions, kCGNullWindowID) as? [[String: Any]] else {
        return
    }
    let numberKey = kCGWindowNumber as String
    let boundsKey = kCGWindowBounds as String

    for wid in wids {
        guard let tracked = DesktopModel.shared.windows[wid] else { continue }

        // First listing that both carries this window number and exposes a bounds dictionary.
        let matchingBounds = windowInfos.lazy.compactMap { info -> NSDictionary? in
            guard let number = info[numberKey] as? UInt32, number == wid else { return nil }
            return info[boundsKey] as? NSDictionary
        }.first
        guard let bounds = matchingBounds else { continue }

        var rect = CGRect.zero
        guard CGRectMakeWithDictionaryRepresentation(bounds, &rect) else { continue }

        let captured = WindowFrame(x: rect.origin.x, y: rect.origin.y, w: rect.width, h: rect.height)
        frameHistory.append(FrameSnapshot(wid: wid, pid: tracked.pid, frame: captured))
    }
}
|
|
81
|
+
|
|
82
|
+
/// Forgets every stored undo snapshot.
func clearFrameHistory() {
    frameHistory = []
}
|
|
85
|
+
|
|
86
|
+
/// Running chat log — visible in the voice chat panel. Persists across turns.
@Published private(set) var chatLog = [VoiceChatEntry]()
/// The log is trimmed from the front once it grows past this many entries.
private let maxChatEntries: Int = 50

/// Number of turns processed so far; incremented at the start of each turn.
private var turnCount: Int = 0
/// Alternating user/assistant messages sent to the worker as `history`.
@Published private(set) var conversationHistory = [[String: String]]()
/// History keeps at most this many turns (two messages per turn).
private let maxHistoryTurns: Int = 10

// Long-running worker process
private var workerProcess: Process? = nil
/// Write end of the worker's stdin pipe.
private var workerStdin: FileHandle? = nil
/// Trailing partial line of worker stdout, held until its newline arrives.
private var workerBuffer: String = ""
/// Queue on which stdin writes to the worker are performed.
private let workerQueue = DispatchQueue(label: "com.lattices.handsoff-worker", qos: .userInitiated)
/// Time of the last cue request, used to throttle `playCachedCue(_:)`.
private var lastCueAt = Date.distantPast

/// JSONL log for full turn data — ~/.lattices/handsoff.jsonl
private let turnLogPath: String = NSHomeDirectory() + "/.lattices/handsoff.jsonl"

/// Private so that `shared` is the only instance.
private init() {}
|
|
105
|
+
|
|
106
|
+
// MARK: - Chat Log
|
|
107
|
+
|
|
108
|
+
/// Adds an entry to the chat log, timestamped at call time.
///
/// The append happens asynchronously on the main queue; the oldest entries
/// are dropped once the log exceeds `maxChatEntries`.
///
/// - Parameters:
///   - role: Who the entry is attributed to.
///   - text: Text shown in the log.
///   - detail: Optional extra data stored with the entry.
func appendChat(_ role: VoiceChatEntry.Role, text: String, detail: String? = nil) {
    let newEntry = VoiceChatEntry(timestamp: Date(), role: role, text: text, detail: detail)
    DispatchQueue.main.async {
        self.chatLog.append(newEntry)
        let overflow = self.chatLog.count - self.maxChatEntries
        guard overflow > 0 else { return }
        self.chatLog.removeFirst(overflow)
    }
}
|
|
117
|
+
|
|
118
|
+
/// Empties the chat log asynchronously on the main queue.
func clearChatLog() {
    DispatchQueue.main.async {
        self.chatLog.removeAll()
    }
}
|
|
121
|
+
|
|
122
|
+
// MARK: - Lifecycle
|
|
123
|
+
|
|
124
|
+
/// Entry point for hands-off mode: makes sure the worker process is running.
func start() {
    self.startWorker()
}
|
|
127
|
+
|
|
128
|
+
/// Turns audible cues on or off; enabling them also makes sure the worker is running.
///
/// - Parameter enabled: New value for `audibleFeedbackEnabled`.
func setAudibleFeedbackEnabled(_ enabled: Bool) {
    audibleFeedbackEnabled = enabled
    guard enabled else { return }
    startWorker()
}
|
|
134
|
+
|
|
135
|
+
/// Asks the worker to play a cached phrase (`play_cached` command).
///
/// Does nothing when audible feedback is disabled, or when the previous cue
/// was requested less than 0.2 seconds ago.
///
/// - Parameter phrase: Text sent to the worker as the cue to play.
func playCachedCue(_ phrase: String) {
    guard audibleFeedbackEnabled else { return }

    let requestedAt = Date()
    let sinceLastCue = requestedAt.timeIntervalSince(lastCueAt)
    guard sinceLastCue >= 0.2 else { return }
    lastCueAt = requestedAt

    startWorker()
    let command: [String: Any] = ["cmd": "play_cached", "text": phrase]
    sendToWorker(command)
}
|
|
143
|
+
|
|
144
|
+
/// Append a full turn record to the JSONL log at `turnLogPath`.
///
/// The record contains a timestamp, the turn number, the transcript, the turn
/// duration, a fresh desktop snapshot and — when the response has a `data`
/// object — its `actions`, `spoken` and `_meta` values.
///
/// Logging is best-effort: a record that cannot be serialized, or a log file
/// that cannot be created, is reported through `DiagnosticLog` and skipped.
///
/// - Parameters:
///   - transcript: What the user said this turn.
///   - response: The worker's decoded JSON response.
///   - turnMs: Turn duration in milliseconds.
private func logTurn(transcript: String, response: [String: Any], turnMs: Int) {
    var record: [String: Any] = [
        "ts": ISO8601DateFormatter().string(from: Date()),
        "turn": turnCount,
        "transcript": transcript,
        "turnMs": turnMs,
        "snapshot": buildSnapshot(),
    ]
    if let data = response["data"] as? [String: Any] {
        record["actions"] = data["actions"]
        record["spoken"] = data["spoken"]
        record["meta"] = data["_meta"]
    }

    // `data(withJSONObject:)` raises an Objective-C exception (not a Swift error)
    // for objects it cannot encode, so validate first instead of relying on `try?`.
    guard JSONSerialization.isValidJSONObject(record),
          var payload = try? JSONSerialization.data(withJSONObject: record) else {
        DiagnosticLog.shared.warn("HandsOff: turn record is not JSON-serializable, skipping log")
        return
    }
    payload.append(0x0A) // newline terminator — one JSON object per line

    let fileManager = FileManager.default

    // The log's parent directory may not exist yet; without it both the open
    // and the create below would fail and the record would be lost silently.
    let directory = (turnLogPath as NSString).deletingLastPathComponent
    if !fileManager.fileExists(atPath: directory) {
        do {
            try fileManager.createDirectory(atPath: directory, withIntermediateDirectories: true)
        } catch {
            DiagnosticLog.shared.warn("HandsOff: cannot create log directory — \(error.localizedDescription)")
            return
        }
    }

    if let handle = FileHandle(forWritingAtPath: turnLogPath) {
        defer { handle.closeFile() }
        handle.seekToEndOfFile()
        handle.write(payload)
    } else if !fileManager.createFile(atPath: turnLogPath, contents: payload) {
        DiagnosticLog.shared.warn("HandsOff: cannot create turn log at \(turnLogPath)")
    }
}
|
|
172
|
+
|
|
173
|
+
/// Launches the long-running bun worker unless one is already running.
///
/// The worker is started as `bun run <script>` with stdin/stdout/stderr pipes:
/// stdout is fed to `handleWorkerOutput(_:)`, stderr is forwarded line by line
/// to `DiagnosticLog`, and when the process exits it is restarted after two seconds.
/// If bun or the worker script cannot be found, or the launch fails, a warning
/// is logged and no worker is started.
private func startWorker() {
    if workerProcess?.isRunning == true, workerStdin != nil {
        return
    }

    let bunCandidates = [
        NSHomeDirectory() + "/.bun/bin/bun",
        "/usr/local/bin/bun",
        "/opt/homebrew/bin/bun",
    ]
    guard let bunPath = bunCandidates.first(where: { FileManager.default.isExecutableFile(atPath: $0) }) else {
        DiagnosticLog.shared.warn("HandsOff: bun not found, worker disabled")
        return
    }

    // NOTE(review): the script and working directory are hard-coded to a checkout
    // at ~/dev/lattices; on machines without that checkout the worker never starts.
    let scriptPath = NSHomeDirectory() + "/dev/lattices/bin/handsoff-worker.ts"
    guard FileManager.default.fileExists(atPath: scriptPath) else {
        DiagnosticLog.shared.warn("HandsOff: worker script not found at \(scriptPath)")
        return
    }

    let proc = Process()
    proc.executableURL = URL(fileURLWithPath: bunPath)
    proc.arguments = ["run", scriptPath]
    proc.currentDirectoryURL = URL(fileURLWithPath: NSHomeDirectory() + "/dev/lattices")

    // Inherit the app's environment, minus CLAUDECODE.
    var env = ProcessInfo.processInfo.environment
    env.removeValue(forKey: "CLAUDECODE")
    proc.environment = env

    let inPipe = Pipe()
    let outPipe = Pipe()
    let errPipe = Pipe()
    proc.standardInput = inPipe
    proc.standardOutput = outPipe
    proc.standardError = errPipe

    do {
        try proc.run()
    } catch {
        DiagnosticLog.shared.warn("HandsOff: failed to start worker — \(error)")
        return
    }

    workerProcess = proc
    workerStdin = inPipe.fileHandleForWriting

    // Read stdout for responses
    outPipe.fileHandleForReading.readabilityHandler = { [weak self] handle in
        let data = handle.availableData
        guard !data.isEmpty else {
            // Empty data means end-of-file (the worker closed its end). Detach the
            // handler, otherwise it keeps being invoked with empty reads.
            handle.readabilityHandler = nil
            return
        }
        guard let str = String(data: data, encoding: .utf8) else { return }
        self?.handleWorkerOutput(str)
    }

    // Log stderr
    errPipe.fileHandleForReading.readabilityHandler = { handle in
        let data = handle.availableData
        guard !data.isEmpty else {
            // End-of-file: stop observing, same as for stdout.
            handle.readabilityHandler = nil
            return
        }
        guard let str = String(data: data, encoding: .utf8) else { return }
        for line in str.components(separatedBy: "\n") where !line.isEmpty {
            DiagnosticLog.shared.info("HandsOff worker: \(line)")
        }
    }

    // Handle worker crash → restart
    proc.terminationHandler = { [weak self] finished in
        // Only the current worker may reset state and schedule a restart. If a
        // newer worker has already replaced this one, clearing the handles here
        // would orphan it and lead to a second worker being launched.
        guard let self, self.workerProcess === finished else {
            DiagnosticLog.shared.warn("HandsOff: superseded worker exited (code \(finished.terminationStatus))")
            return
        }
        DiagnosticLog.shared.warn("HandsOff: worker exited (code \(finished.terminationStatus)), restarting in 2s")
        self.workerProcess = nil
        self.workerStdin = nil
        DispatchQueue.main.asyncAfter(deadline: .now() + 2) { [weak self] in
            self?.startWorker()
        }
    }

    // Ping to verify
    sendToWorker(["cmd": "ping"])
    DiagnosticLog.shared.info("HandsOff: worker started (pid \(proc.processIdentifier))")
}
|
|
250
|
+
|
|
251
|
+
// MARK: - Worker communication
|
|
252
|
+
|
|
253
|
+
/// One-shot handler for worker output: `handleWorkerOutput(_:)` hands it the
/// next decoded JSON line and then clears it.
private var pendingCallback: (([String: Any]) -> Void)? = nil

/// Serializes `dict` as a single JSON line and writes it to the worker's stdin
/// on `workerQueue`. Nothing is sent if the dictionary cannot be encoded.
///
/// - Parameter dict: Command payload for the worker.
private func sendToWorker(_ dict: [String: Any]) {
    guard let encoded = try? JSONSerialization.data(withJSONObject: dict),
          let json = String(data: encoded, encoding: .utf8) else { return }
    let line = json + "\n"
    workerQueue.async { [weak self] in
        self?.workerStdin?.write(Data(line.utf8))
    }
}

/// Sends `dict` to the worker and registers `callback` as the pending handler,
/// replacing any handler registered earlier.
///
/// - Parameters:
///   - dict: Command payload for the worker.
///   - callback: Receives the decoded JSON object of a worker output line.
private func sendToWorkerWithCallback(_ dict: [String: Any], callback: @escaping ([String: Any]) -> Void) {
    self.pendingCallback = callback
    self.sendToWorker(dict)
}
|
|
268
|
+
|
|
269
|
+
/// Consumes a chunk of worker stdout, processing every complete JSON line in it.
///
/// The text after the last newline is kept in `workerBuffer` until more output
/// arrives. For each line that decodes to a JSON object this:
/// 1. logs the raw line,
/// 2. takes (and clears) the pending callback,
/// 3. schedules one main-queue block that updates `lastResponse`, `chatLog`,
///    `recentActions`, runs the actions and resets `state` to `.idle`,
/// 4. invokes the callback with the decoded object, on the calling thread.
///
/// - Parameter str: Newly received stdout text.
private func handleWorkerOutput(_ str: String) {
    workerBuffer += str
    var completeLines = workerBuffer.components(separatedBy: "\n")
    // The final component is whatever followed the last newline (possibly empty).
    workerBuffer = completeLines.popLast() ?? ""

    for rawLine in completeLines {
        let trimmed = rawLine.trimmingCharacters(in: .whitespacesAndNewlines)
        if trimmed.isEmpty { continue }
        guard let lineData = trimmed.data(using: .utf8),
              let json = try? JSONSerialization.jsonObject(with: lineData) as? [String: Any]
        else { continue }

        DiagnosticLog.shared.info("HandsOff: worker response → \(trimmed)")

        // Parse everything on the background thread, then do ONE main-queue dispatch
        // to update all @Published properties atomically. Scattered dispatches cause
        // Combine deadlocks (os_unfair_lock contention with SwiftUI rendering).
        let payload = json["data"] as? [String: Any]
        let spoken = payload?["spoken"] as? String
        let actions = payload?["actions"] as? [[String: Any]]

        let callback = pendingCallback
        pendingCallback = nil

        // Build chat entries off-main
        var chatEntries: [(VoiceChatEntry.Role, String)] = []
        if let spoken {
            chatEntries.append((.assistant, spoken))
        }
        // One short "intent target position" summary per action that names an intent.
        let summaries: [String] = (actions ?? []).compactMap { action in
            guard let intent = action["intent"] as? String else { return nil }
            let slots = action["slots"] as? [String: Any] ?? [:]
            let target = slots["app"] as? String ?? slots["query"] as? String ?? ""
            let pos = slots["position"] as? String ?? ""
            return [intent, target, pos].filter { !$0.isEmpty }.joined(separator: " ")
        }
        if !summaries.isEmpty {
            chatEntries.append((.system, summaries.joined(separator: ", ")))
        }

        // Single dispatch — all @Published mutations in one block
        DispatchQueue.main.async { [weak self] in
            guard let self else { return }

            if let spoken {
                self.lastResponse = spoken
            }

            for (role, text) in chatEntries {
                self.chatLog.append(VoiceChatEntry(timestamp: Date(), role: role, text: text, detail: nil))
            }
            let overflow = self.chatLog.count - self.maxChatEntries
            if overflow > 0 {
                self.chatLog.removeFirst(overflow)
            }

            if let actions, !actions.isEmpty {
                self.recentActions = actions
                self.executeActions(actions)
            }

            self.state = .idle
        }

        callback?(json)
    }
}
|
|
328
|
+
|
|
329
|
+
// MARK: - Toggle
|
|
330
|
+
|
|
331
|
+
/// Hotkey handler: advances the session based on the current state.
///
/// Idle starts listening, listening stops the recording, connecting cancels,
/// and a toggle while thinking is ignored (logged only).
func toggle() {
    switch state {
    case .idle:
        beginListening()
    case .listening:
        finishListening()
    case .connecting:
        cancel()
    case .thinking:
        DiagnosticLog.shared.info("HandsOff: busy, ignoring toggle")
    }
}
|
|
343
|
+
|
|
344
|
+
/// Returns the session to `.idle` and records the cancellation in the diagnostic log.
func cancel() {
    self.state = .idle
    DiagnosticLog.shared.info("HandsOff: cancelled")
}
|
|
348
|
+
|
|
349
|
+
// MARK: - Voice capture
|
|
350
|
+
|
|
351
|
+
/// Starts dictation right away when Vox is connected; otherwise enters
/// `.connecting`, asks Vox to connect and re-checks after half a second
/// (with five further retries available).
private func beginListening() {
    let vox = VoxClient.shared

    guard vox.connectionState != .connected else {
        startDictation()
        return
    }

    state = .connecting
    vox.connect()
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
        self?.retryListenIfConnected(attempts: 5)
    }
}
|
|
365
|
+
|
|
366
|
+
/// Polls the Vox connection: starts dictation once connected, otherwise
/// re-checks every half second until `attempts` runs out, at which point the
/// session goes back to idle and the failure sound is played.
///
/// - Parameter attempts: Number of further re-checks still allowed.
private func retryListenIfConnected(attempts: Int) {
    guard VoxClient.shared.connectionState != .connected else {
        startDictation()
        return
    }

    guard attempts > 0 else {
        state = .idle
        DiagnosticLog.shared.warn("HandsOff: Vox not available")
        playSound("Basso")
        return
    }

    let remaining = attempts - 1
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
        self?.retryListenIfConnected(attempts: remaining)
    }
}
|
|
379
|
+
|
|
380
|
+
/// Enters `.listening`, plays the start sound and opens a Vox session.
///
/// Progress events and the final result are both handled on the main queue.
/// A non-empty final transcript is added to the chat log and handed to
/// `processTurn(_:)`; an empty one or a failure returns the session to idle.
private func startDictation() {
    state = .listening
    lastTranscript = nil
    playSound("Tink")

    DiagnosticLog.shared.info("HandsOff: listening...")

    // Vox live session: startSession opens the mic, events flow on the start call ID.
    // No partial transcripts — Vox transcribes after recording stops.
    VoxClient.shared.startSession(
        onProgress: { [weak self] event, data in
            DispatchQueue.main.async {
                if event == "session.state" {
                    let sessionState = data["state"] as? String ?? ""
                    DiagnosticLog.shared.info("HandsOff: session → \(sessionState)")
                } else if event == "session.final" {
                    guard let finalText = data["text"] as? String else { return }
                    self?.lastTranscript = finalText
                }
            }
        },
        completion: { [weak self] result in
            DispatchQueue.main.async {
                guard let self else { return }

                switch result {
                case .failure(let error):
                    self.state = .idle
                    DiagnosticLog.shared.warn("HandsOff: session error — \(error.localizedDescription)")
                    self.playSound("Basso")

                case .success(let data):
                    let text = data["text"] as? String ?? ""
                    guard !text.isEmpty else {
                        self.state = .idle
                        DiagnosticLog.shared.info("HandsOff: no speech detected")
                        return
                    }
                    self.lastTranscript = text
                    DiagnosticLog.shared.info("HandsOff: heard → '\(text)'")
                    self.appendChat(.user, text: text)
                    self.processTurn(text)
                }
            }
        }
    )
}
|
|
425
|
+
|
|
426
|
+
/// Ends the recording: plays the stop sound and tells Vox to stop the session.
/// Ignored unless the session is currently listening.
func finishListening() {
    if state != .listening { return }
    playSound("Tink")
    VoxClient.shared.stopSession()
}
|
|
431
|
+
|
|
432
|
+
// MARK: - Turn processing (delegates to worker)
|
|
433
|
+
|
|
434
|
+
/// Sends one transcript to the worker as a `turn` command and records the outcome.
///
/// The command carries the transcript, a fresh desktop snapshot and the
/// conversation history. When the worker answers, the turn is written to the
/// JSONL log and — if the response has a `data` object — the exchange is
/// appended to `conversationHistory`, which is capped at `maxHistoryTurns` turns.
///
/// - Parameter transcript: What the user said.
private func processTurn(_ transcript: String) {
    state = .thinking
    turnCount += 1

    let turnStart = Date()
    DiagnosticLog.shared.info("HandsOff: ⏱ turn \(turnCount) — '\(transcript)'")

    // Send turn to worker — it handles ack, inference, TTS, everything in parallel
    let turnCmd: [String: Any] = [
        "cmd": "turn",
        "transcript": transcript,
        "snapshot": buildSnapshot(),
        "history": conversationHistory,
    ]

    sendToWorkerWithCallback(turnCmd) { [weak self] response in
        // Measure latency at the moment the response arrives, before any queue hop.
        let turnMs = Int(Date().timeIntervalSince(turnStart) * 1000)

        // This callback is invoked by the worker-output reader, i.e. off the main
        // thread. The work below mutates a @Published property and (through
        // logTurn → buildSnapshot) reads AppKit state, so it must run on main.
        // NOTE(review): the logged snapshot is now taken after this turn's
        // actions have been applied on the main queue.
        DispatchQueue.main.async {
            guard let self else { return }

            DiagnosticLog.shared.info("HandsOff: ⏱ turn \(self.turnCount) complete — \(turnMs)ms")

            // Log full turn to JSONL
            self.logTurn(transcript: transcript, response: response, turnMs: turnMs)

            // Record history
            guard let data = response["data"] as? [String: Any] else { return }
            let responseStr = (try? String(data: JSONSerialization.data(withJSONObject: data), encoding: .utf8)) ?? ""

            var history = self.conversationHistory
            history.append(["role": "user", "content": transcript])
            history.append(["role": "assistant", "content": responseStr])
            let limit = self.maxHistoryTurns * 2
            if history.count > limit {
                history = Array(history.suffix(limit))
            }
            // Single assignment → a single change notification.
            self.conversationHistory = history
        }
    }
}
|
|
472
|
+
|
|
473
|
+
// MARK: - Desktop snapshot (full context — all windows, all screens)
|
|
474
|
+
|
|
475
|
+
/// Assembles the desktop context dictionary that accompanies each turn.
///
/// - Returns: A dictionary with the Stage Manager flags, every window, every
///   terminal, every screen and window counts; `tmuxSessions` is included only
///   when there are sessions, `currentLayer` only when the active layer index is valid.
private func buildSnapshot() -> [String: Any] {
    let allWindows = DesktopModel.shared.allWindows()

    // Stage Manager settings are read from the WindowManager defaults domain.
    let windowManagerDefaults = UserDefaults(suiteName: "com.apple.WindowManager")
    let smEnabled = windowManagerDefaults?.bool(forKey: "GloballyEnabled") ?? false
    let grouping = windowManagerDefaults?.integer(forKey: "AppWindowGroupingBehavior") ?? 0

    // All windows — no filtering. Order is front-to-back (Z-order).
    var windowList: [[String: Any]] = []
    for (zIndex, window) in allWindows.enumerated() {
        let frame = window.frame
        var item: [String: Any] = [
            "wid": window.wid,
            "app": window.app,
            "title": window.title,
            "frame": "\(Int(frame.x)),\(Int(frame.y)) \(Int(frame.w))x\(Int(frame.h))",
            "onScreen": window.isOnScreen,
            "zIndex": zIndex, // 0 = frontmost
        ]
        if let session = window.latticesSession {
            item["session"] = session
        }
        if !window.spaceIds.isEmpty {
            item["spaces"] = window.spaceIds
        }
        windowList.append(item)
    }

    // All screens (1-based index).
    var screens: [[String: Any]] = []
    for (offset, screen) in NSScreen.screens.enumerated() {
        screens.append([
            "index": offset + 1,
            "width": Int(screen.frame.width),
            "height": Int(screen.frame.height),
            "isMain": screen == NSScreen.main,
            "visibleWidth": Int(screen.visibleFrame.width),
            "visibleHeight": Int(screen.visibleFrame.height),
        ])
    }

    // Layers — only reported when the active index points at an existing layer.
    let layerStore = SessionLayerStore.shared
    var layerInfo: [String: Any]? = nil
    if (0..<layerStore.layers.count).contains(layerStore.activeIndex) {
        let activeLayer = layerStore.layers[layerStore.activeIndex]
        layerInfo = ["name": activeLayer.name, "index": layerStore.activeIndex]
    }

    // Terminal enrichment — cwd, running commands, claude, tmux sessions
    let terminals = ProcessModel.shared.synthesizeTerminals()
    let terminalList: [[String: Any]] = terminals.map { inst -> [String: Any] in
        var item: [String: Any] = [
            "tty": inst.tty,
            "hasClaude": inst.hasClaude,
            "displayName": inst.displayName,
            "isActiveTab": inst.isActiveTab,
        ]
        if let cwd = inst.cwd { item["cwd"] = cwd }
        if let app = inst.app { item["app"] = app.rawValue }
        if let session = inst.tmuxSession { item["tmuxSession"] = session }
        if let wid = inst.windowId { item["windowId"] = Int(wid) }
        if let title = inst.tabTitle { item["tabTitle"] = title }

        // Running commands, with shells and login processes left out.
        let userProcesses = inst.processes.filter {
            !["zsh", "bash", "fish", "login", "-zsh", "-bash"].contains($0.comm)
        }
        if !userProcesses.isEmpty {
            item["runningCommands"] = userProcesses.map { proc -> [String: Any] in
                var cmd: [String: Any] = ["command": proc.comm]
                if let cwd = proc.cwd { cmd["cwd"] = cwd }
                return cmd
            }
        }
        return item
    }

    // Tmux sessions
    let tmuxList: [[String: Any]] = TmuxModel.shared.sessions.map { session in
        [
            "name": session.name,
            "windowCount": session.windowCount,
            "attached": session.attached,
        ]
    }

    var snapshot: [String: Any] = [
        "stageManager": smEnabled,
        "smGrouping": grouping == 0 ? "all-at-once" : "one-at-a-time",
        "windows": windowList,
        "terminals": terminalList,
        "screens": screens,
        "windowCount": allWindows.count,
        "onScreenCount": allWindows.lazy.filter(\.isOnScreen).count,
    ]
    if !tmuxList.isEmpty {
        snapshot["tmuxSessions"] = tmuxList
    }
    if let layerInfo {
        snapshot["currentLayer"] = layerInfo
    }
    return snapshot
}
|
|
571
|
+
|
|
572
|
+
// MARK: - Action execution
|
|
573
|
+
|
|
574
|
+
/// Hard cap on simultaneous actions. Rearranging 20+ windows is never right.
/// distribute is exempt because it's a single intent that handles all windows safely.
private static let maxActions = 6

/// Executes the actions returned by the worker for one turn.
///
/// Steps, in order:
/// 1. Refuse the whole batch if it has more than `maxActions` non-`distribute` actions.
/// 2. Snapshot the frames of the windows about to move (for undo).
/// 3. Spread `tile_window` actions that share a position across sub-positions.
/// 4. Convert each action's slots to `JSON` values and run it through `PhraseMatcher`.
///
/// A failing action is logged and does not stop the remaining ones.
///
/// - Parameter actions: Action dictionaries with an `intent` string and an optional `slots` dictionary.
private func executeActions(_ actions: [[String: Any]]) {
    // Guard: refuse to execute bulk operations that would be disorienting.
    // This runs BEFORE the undo snapshot so that a refused batch — which moves
    // nothing — does not overwrite the undo history of the previous turn.
    let nonDistributeActions = actions.filter { ($0["intent"] as? String) != "distribute" }
    if nonDistributeActions.count > Self.maxActions {
        DiagnosticLog.shared.warn(
            "HandsOff: BLOCKED — \(nonDistributeActions.count) actions exceeds limit of \(Self.maxActions). " +
            "Skipping execution to avoid disorienting window rearrangement."
        )
        return
    }

    // Snapshot frames of all windows about to be moved (for undo)
    let movingIntents = ["tile_window", "swap", "distribute", "move_to_display"]
    let movingWids: [UInt32] = actions.compactMap { action in
        let intent = action["intent"] as? String ?? ""
        guard movingIntents.contains(intent) else { return nil }
        let slots = action["slots"] as? [String: Any] ?? [:]
        return (slots["wid"] as? NSNumber)?.uint32Value
            ?? (slots["wid_a"] as? NSNumber)?.uint32Value
    }
    // Also grab wid_b from swap actions
    let swapBWids: [UInt32] = actions.compactMap { action in
        let slots = action["slots"] as? [String: Any] ?? [:]
        return (slots["wid_b"] as? NSNumber)?.uint32Value
    }
    snapshotFrames(wids: movingWids + swapBWids)

    // Smart distribution: when multiple tile_window actions target the same
    // position, subdivide that region instead of stacking windows on top of each other.
    let distributed = distributeTileActions(actions)

    for action in distributed {
        guard let intent = action["intent"] as? String else { continue }
        let slots = action["slots"] as? [String: Any] ?? [:]

        var jsonSlots: [String: JSON] = [:]
        for (key, value) in slots {
            if let text = value as? String {
                jsonSlots[key] = .string(text)
            } else if let number = value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
                // JSON booleans are decoded as boolean NSNumbers, which ALSO cast
                // to Int (as 0/1). They must be recognised before the Int check,
                // otherwise `true` is forwarded as `.int(1)`.
                jsonSlots[key] = .bool(number.boolValue)
            } else if let integer = value as? Int {
                jsonSlots[key] = .int(integer)
            }
            // Other value types (fractional numbers, arrays, nested objects) are not forwarded.
        }

        let match = IntentMatch(
            intentName: intent,
            slots: jsonSlots,
            confidence: 0.95,
            matchedPhrase: "hands-off"
        )

        do {
            _ = try PhraseMatcher.shared.execute(match)
            DiagnosticLog.shared.success("HandsOff: \(intent) executed")
        } catch {
            DiagnosticLog.shared.warn("HandsOff: \(intent) failed — \(error.localizedDescription)")
        }
    }
}
|
|
637
|
+
|
|
638
|
+
/// When multiple tile_window actions target the same position, distribute them
/// within that region. E.g., two windows sent to "left" become "top-left" and
/// "bottom-left".
///
/// The returned list has the same length and the same order as `actions`, so
/// the sequence of actions is executed as given. Only the `position` slot of
/// `tile_window` actions that share a position with another action is rewritten;
/// every other action is passed through untouched.
///
/// - Parameter actions: Actions for one turn.
/// - Returns: The actions with shared tile positions subdivided.
private func distributeTileActions(_ actions: [[String: Any]]) -> [[String: Any]] {
    /// Target position of a `tile_window` action that has one; `nil` for anything else.
    func tilePosition(of action: [String: Any]) -> String? {
        guard (action["intent"] as? String ?? "") == "tile_window",
              let slots = action["slots"] as? [String: Any] else { return nil }
        return slots["position"] as? String
    }

    // Pass 1: how many tile actions target each position.
    var groupSizes: [String: Int] = [:]
    for action in actions {
        if let position = tilePosition(of: action) {
            groupSizes[position, default: 0] += 1
        }
    }

    // Pass 2: walk the actions in order, handing out sub-positions per group.
    var subPositionsByGroup: [String: [String]] = [:]
    var nextSlotInGroup: [String: Int] = [:]
    var result: [[String: Any]] = []
    result.reserveCapacity(actions.count)

    for action in actions {
        guard let position = tilePosition(of: action),
              let groupSize = groupSizes[position], groupSize > 1 else {
            // Not a tile action, or the only window for its position — keep as-is.
            result.append(action)
            continue
        }

        // Multiple windows targeting the same position — subdivide (computed once per group).
        let subPositions = subPositionsByGroup[position] ?? subdividePosition(position, count: groupSize)
        subPositionsByGroup[position] = subPositions

        let slotIndex = nextSlotInGroup[position, default: 0]
        nextSlotInGroup[position] = slotIndex + 1
        // Falls back to the requested position if fewer sub-positions than expected came back.
        let assigned = subPositions.indices.contains(slotIndex) ? subPositions[slotIndex] : position

        var modified = action
        var slots = (action["slots"] as? [String: Any]) ?? [:]
        slots["position"] = assigned
        modified["slots"] = slots
        result.append(modified)
        DiagnosticLog.shared.info("HandsOff: distributed \(position) → \(assigned) for window \(slots["wid"] ?? "?")")
    }

    return result
}
|
|
678
|
+
|
|
679
|
+
/// Subdivide a tile position for N windows.
///
/// Halves ("left", "right", "top", "bottom") split into two quarters for fewer
/// than four windows and into four fourth-based tiles otherwise; "maximize" and
/// "center" use a six-entry list of quarters and halves. When there are more
/// windows than sub-positions the list wraps around, so sub-positions repeat.
/// Any other position cannot be subdivided and is simply repeated.
///
/// - Parameters:
///   - position: The position several windows were asked to occupy.
///   - count: Number of windows to place.
/// - Returns: `count` position names, one per window.
private func subdividePosition(_ position: String, count: Int) -> [String] {
    let needsFour = count >= 4
    let candidates: [String]

    switch position {
    case "left":
        // 2-3 windows → vertical stack; 4+ → 2×2 grid using the eighths
        candidates = needsFour
            ? ["top-first-fourth", "top-second-fourth", "bottom-first-fourth", "bottom-second-fourth"]
            : ["top-left", "bottom-left"]
    case "right":
        candidates = needsFour
            ? ["top-third-fourth", "top-last-fourth", "bottom-third-fourth", "bottom-last-fourth"]
            : ["top-right", "bottom-right"]
    case "top":
        // 2-3 windows → side by side; 4+ → use fourths
        candidates = needsFour
            ? ["top-first-fourth", "top-second-fourth", "top-third-fourth", "top-last-fourth"]
            : ["top-left", "top-right"]
    case "bottom":
        candidates = needsFour
            ? ["bottom-first-fourth", "bottom-second-fourth", "bottom-third-fourth", "bottom-last-fourth"]
            : ["bottom-left", "bottom-right"]
    case "maximize", "center":
        // Full screen → grid
        candidates = ["top-left", "top-right", "bottom-left", "bottom-right", "left", "right"]
    default:
        // Can't subdivide further — just repeat the position
        return Array(repeating: position, count: count)
    }

    // Distribute windows across available sub-positions, cycling when they run out.
    return (0..<count).map { candidates[$0 % candidates.count] }
}
|
|
727
|
+
|
|
728
|
+
// MARK: - Sound
|
|
729
|
+
|
|
730
|
+
/// Plays the named sound; does nothing if no sound with that name can be loaded.
///
/// - Parameter name: Sound name passed to `NSSound(named:)`.
private func playSound(_ name: NSSound.Name) {
    guard let sound = NSSound(named: name) else { return }
    _ = sound.play()
}
|
|
733
|
+
}
|