@lattices/cli 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. package/README.md +85 -9
  2. package/app/Info.plist +30 -0
  3. package/app/Lattices.app/Contents/Info.plist +8 -2
  4. package/app/Lattices.app/Contents/MacOS/Lattices +0 -0
  5. package/app/Lattices.app/Contents/Resources/AppIcon.icns +0 -0
  6. package/app/Lattices.app/Contents/Resources/tap.wav +0 -0
  7. package/app/Lattices.app/Contents/_CodeSignature/CodeResources +139 -0
  8. package/app/Lattices.entitlements +15 -0
  9. package/app/Package.swift +8 -1
  10. package/app/Resources/tap.wav +0 -0
  11. package/app/Sources/AdvisorLearningStore.swift +90 -0
  12. package/app/Sources/AgentSession.swift +377 -0
  13. package/app/Sources/AppDelegate.swift +45 -12
  14. package/app/Sources/AppShellView.swift +81 -8
  15. package/app/Sources/AudioProvider.swift +386 -0
  16. package/app/Sources/CheatSheetHUD.swift +261 -19
  17. package/app/Sources/DaemonProtocol.swift +13 -0
  18. package/app/Sources/DaemonServer.swift +8 -0
  19. package/app/Sources/DesktopModel.swift +189 -6
  20. package/app/Sources/DesktopModelTypes.swift +2 -0
  21. package/app/Sources/DiagnosticLog.swift +104 -2
  22. package/app/Sources/EventBus.swift +1 -0
  23. package/app/Sources/HUDBottomBar.swift +279 -0
  24. package/app/Sources/HUDController.swift +1158 -0
  25. package/app/Sources/HUDLeftBar.swift +849 -0
  26. package/app/Sources/HUDMinimap.swift +179 -0
  27. package/app/Sources/HUDRightBar.swift +774 -0
  28. package/app/Sources/HUDState.swift +367 -0
  29. package/app/Sources/HUDTopBar.swift +243 -0
  30. package/app/Sources/HandsOffSession.swift +802 -0
  31. package/app/Sources/HomeDashboardView.swift +125 -0
  32. package/app/Sources/HotkeyManager.swift +2 -0
  33. package/app/Sources/HotkeyStore.swift +49 -9
  34. package/app/Sources/IntentEngine.swift +962 -0
  35. package/app/Sources/Intents/CreateLayerIntent.swift +54 -0
  36. package/app/Sources/Intents/DistributeIntent.swift +56 -0
  37. package/app/Sources/Intents/FocusIntent.swift +69 -0
  38. package/app/Sources/Intents/HelpIntent.swift +41 -0
  39. package/app/Sources/Intents/KillIntent.swift +47 -0
  40. package/app/Sources/Intents/LatticeIntent.swift +78 -0
  41. package/app/Sources/Intents/LaunchIntent.swift +67 -0
  42. package/app/Sources/Intents/ListSessionsIntent.swift +32 -0
  43. package/app/Sources/Intents/ListWindowsIntent.swift +30 -0
  44. package/app/Sources/Intents/ScanIntent.swift +52 -0
  45. package/app/Sources/Intents/SearchIntent.swift +190 -0
  46. package/app/Sources/Intents/SwitchLayerIntent.swift +50 -0
  47. package/app/Sources/Intents/TileIntent.swift +61 -0
  48. package/app/Sources/LatticesApi.swift +1275 -30
  49. package/app/Sources/LauncherHUD.swift +348 -0
  50. package/app/Sources/MainView.swift +147 -44
  51. package/app/Sources/MouseFinder.swift +222 -0
  52. package/app/Sources/OcrModel.swift +34 -1
  53. package/app/Sources/OmniSearchState.swift +99 -102
  54. package/app/Sources/OnboardingView.swift +457 -0
  55. package/app/Sources/PermissionChecker.swift +2 -12
  56. package/app/Sources/PiChatDock.swift +454 -0
  57. package/app/Sources/PiChatSession.swift +815 -0
  58. package/app/Sources/PiWorkspaceView.swift +364 -0
  59. package/app/Sources/PlacementSpec.swift +195 -0
  60. package/app/Sources/Preferences.swift +59 -0
  61. package/app/Sources/ProjectScanner.swift +58 -45
  62. package/app/Sources/ScreenMapState.swift +701 -55
  63. package/app/Sources/ScreenMapView.swift +843 -103
  64. package/app/Sources/ScreenMapWindowController.swift +22 -0
  65. package/app/Sources/SessionLayerStore.swift +285 -0
  66. package/app/Sources/SessionManager.swift +4 -1
  67. package/app/Sources/SettingsView.swift +186 -3
  68. package/app/Sources/Theme.swift +9 -8
  69. package/app/Sources/TmuxModel.swift +7 -0
  70. package/app/Sources/TmuxQuery.swift +27 -3
  71. package/app/Sources/VoiceChatView.swift +192 -0
  72. package/app/Sources/VoiceCommandWindow.swift +1594 -0
  73. package/app/Sources/VoiceIntentResolver.swift +671 -0
  74. package/app/Sources/VoxClient.swift +454 -0
  75. package/app/Sources/WindowTiler.swift +348 -87
  76. package/app/Sources/WorkspaceManager.swift +127 -18
  77. package/app/Tests/StageDragTests.swift +333 -0
  78. package/app/Tests/StageJoinTests.swift +313 -0
  79. package/app/Tests/StageManagerTests.swift +280 -0
  80. package/app/Tests/StageTileTests.swift +353 -0
  81. package/assets/AppIcon.icns +0 -0
  82. package/bin/client.ts +16 -0
  83. package/bin/{daemon-client.js → daemon-client.ts} +49 -30
  84. package/bin/handsoff-infer.ts +280 -0
  85. package/bin/handsoff-worker.ts +740 -0
  86. package/bin/lattices-app.ts +338 -0
  87. package/bin/lattices-dev +208 -0
  88. package/bin/{lattices.js → lattices.ts} +777 -140
  89. package/bin/project-twin.ts +645 -0
  90. package/docs/agent-execution-plan.md +562 -0
  91. package/docs/agent-layer-guide.md +207 -0
  92. package/docs/agents.md +142 -0
  93. package/docs/api.md +153 -34
  94. package/docs/app.md +29 -1
  95. package/docs/config.md +5 -1
  96. package/docs/handsoff-test-scenarios.md +84 -0
  97. package/docs/layers.md +20 -20
  98. package/docs/ocr.md +14 -5
  99. package/docs/overview.md +5 -1
  100. package/docs/presentation-execution-review.md +491 -0
  101. package/docs/prompts/hands-off-system.md +374 -0
  102. package/docs/prompts/hands-off-turn.md +30 -0
  103. package/docs/prompts/voice-advisor.md +31 -0
  104. package/docs/prompts/voice-fallback.md +23 -0
  105. package/docs/tiling-reference.md +167 -0
  106. package/docs/twins.md +138 -0
  107. package/docs/voice-command-protocol.md +278 -0
  108. package/docs/voice.md +219 -0
  109. package/package.json +29 -11
  110. package/bin/client.js +0 -4
  111. package/bin/lattices-app.js +0 -221
@@ -0,0 +1,802 @@
1
+ import AppKit
2
+
3
+ /// Hands-off voice mode: hotkey → listen → worker handles everything.
4
+ ///
5
+ /// Architecture:
6
+ /// - Swift owns: hotkey, Vox dictation, action execution
7
+ /// - Worker owns: inference (Groq), TTS (streaming OpenAI), fast path matching, audio caching
8
+ /// - Worker is a long-running bun process, started once, communicates via JSON lines over stdio
9
+ ///
10
+ /// The worker handles the full turn orchestration in parallel:
11
+ /// - Fast path: local match → cached ack + execute + cached confirm (~300ms)
12
+ /// - Slow path: cached ack ∥ Groq inference → streaming TTS ∥ execute (~2s)
13
+
14
+ // MARK: - Chat Log Entry
15
+
16
/// One row of the voice chat log.
///
/// Each entry receives a fresh `id` when it is created. Equality compares
/// that `id` only, so two entries with identical content are still distinct.
struct VoiceChatEntry: Identifiable {
    /// Who produced the entry.
    enum Role: String, Equatable {
        /// What the user said.
        case user
        /// Spoken response.
        case assistant
        /// Silent info (actions executed, search results, etc.).
        case system
    }

    let id = UUID()
    let timestamp: Date
    let role: Role
    let text: String
    /// Optional structured data — actions taken, search results, etc.
    /// Displayable in the chat log but not spoken.
    let detail: String?
}

extension VoiceChatEntry: Equatable {
    /// Identity-based equality: only `id` is compared.
    static func == (lhs: VoiceChatEntry, rhs: VoiceChatEntry) -> Bool {
        lhs.id == rhs.id
    }
}
35
+
36
+ final class HandsOffSession: ObservableObject {
37
+ static let shared = HandsOffSession()
38
+
39
/// Phase of the hands-off session as published to observers.
///
/// - `idle`: nothing in progress.
/// - `connecting`: set by `beginListening()` while Vox is not yet connected.
/// - `listening`: set by `startDictation()`.
/// - `thinking`: set by `processTurn(_:)` until the turn ends.
enum State: Equatable {
    case idle, connecting, listening, thinking
}
45
+
46
+ @Published var state: State = .idle
47
+ @Published var lastTranscript: String?
48
+ @Published var lastResponse: String?
49
+ @Published var audibleFeedbackEnabled: Bool = false
50
+
51
+ /// Recently executed actions — shown as playback in the HUD bottom bar
52
+ @Published var recentActions: [[String: Any]] = []
53
+
54
+ /// Frame history for undo — stores pre-move frames of windows touched by the last turn
55
/// Geometry of a single window as captured by `snapshotFrames(wids:)`.
struct FrameSnapshot {
    /// Window ID the frame was captured for.
    let wid: UInt32
    /// `pid` taken from the matching `DesktopModel` window entry.
    let pid: Int32
    /// Bounds copied from the CG window list entry.
    let frame: WindowFrame
}
60
+ private(set) var frameHistory: [FrameSnapshot] = []
61
+
62
/// Replaces `frameHistory` with the current frames of the given windows.
///
/// Frames are copied straight from the CG window list bounds (per the original
/// note: CG/AX coordinates, top-left origin, for use with batchRestoreWindows).
/// Window IDs unknown to `DesktopModel`, or without a usable bounds entry, are skipped.
///
/// - Parameter wids: IDs of the windows that are about to be moved.
func snapshotFrames(wids: [UInt32]) {
    frameHistory.removeAll()

    let listOptions: CGWindowListOption = [.optionAll, .excludeDesktopElements]
    guard let cgWindows = CGWindowListCopyWindowInfo(listOptions, kCGNullWindowID) as? [[String: Any]] else { return }

    for wid in wids {
        guard let tracked = DesktopModel.shared.windows[wid] else { continue }

        // First CG entry that has both this window number and a bounds dictionary.
        let firstBounds = cgWindows.lazy.compactMap { info -> NSDictionary? in
            guard let number = info[kCGWindowNumber as String] as? UInt32, number == wid else { return nil }
            return info[kCGWindowBounds as String] as? NSDictionary
        }.first
        guard let boundsDict = firstBounds else { continue }

        var bounds = CGRect.zero
        guard CGRectMakeWithDictionaryRepresentation(boundsDict, &bounds) else { continue }

        let captured = WindowFrame(x: bounds.origin.x, y: bounds.origin.y, w: bounds.width, h: bounds.height)
        frameHistory.append(FrameSnapshot(wid: wid, pid: tracked.pid, frame: captured))
    }
}
81
+
82
/// Drops every stored frame snapshot.
func clearFrameHistory() {
    frameHistory = []
}
85
+
86
+ /// Running chat log — visible in the voice chat panel. Persists across turns.
87
+ @Published private(set) var chatLog: [VoiceChatEntry] = []
88
+ private let maxChatEntries = 50
89
+
90
+ private var turnCount = 0
91
+ @Published private(set) var conversationHistory: [[String: String]] = []
92
+ private let maxHistoryTurns = 10
93
+
94
+ // Long-running worker process
95
+ private var workerProcess: Process?
96
+ private var workerStdin: FileHandle?
97
+ private var workerBuffer = ""
98
+ private let workerQueue = DispatchQueue(label: "com.lattices.handsoff-worker", qos: .userInitiated)
99
+ private var lastCueAt: Date = .distantPast
100
+
101
+ /// JSONL log for full turn data — ~/.lattices/handsoff.jsonl
102
+ private let turnLogPath = NSHomeDirectory() + "/.lattices/handsoff.jsonl"
103
+
104
+ private init() {}
105
+
106
+ // MARK: - Chat Log
107
+
108
/// Appends an entry to the chat log and trims the oldest entries once the
/// log grows past `maxChatEntries`.
///
/// The entry is timestamped immediately; the log mutation itself is
/// dispatched asynchronously to the main queue.
///
/// - Parameters:
///   - role: Who produced the text.
///   - text: The text to show.
///   - detail: Optional extra data stored on the entry.
func appendChat(_ role: VoiceChatEntry.Role, text: String, detail: String? = nil) {
    let newEntry = VoiceChatEntry(timestamp: Date(), role: role, text: text, detail: detail)
    DispatchQueue.main.async {
        self.chatLog.append(newEntry)
        let overflow = self.chatLog.count - self.maxChatEntries
        guard overflow > 0 else { return }
        self.chatLog.removeFirst(overflow)
    }
}
117
+
118
/// Empties the chat log; the mutation is dispatched to the main queue.
func clearChatLog() {
    DispatchQueue.main.async {
        self.chatLog.removeAll()
    }
}
121
+
122
+ // MARK: - Lifecycle
123
+
124
/// Starts the session by making sure the worker process is running.
func start() {
    self.startWorker()
}
127
+
128
/// Stores the audible-feedback preference and, when it is switched on,
/// makes sure the worker is running.
func setAudibleFeedbackEnabled(_ enabled: Bool) {
    audibleFeedbackEnabled = enabled
    guard enabled else { return }
    startWorker()
}
134
+
135
/// Asks the worker to play a cached cue for `phrase`.
///
/// Does nothing when audible feedback is disabled, or when the previous cue
/// was requested less than 0.2 seconds ago.
func playCachedCue(_ phrase: String) {
    guard audibleFeedbackEnabled else { return }

    let requestedAt = Date()
    if requestedAt.timeIntervalSince(lastCueAt) < 0.2 { return }
    lastCueAt = requestedAt

    startWorker()
    let command: [String: Any] = ["cmd": "play_cached", "text": phrase]
    sendToWorker(command)
}
143
+
144
/// Appends one turn record as a single JSON line to the log at `turnLogPath`.
///
/// The record holds a timestamp, the turn counter, the transcript, the turn
/// duration, a fresh desktop snapshot and — when the response has a `data`
/// object — its `actions`, `spoken` and `_meta` values.
///
/// Logging is best-effort: failures are reported to the diagnostic log and
/// never propagate to the caller.
///
/// - Parameters:
///   - transcript: What the user said.
///   - response: The raw worker response for the turn.
///   - turnMs: Turn duration in milliseconds.
private func logTurn(transcript: String, response: [String: Any], turnMs: Int) {
    var record: [String: Any] = [
        "ts": ISO8601DateFormatter().string(from: Date()),
        "turn": turnCount,
        "transcript": transcript,
        "turnMs": turnMs,
        "snapshot": buildSnapshot(),
    ]
    if let data = response["data"] as? [String: Any] {
        record["actions"] = data["actions"]
        record["spoken"] = data["spoken"]
        record["meta"] = data["_meta"]
    }

    // JSONSerialization raises an Objective-C exception (not a Swift error) for
    // unsupported objects, so validate first.
    guard JSONSerialization.isValidJSONObject(record),
          var lineData = try? JSONSerialization.data(withJSONObject: record) else {
        DiagnosticLog.shared.warn("HandsOff: turn record is not JSON-serializable, skipping log")
        return
    }
    lineData.append(0x0A) // trailing "\n" — one record per line

    // The log directory may not exist yet (first run); without it both the
    // append and the create path fail silently.
    let fileManager = FileManager.default
    let directory = (turnLogPath as NSString).deletingLastPathComponent
    do {
        try fileManager.createDirectory(atPath: directory, withIntermediateDirectories: true)
    } catch {
        DiagnosticLog.shared.warn("HandsOff: cannot create log directory \(directory) — \(error.localizedDescription)")
        return
    }

    if let handle = FileHandle(forWritingAtPath: turnLogPath) {
        defer { handle.closeFile() }
        handle.seekToEndOfFile()
        handle.write(lineData)
    } else if !fileManager.createFile(atPath: turnLogPath, contents: lineData) {
        DiagnosticLog.shared.warn("HandsOff: cannot create turn log at \(turnLogPath)")
    }
}
172
+
173
/// Launches the long-running bun worker unless one is already running.
///
/// Steps: locate `bun` in a fixed list of install paths, check that the worker
/// script exists, spawn the process with piped stdin/stdout/stderr, forward
/// stdout to `handleWorkerOutput(_:)`, mirror stderr into the diagnostic log,
/// and send an initial `ping`. When the worker exits, a restart is attempted
/// two seconds later.
private func startWorker() {
    if workerProcess?.isRunning == true, workerStdin != nil {
        return
    }

    let bunPaths = [
        NSHomeDirectory() + "/.bun/bin/bun",
        "/usr/local/bin/bun",
        "/opt/homebrew/bin/bun",
    ]
    guard let bunPath = bunPaths.first(where: { FileManager.default.isExecutableFile(atPath: $0) }) else {
        DiagnosticLog.shared.warn("HandsOff: bun not found, worker disabled")
        return
    }

    // NOTE(review): script and working directory are hard-coded to ~/dev/lattices,
    // which looks like a development checkout — confirm this is intended for
    // installed builds.
    let scriptPath = NSHomeDirectory() + "/dev/lattices/bin/handsoff-worker.ts"
    guard FileManager.default.fileExists(atPath: scriptPath) else {
        DiagnosticLog.shared.warn("HandsOff: worker script not found at \(scriptPath)")
        return
    }

    let proc = Process()
    proc.executableURL = URL(fileURLWithPath: bunPath)
    proc.arguments = ["run", scriptPath]
    proc.currentDirectoryURL = URL(fileURLWithPath: NSHomeDirectory() + "/dev/lattices")

    var env = ProcessInfo.processInfo.environment
    env.removeValue(forKey: "CLAUDECODE")
    proc.environment = env

    let inPipe = Pipe()
    let outPipe = Pipe()
    let errPipe = Pipe()
    proc.standardInput = inPipe
    proc.standardOutput = outPipe
    proc.standardError = errPipe

    // Handle worker crash → restart. Installed before launch so a worker that
    // exits immediately is still noticed.
    proc.terminationHandler = { [weak self] finished in
        DiagnosticLog.shared.warn("HandsOff: worker exited (code \(finished.terminationStatus)), restarting in 2s")
        DispatchQueue.main.async {
            // Only clear the handles if they still belong to the process that
            // exited; a replacement worker may already have been started.
            guard let self, self.workerProcess === finished else { return }
            self.workerProcess = nil
            self.workerStdin = nil
        }
        DispatchQueue.main.asyncAfter(deadline: .now() + 2) {
            self?.startWorker()
        }
    }

    do {
        try proc.run()
    } catch {
        DiagnosticLog.shared.warn("HandsOff: failed to start worker — \(error)")
        return
    }

    workerProcess = proc
    workerStdin = inPipe.fileHandleForWriting

    // Read stdout for responses
    outPipe.fileHandleForReading.readabilityHandler = { [weak self] handle in
        let data = handle.availableData
        guard !data.isEmpty else {
            // Empty read = end of file; detach so the handler stops firing
            // and the pipe can be released.
            handle.readabilityHandler = nil
            return
        }
        // Lossy decoding: a multi-byte character split across two reads must not
        // cause the whole chunk (and the newlines in it) to be dropped.
        self?.handleWorkerOutput(String(decoding: data, as: UTF8.self))
    }

    // Log stderr
    errPipe.fileHandleForReading.readabilityHandler = { handle in
        let data = handle.availableData
        guard !data.isEmpty else {
            handle.readabilityHandler = nil
            return
        }
        let text = String(decoding: data, as: UTF8.self)
        for line in text.components(separatedBy: "\n") where !line.isEmpty {
            DiagnosticLog.shared.info("HandsOff worker: \(line)")
        }
    }

    // Ping to verify
    sendToWorker(["cmd": "ping"])
    DiagnosticLog.shared.info("HandsOff: worker started (pid \(proc.processIdentifier))")
}
250
+
251
+ // MARK: - Worker communication
252
+
253
+ private var pendingCallback: (([String: Any]) -> Void)?
254
+ private var turnTimeoutWork: DispatchWorkItem?
255
+ private static let turnTimeoutSeconds: TimeInterval = 30
256
+
257
/// Serializes `dict` as one JSON line and writes it to the worker's stdin on
/// `workerQueue`.
///
/// Delivery is best-effort: if the message cannot be serialized, no worker is
/// attached, or the write fails (for example because the worker has exited),
/// the message is dropped and the failure is logged.
///
/// - Parameter dict: The command to send; must be a valid JSON object.
private func sendToWorker(_ dict: [String: Any]) {
    // JSONSerialization raises an Objective-C exception for unsupported
    // objects, so validate before serializing.
    guard JSONSerialization.isValidJSONObject(dict),
          var payload = try? JSONSerialization.data(withJSONObject: dict) else {
        DiagnosticLog.shared.warn("HandsOff: dropping worker message — not JSON-serializable")
        return
    }
    payload.append(0x0A) // "\n" terminates the JSON line

    workerQueue.async { [weak self] in
        guard let stdin = self?.workerStdin else { return }
        do {
            // The throwing API reports a closed pipe as a Swift error; the legacy
            // write(_:) raises an exception that Swift cannot catch.
            try stdin.write(contentsOf: payload)
        } catch {
            DiagnosticLog.shared.warn("HandsOff: failed to write to worker — \(error.localizedDescription)")
        }
    }
}
265
+
266
/// Sends `dict` to the worker and remembers `callback` as the pending callback.
///
/// Only one callback is stored at a time; registering a new one replaces
/// whatever was pending before.
private func sendToWorkerWithCallback(_ dict: [String: Any], callback: @escaping ([String: Any]) -> Void) {
    self.pendingCallback = callback
    self.sendToWorker(dict)
}
270
+
271
/// Consumes a chunk of worker stdout, splits it into JSON lines and applies
/// every complete line.
///
/// Incomplete trailing text stays in `workerBuffer` until the next chunk.
/// For each parsed line: the spoken text and an action summary are added to
/// the chat log, the pending callback (if any) is invoked, the actions are
/// executed, and a turn that was in `.thinking` returns to `.idle`.
///
/// - Parameter str: Newly received stdout text.
private func handleWorkerOutput(_ str: String) {
    workerBuffer += str
    let lines = workerBuffer.components(separatedBy: "\n")
    workerBuffer = lines.last ?? ""

    for line in lines.dropLast() {
        let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty,
              let data = trimmed.data(using: .utf8),
              let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any]
        else { continue }

        DiagnosticLog.shared.info("HandsOff: worker response → \(trimmed)")

        // Parse everything on the background thread, then do ONE main-queue dispatch
        // to update all @Published properties atomically. Scattered dispatches cause
        // Combine deadlocks (os_unfair_lock contention with SwiftUI rendering).
        let dataObj = json["data"] as? [String: Any]
        let spoken = dataObj?["spoken"] as? String
        let actions = dataObj?["actions"] as? [[String: Any]]

        // Build chat entries off-main
        var chatEntries: [(VoiceChatEntry.Role, String)] = []
        if let spoken { chatEntries.append((.assistant, spoken)) }
        if let actions, !actions.isEmpty {
            let summaries = actions.compactMap { action -> String? in
                guard let intent = action["intent"] as? String else { return nil }
                let slots = action["slots"] as? [String: Any] ?? [:]
                let target = slots["app"] as? String ?? slots["query"] as? String ?? ""
                let pos = slots["position"] as? String ?? ""
                return [intent, target, pos].filter { !$0.isEmpty }.joined(separator: " ")
            }
            if !summaries.isEmpty {
                chatEntries.append((.system, summaries.joined(separator: ", ")))
            }
        }

        // Single dispatch — all shared-state mutations in one block
        DispatchQueue.main.async { [weak self] in
            guard let self else { return }

            // `pendingCallback` is written on the main thread (turn start, cancel,
            // timeout), so it is read and cleared here rather than on the pipe
            // reader thread.
            let cb = self.pendingCallback
            self.pendingCallback = nil

            if let spoken { self.lastResponse = spoken }
            for (role, text) in chatEntries {
                self.chatLog.append(VoiceChatEntry(timestamp: Date(), role: role, text: text, detail: nil))
            }
            if self.chatLog.count > self.maxChatEntries {
                self.chatLog.removeFirst(self.chatLog.count - self.maxChatEntries)
            }

            // The callback mutates @Published state (conversation history), so it
            // runs on the main thread as well. It is invoked before the actions
            // execute.
            cb?(json)

            // NOTE(review): actions from a response that arrives after the turn was
            // cancelled or timed out are still executed — confirm whether stale
            // responses should be ignored.
            if let actions, !actions.isEmpty {
                self.recentActions = actions
                self.executeActions(actions)
            }

            // Only a turn that is still waiting goes back to idle. A late response
            // must not reset a newer `.connecting` / `.listening` phase.
            if self.state == .thinking {
                self.state = .idle
            }
        }
    }
}
330
+
331
+ // MARK: - Toggle
332
+
333
/// Advances the session according to its current phase:
/// idle starts listening, listening finishes the recording, thinking cancels
/// the running turn, and connecting cancels the attempt.
func toggle() {
    switch state {
    case .connecting:
        cancel()
    case .thinking:
        cancelTurn()
    case .listening:
        finishListening()
    case .idle:
        beginListening()
    }
}
345
+
346
/// Cancels any active Vox session and returns the session to `.idle`.
func cancel() {
    self.cancelVoxSession()
    self.state = .idle
    DiagnosticLog.shared.info("HandsOff: cancelled")
}
351
+
352
/// Abandons the turn that is currently waiting on the worker: cancels the
/// timeout, forgets the pending callback, returns to `.idle` and plays the
/// "Funk" sound.
private func cancelTurn() {
    if let scheduledTimeout = turnTimeoutWork {
        scheduledTimeout.cancel()
    }
    turnTimeoutWork = nil
    pendingCallback = nil
    state = .idle
    DiagnosticLog.shared.warn("HandsOff: turn cancelled by user")
    playSound("Funk")
}
360
+
361
/// Cancels the active Vox recording session, if there is one, without
/// transcribing it.
private func cancelVoxSession() {
    let vox = VoxClient.shared
    if vox.activeSessionId == nil { return }
    DiagnosticLog.shared.info("HandsOff: cancelling Vox session")
    vox.cancelSession()
}
367
+
368
+ // MARK: - Voice capture
369
+
370
/// Starts dictation right away when Vox is connected; otherwise switches to
/// `.connecting`, asks Vox to connect, and schedules the first of up to five
/// follow-up retries 0.5 seconds later.
private func beginListening() {
    let vox = VoxClient.shared

    guard vox.connectionState != .connected else {
        startDictation()
        return
    }

    state = .connecting
    vox.connect()
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
        self?.retryListenIfConnected(attempts: 5)
    }
}
384
+
385
/// Polls the Vox connection: starts dictation once connected, otherwise
/// re-checks every 0.5 seconds until `attempts` runs out, then gives up with
/// the "Basso" sound.
///
/// NOTE(review): the retry chain does not look at `state`, so it keeps going
/// (and may start dictation) after `cancel()` was called during connecting —
/// confirm whether that is intended.
///
/// - Parameter attempts: Number of further retries allowed after this check.
private func retryListenIfConnected(attempts: Int) {
    guard VoxClient.shared.connectionState != .connected else {
        startDictation()
        return
    }

    guard attempts > 0 else {
        state = .idle
        DiagnosticLog.shared.warn("HandsOff: Vox not available")
        playSound("Basso")
        return
    }

    let remaining = attempts - 1
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
        self?.retryListenIfConnected(attempts: remaining)
    }
}
398
+
399
+ /// Guard against double-processing the transcript (session.final + completion can both deliver it).
400
+ private var turnProcessed = false
401
+
402
/// Enters `.listening`, resets the per-turn transcript state, plays the
/// "Tink" sound and opens a Vox live session.
///
/// The transcript can arrive twice — as a `session.final` progress event and
/// again through the completion handler. Both paths go through
/// `deliverTranscript(_:)`, which processes it only once, so whichever arrives
/// first wins. All handling is hopped onto the main queue.
private func startDictation() {
    state = .listening
    lastTranscript = nil
    turnProcessed = false
    playSound("Tink")

    DiagnosticLog.shared.info("HandsOff: listening...")

    VoxClient.shared.startSession(
        onProgress: { [weak self] event, data in
            DispatchQueue.main.async {
                guard let self else { return }

                if event == "session.state" {
                    let phase = data["state"] as? String ?? ""
                    DiagnosticLog.shared.info("HandsOff: session → \(phase)")

                    // Vox cancelled the session (e.g. recording timeout)
                    guard phase == "cancelled" else { return }
                    let why = data["reason"] as? String ?? "unknown"
                    DiagnosticLog.shared.warn("HandsOff: Vox cancelled session — \(why)")

                    guard self.state == .listening else { return }
                    self.state = .idle
                    self.playSound("Basso")
                } else if event == "session.final" {
                    // Primary transcript delivery — process immediately
                    guard let heard = data["text"] as? String, !heard.isEmpty else { return }
                    self.lastTranscript = heard
                    self.deliverTranscript(heard)
                }
            }
        },
        completion: { [weak self] result in
            DispatchQueue.main.async {
                guard let self else { return }

                switch result {
                case .failure(let error):
                    guard !self.turnProcessed else { return }
                    self.state = .idle
                    DiagnosticLog.shared.warn("HandsOff: session error — \(error.localizedDescription)")
                    self.playSound("Basso")

                case .success(let payload):
                    let heard = payload["text"] as? String ?? ""
                    if !heard.isEmpty {
                        // Fallback — deliver if session.final didn't already
                        self.lastTranscript = heard
                        self.deliverTranscript(heard)
                        return
                    }
                    guard !self.turnProcessed else { return }
                    self.state = .idle
                    DiagnosticLog.shared.info("HandsOff: no speech detected")
                }
            }
        }
    )
}
469
+
470
/// Hands the transcript to turn processing at most once per dictation.
///
/// Called from both the `session.final` event and the session completion;
/// the second call for the same dictation is ignored via `turnProcessed`.
private func deliverTranscript(_ text: String) {
    if turnProcessed { return }
    turnProcessed = true

    DiagnosticLog.shared.info("HandsOff: heard → '\(text)'")
    appendChat(.user, text: text)
    processTurn(text)
}
478
+
479
/// Ends the current recording: plays the "Tink" sound and asks Vox to stop
/// the session. Ignored unless the session is in `.listening`.
func finishListening() {
    if state != .listening { return }
    playSound("Tink")
    VoxClient.shared.stopSession()
}
484
+
485
+ // MARK: - Turn processing (delegates to worker)
486
+
487
/// Runs one turn: switches to `.thinking`, sends the transcript together with
/// a desktop snapshot and the conversation history to the worker, and arms a
/// timeout that resets the session if no response arrives.
///
/// When the worker answers, the timeout is cancelled, the turn is written to
/// the JSONL log and — if the response has a `data` object — the exchange is
/// appended to `conversationHistory`, which is capped at `maxHistoryTurns`
/// user/assistant pairs.
///
/// - Parameter transcript: What the user said.
private func processTurn(_ transcript: String) {
    state = .thinking
    turnCount += 1

    let startedAt = Date()
    DiagnosticLog.shared.info("HandsOff: ⏱ turn \(turnCount) — '\(transcript)'")

    // Send turn to worker — it handles ack, inference, TTS, everything in parallel
    let desktop = buildSnapshot()
    let turnCmd: [String: Any] = [
        "cmd": "turn",
        "transcript": transcript,
        "snapshot": desktop,
        "history": conversationHistory,
    ]

    // Start turn timeout — forcibly reset if worker never responds
    turnTimeoutWork?.cancel()
    let watchdog = DispatchWorkItem { [weak self] in
        guard let self, self.state == .thinking else { return }
        DiagnosticLog.shared.warn("HandsOff: ⏱ turn \(self.turnCount) timed out after \(Int(Self.turnTimeoutSeconds))s")
        self.pendingCallback = nil
        self.state = .idle
        self.playSound("Basso")
    }
    turnTimeoutWork = watchdog
    DispatchQueue.main.asyncAfter(deadline: .now() + Self.turnTimeoutSeconds, execute: watchdog)

    sendToWorkerWithCallback(turnCmd) { [weak self] response in
        guard let self else { return }

        // Cancel the timeout — we got a response
        self.turnTimeoutWork?.cancel()
        self.turnTimeoutWork = nil

        let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000)
        DiagnosticLog.shared.info("HandsOff: ⏱ turn \(self.turnCount) complete — \(elapsedMs)ms")

        // Log full turn to JSONL
        self.logTurn(transcript: transcript, response: response, turnMs: elapsedMs)

        // Record history
        guard let payload = response["data"] as? [String: Any] else { return }
        let encodedPayload = (try? JSONSerialization.data(withJSONObject: payload))
            .flatMap { String(data: $0, encoding: .utf8) } ?? ""
        self.conversationHistory.append(["role": "user", "content": transcript])
        self.conversationHistory.append(["role": "assistant", "content": encodedPayload])

        let historyCap = self.maxHistoryTurns * 2
        if self.conversationHistory.count > historyCap {
            self.conversationHistory = Array(self.conversationHistory.suffix(historyCap))
        }
    }
}
541
+
542
+ // MARK: - Desktop snapshot (full context — all windows, all screens)
543
+
544
/// Assembles the desktop context dictionary that accompanies every turn.
///
/// Keys: `stageManager` / `smGrouping` (read from the `com.apple.WindowManager`
/// defaults domain), `windows` (every window from `DesktopModel`, unfiltered),
/// `terminals`, `screens`, `windowCount`, `onScreenCount`, plus `tmuxSessions`
/// when there are any and `currentLayer` when the active layer index is valid.
///
/// - Returns: A dictionary of JSON-compatible values.
private func buildSnapshot() -> [String: Any] {
    let allWindows = DesktopModel.shared.allWindows()

    let stageDefaults = UserDefaults(suiteName: "com.apple.WindowManager")
    let smEnabled = stageDefaults?.bool(forKey: "GloballyEnabled") ?? false
    let grouping = stageDefaults?.integer(forKey: "AppWindowGroupingBehavior") ?? 0

    // All windows — no filtering. Order is front-to-back (Z-order).
    var windowList: [[String: Any]] = []
    for (zIndex, window) in allWindows.enumerated() {
        var entry: [String: Any] = [
            "wid": window.wid,
            "app": window.app,
            "title": window.title,
            "frame": "\(Int(window.frame.x)),\(Int(window.frame.y)) \(Int(window.frame.w))x\(Int(window.frame.h))",
            "onScreen": window.isOnScreen,
            "zIndex": zIndex, // 0 = frontmost
        ]
        if let session = window.latticesSession {
            entry["session"] = session
        }
        if !window.spaceIds.isEmpty {
            entry["spaces"] = window.spaceIds
        }
        windowList.append(entry)
    }

    // All screens (1-based index)
    let mainScreen = NSScreen.main
    let screens: [[String: Any]] = NSScreen.screens.enumerated().map { offset, screen -> [String: Any] in
        let full = screen.frame
        let visible = screen.visibleFrame
        return [
            "index": offset + 1,
            "width": Int(full.width),
            "height": Int(full.height),
            "isMain": screen == mainScreen,
            "visibleWidth": Int(visible.width),
            "visibleHeight": Int(visible.height),
        ]
    }

    // Layers — only reported when the active index points at an existing layer
    let layerStore = SessionLayerStore.shared
    var layerInfo: [String: Any]?
    let activeIndex = layerStore.activeIndex
    if activeIndex >= 0, activeIndex < layerStore.layers.count {
        layerInfo = ["name": layerStore.layers[activeIndex].name, "index": activeIndex]
    }

    // Terminal enrichment — cwd, running commands, claude, tmux sessions
    let terminalList: [[String: Any]] = ProcessModel.shared.synthesizeTerminals().map { inst -> [String: Any] in
        var entry: [String: Any] = [
            "tty": inst.tty,
            "hasClaude": inst.hasClaude,
            "displayName": inst.displayName,
            "isActiveTab": inst.isActiveTab,
        ]
        if let cwd = inst.cwd { entry["cwd"] = cwd }
        if let app = inst.app { entry["app"] = app.rawValue }
        if let session = inst.tmuxSession { entry["tmuxSession"] = session }
        if let wid = inst.windowId { entry["windowId"] = Int(wid) }
        if let title = inst.tabTitle { entry["tabTitle"] = title }

        // Running commands, with shell and login processes filtered out
        let userProcesses = inst.processes.filter {
            !["zsh", "bash", "fish", "login", "-zsh", "-bash"].contains($0.comm)
        }
        if !userProcesses.isEmpty {
            entry["runningCommands"] = userProcesses.map { proc -> [String: Any] in
                var cmd: [String: Any] = ["command": proc.comm]
                if let cwd = proc.cwd { cmd["cwd"] = cwd }
                return cmd
            }
        }
        return entry
    }

    // Tmux sessions
    let tmuxList: [[String: Any]] = TmuxModel.shared.sessions.map { session -> [String: Any] in
        [
            "name": session.name,
            "windowCount": session.windowCount,
            "attached": session.attached,
        ]
    }

    var snapshot: [String: Any] = [
        "stageManager": smEnabled,
        "smGrouping": grouping == 0 ? "all-at-once" : "one-at-a-time",
        "windows": windowList,
        "terminals": terminalList,
        "screens": screens,
        "windowCount": allWindows.count,
        "onScreenCount": allWindows.filter { $0.isOnScreen }.count,
    ]
    if !tmuxList.isEmpty {
        snapshot["tmuxSessions"] = tmuxList
    }
    if let layerInfo {
        snapshot["currentLayer"] = layerInfo
    }
    return snapshot
}
640
+
641
+ // MARK: - Action execution
642
+
643
+ /// Hard cap on simultaneous actions. Rearranging 20+ windows is never right.
644
+ /// distribute is exempt because it's a single intent that handles all windows safely.
645
+ private static let maxActions = 6
646
+
647
/// Executes the actions returned by the worker for one turn.
///
/// 1. Snapshots the frames of windows referenced by window-moving intents
///    (`tile_window`, `swap`, `distribute`, `move_to_display`) for undo.
/// 2. Refuses to run when more than `maxActions` non-`distribute` actions
///    were requested.
/// 3. Spreads `tile_window` actions that share a position via
///    `distributeTileActions(_:)`.
/// 4. Converts each action's slots to `JSON` values and runs it through
///    `PhraseMatcher`; a failing action is logged and the rest still run.
///
/// Slot values that are neither strings, booleans nor integers are dropped.
///
/// - Parameter actions: Raw action dictionaries (`intent` + `slots`).
private func executeActions(_ actions: [[String: Any]]) {
    // Snapshot frames of all windows about to be moved (for undo)
    let movingWids: [UInt32] = actions.compactMap { action in
        let intent = action["intent"] as? String ?? ""
        guard ["tile_window", "swap", "distribute", "move_to_display"].contains(intent) else { return nil }
        let slots = action["slots"] as? [String: Any] ?? [:]
        return (slots["wid"] as? NSNumber)?.uint32Value
            ?? (slots["wid_a"] as? NSNumber)?.uint32Value
    }
    // Also grab wid_b from swap actions
    let swapBWids: [UInt32] = actions.compactMap { action in
        let slots = action["slots"] as? [String: Any] ?? [:]
        return (slots["wid_b"] as? NSNumber)?.uint32Value
    }
    snapshotFrames(wids: movingWids + swapBWids)

    // Guard: refuse to execute bulk operations that would be disorienting
    let nonDistributeActions = actions.filter { ($0["intent"] as? String) != "distribute" }
    if nonDistributeActions.count > Self.maxActions {
        DiagnosticLog.shared.warn(
            "HandsOff: BLOCKED — \(nonDistributeActions.count) actions exceeds limit of \(Self.maxActions). " +
            "Skipping execution to avoid disorienting window rearrangement."
        )
        return
    }

    // Smart distribution: when multiple tile_window actions target the same
    // position, subdivide that region instead of stacking windows on top of each other.
    let distributed = distributeTileActions(actions)

    for action in distributed {
        guard let intent = action["intent"] as? String else { continue }
        let slots = action["slots"] as? [String: Any] ?? [:]

        let jsonSlots = slots.reduce(into: [String: JSON]()) { dict, pair in
            if let s = pair.value as? String {
                dict[pair.key] = .string(s)
            } else if let number = pair.value as? NSNumber,
                      CFGetTypeID(number) == CFBooleanGetTypeID() {
                // JSON booleans are NSNumbers that also cast to Int (0/1), so
                // they must be recognized before the integer check or they
                // would be delivered as `.int`.
                dict[pair.key] = .bool(number.boolValue)
            } else if let n = pair.value as? Int {
                dict[pair.key] = .int(n)
            }
        }

        let match = IntentMatch(
            intentName: intent,
            slots: jsonSlots,
            confidence: 0.95,
            matchedPhrase: "hands-off"
        )

        do {
            _ = try PhraseMatcher.shared.execute(match)
            DiagnosticLog.shared.success("HandsOff: \(intent) executed")
        } catch {
            DiagnosticLog.shared.warn("HandsOff: \(intent) failed — \(error.localizedDescription)")
        }
    }
}
706
+
707
/// Spreads `tile_window` actions that target the same position across
/// sub-regions of that position, so the windows do not stack on top of each
/// other.
///
/// Example: two windows sent to `"left"` become `"top-left"` and
/// `"bottom-left"` (see `subdividePosition(_:count:)` for the full mapping).
///
/// The returned array has the same length and the same order as `actions`;
/// only the `position` slot of grouped tile actions is rewritten. Actions that
/// are not `tile_window`, that lack a string `position`, or that are alone in
/// their position are returned unchanged.
///
/// - Parameter actions: Actions in the order the worker produced them.
/// - Returns: The actions in their original order, with shared positions subdivided.
private func distributeTileActions(_ actions: [[String: Any]]) -> [[String: Any]] {
    /// Position of a `tile_window` action, or nil for anything else.
    func tilePosition(of action: [String: Any]) -> String? {
        guard (action["intent"] as? String ?? "") == "tile_window",
              let slots = action["slots"] as? [String: Any] else { return nil }
        return slots["position"] as? String
    }

    // Pass 1: how many tile actions target each position.
    var groupSizes: [String: Int] = [:]
    for action in actions {
        if let position = tilePosition(of: action) {
            groupSizes[position, default: 0] += 1
        }
    }

    // Pass 2: rewrite in place, preserving the worker's ordering. Iterating a
    // dictionary of groups would emit them in an unspecified order.
    var subPositionsByGroup: [String: [String]] = [:]
    var nextSlotInGroup: [String: Int] = [:]
    var result: [[String: Any]] = []
    result.reserveCapacity(actions.count)

    for action in actions {
        guard let position = tilePosition(of: action),
              let groupSize = groupSizes[position], groupSize > 1 else {
            // Not a tile action, or the only window for its position — keep as-is
            result.append(action)
            continue
        }

        let subPositions = subPositionsByGroup[position] ?? subdividePosition(position, count: groupSize)
        subPositionsByGroup[position] = subPositions

        let slotIndex = nextSlotInGroup[position, default: 0]
        nextSlotInGroup[position] = slotIndex + 1

        var modified = action
        var slots = (action["slots"] as? [String: Any]) ?? [:]
        slots["position"] = subPositions[slotIndex]
        modified["slots"] = slots
        result.append(modified)
        DiagnosticLog.shared.info("HandsOff: distributed \(position) → \(subPositions[slotIndex]) for window \(slots["wid"] ?? "?")")
    }

    return result
}
747
+
748
/// Maps one tile position to `count` sub-positions.
///
/// - `left` / `right`: two stacked quarters; with 4 or more windows, a set of
///   four `*-fourth` cells.
/// - `top` / `bottom`: two side-by-side quarters; with 4 or more windows, the
///   four fourths of that row.
/// - `maximize` / `center`: the four quarters followed by `left` and `right`.
/// - Anything else cannot be subdivided and is repeated unchanged.
///
/// When `count` exceeds the number of available sub-positions, the list wraps
/// around and sub-positions repeat.
///
/// - Parameters:
///   - position: The position shared by the windows.
///   - count: Number of windows to place.
/// - Returns: Exactly `count` position names.
private func subdividePosition(_ position: String, count: Int) -> [String] {
    let wantsGrid = count >= 4
    let cycle: [String]

    switch position {
    case "left":
        cycle = wantsGrid
            ? ["top-first-fourth", "top-second-fourth", "bottom-first-fourth", "bottom-second-fourth"]
            : ["top-left", "bottom-left"]
    case "right":
        cycle = wantsGrid
            ? ["top-third-fourth", "top-last-fourth", "bottom-third-fourth", "bottom-last-fourth"]
            : ["top-right", "bottom-right"]
    case "top":
        cycle = wantsGrid
            ? ["top-first-fourth", "top-second-fourth", "top-third-fourth", "top-last-fourth"]
            : ["top-left", "top-right"]
    case "bottom":
        cycle = wantsGrid
            ? ["bottom-first-fourth", "bottom-second-fourth", "bottom-third-fourth", "bottom-last-fourth"]
            : ["bottom-left", "bottom-right"]
    case "maximize", "center":
        cycle = ["top-left", "top-right", "bottom-left", "bottom-right", "left", "right"]
    default:
        // Can't subdivide further — just repeat the position
        return Array(repeating: position, count: count)
    }

    // Hand out sub-positions round-robin
    return (0..<count).map { cycle[$0 % cycle.count] }
}
796
+
797
+ // MARK: - Sound
798
+
799
/// Plays the named sound; does nothing when no sound with that name is found.
private func playSound(_ name: NSSound.Name) {
    guard let sound = NSSound(named: name) else { return }
    sound.play()
}
802
+ }