@lattices/cli 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +85 -9
  2. package/app/Package.swift +8 -1
  3. package/app/Sources/AdvisorLearningStore.swift +90 -0
  4. package/app/Sources/AgentSession.swift +377 -0
  5. package/app/Sources/AppDelegate.swift +44 -12
  6. package/app/Sources/AppShellView.swift +81 -8
  7. package/app/Sources/AudioProvider.swift +386 -0
  8. package/app/Sources/CheatSheetHUD.swift +261 -19
  9. package/app/Sources/DaemonProtocol.swift +13 -0
  10. package/app/Sources/DaemonServer.swift +8 -0
  11. package/app/Sources/DesktopModel.swift +164 -5
  12. package/app/Sources/DesktopModelTypes.swift +2 -0
  13. package/app/Sources/DiagnosticLog.swift +104 -2
  14. package/app/Sources/EventBus.swift +1 -0
  15. package/app/Sources/HUDBottomBar.swift +279 -0
  16. package/app/Sources/HUDController.swift +1158 -0
  17. package/app/Sources/HUDLeftBar.swift +849 -0
  18. package/app/Sources/HUDMinimap.swift +179 -0
  19. package/app/Sources/HUDRightBar.swift +774 -0
  20. package/app/Sources/HUDState.swift +367 -0
  21. package/app/Sources/HUDTopBar.swift +243 -0
  22. package/app/Sources/HandsOffSession.swift +733 -0
  23. package/app/Sources/HomeDashboardView.swift +125 -0
  24. package/app/Sources/HotkeyManager.swift +2 -0
  25. package/app/Sources/HotkeyStore.swift +45 -9
  26. package/app/Sources/IntentEngine.swift +925 -0
  27. package/app/Sources/Intents/CreateLayerIntent.swift +54 -0
  28. package/app/Sources/Intents/DistributeIntent.swift +56 -0
  29. package/app/Sources/Intents/FocusIntent.swift +69 -0
  30. package/app/Sources/Intents/HelpIntent.swift +41 -0
  31. package/app/Sources/Intents/KillIntent.swift +47 -0
  32. package/app/Sources/Intents/LatticeIntent.swift +78 -0
  33. package/app/Sources/Intents/LaunchIntent.swift +67 -0
  34. package/app/Sources/Intents/ListSessionsIntent.swift +32 -0
  35. package/app/Sources/Intents/ListWindowsIntent.swift +30 -0
  36. package/app/Sources/Intents/ScanIntent.swift +52 -0
  37. package/app/Sources/Intents/SearchIntent.swift +190 -0
  38. package/app/Sources/Intents/SwitchLayerIntent.swift +50 -0
  39. package/app/Sources/Intents/TileIntent.swift +61 -0
  40. package/app/Sources/LatticesApi.swift +1235 -30
  41. package/app/Sources/LauncherHUD.swift +348 -0
  42. package/app/Sources/MainView.swift +147 -44
  43. package/app/Sources/OcrModel.swift +34 -1
  44. package/app/Sources/OmniSearchState.swift +99 -102
  45. package/app/Sources/OnboardingView.swift +457 -0
  46. package/app/Sources/PermissionChecker.swift +2 -12
  47. package/app/Sources/PiChatDock.swift +454 -0
  48. package/app/Sources/PiChatSession.swift +815 -0
  49. package/app/Sources/PiWorkspaceView.swift +364 -0
  50. package/app/Sources/PlacementSpec.swift +195 -0
  51. package/app/Sources/Preferences.swift +59 -0
  52. package/app/Sources/ProjectScanner.swift +1 -1
  53. package/app/Sources/ScreenMapState.swift +701 -55
  54. package/app/Sources/ScreenMapView.swift +843 -103
  55. package/app/Sources/ScreenMapWindowController.swift +22 -0
  56. package/app/Sources/SessionLayerStore.swift +285 -0
  57. package/app/Sources/SessionManager.swift +4 -1
  58. package/app/Sources/SettingsView.swift +186 -3
  59. package/app/Sources/Theme.swift +9 -8
  60. package/app/Sources/TmuxModel.swift +7 -0
  61. package/app/Sources/TmuxQuery.swift +27 -3
  62. package/app/Sources/VoiceChatView.swift +192 -0
  63. package/app/Sources/VoiceCommandWindow.swift +1594 -0
  64. package/app/Sources/VoiceIntentResolver.swift +671 -0
  65. package/app/Sources/VoxClient.swift +454 -0
  66. package/app/Sources/WindowTiler.swift +348 -87
  67. package/app/Sources/WorkspaceManager.swift +127 -18
  68. package/bin/client.ts +16 -0
  69. package/bin/{daemon-client.js → daemon-client.ts} +49 -30
  70. package/bin/handsoff-infer.ts +280 -0
  71. package/bin/handsoff-worker.ts +731 -0
  72. package/bin/{lattices-app.js → lattices-app.ts} +67 -32
  73. package/bin/lattices-dev +160 -0
  74. package/bin/{lattices.js → lattices.ts} +600 -137
  75. package/bin/project-twin.ts +645 -0
  76. package/docs/agent-execution-plan.md +562 -0
  77. package/docs/agents.md +142 -0
  78. package/docs/api.md +153 -34
  79. package/docs/app.md +29 -1
  80. package/docs/config.md +5 -1
  81. package/docs/handsoff-test-scenarios.md +84 -0
  82. package/docs/layers.md +20 -20
  83. package/docs/ocr.md +14 -5
  84. package/docs/overview.md +5 -1
  85. package/docs/presentation-execution-review.md +491 -0
  86. package/docs/prompts/hands-off-system.md +374 -0
  87. package/docs/prompts/hands-off-turn.md +30 -0
  88. package/docs/prompts/voice-advisor.md +31 -0
  89. package/docs/prompts/voice-fallback.md +23 -0
  90. package/docs/tiling-reference.md +167 -0
  91. package/docs/twins.md +138 -0
  92. package/docs/voice-command-protocol.md +278 -0
  93. package/docs/voice.md +219 -0
  94. package/package.json +21 -10
  95. package/bin/client.js +0 -4
@@ -0,0 +1,733 @@
1
+ import AppKit
2
+
3
+ /// Hands-off voice mode: hotkey → listen → worker handles everything.
4
+ ///
5
+ /// Architecture:
6
+ /// - Swift owns: hotkey, Vox dictation, action execution
7
+ /// - Worker owns: inference (Groq), TTS (streaming OpenAI), fast path matching, audio caching
8
+ /// - Worker is a long-running bun process, started once, communicates via JSON lines over stdio
9
+ ///
10
+ /// The worker handles the full turn orchestration in parallel:
11
+ /// - Fast path: local match → cached ack + execute + cached confirm (~300ms)
12
+ /// - Slow path: cached ack ∥ Groq inference → streaming TTS ∥ execute (~2s)
13
+
14
+ // MARK: - Chat Log Entry
15
+
16
/// A single line in the running voice chat log.
///
/// Each entry receives a fresh `UUID` when it is created, and equality is
/// identity-based: two entries compare equal only when they share that `id`,
/// regardless of their text or timestamp.
struct VoiceChatEntry: Identifiable, Equatable {
    /// Who produced the entry.
    enum Role: String, Equatable {
        /// What the user said.
        case user
        /// Spoken response.
        case assistant
        /// Silent info (actions executed, search results, etc.).
        case system
    }

    let id = UUID()
    let timestamp: Date
    let role: Role
    let text: String
    /// Optional structured data — actions taken, search results, etc.
    /// Displayable in the chat log but not spoken.
    let detail: String?

    static func == (lhs: VoiceChatEntry, rhs: VoiceChatEntry) -> Bool {
        return lhs.id == rhs.id
    }
}
35
+
36
+ final class HandsOffSession: ObservableObject {
37
+ static let shared = HandsOffSession()
38
+
39
+ enum State: Equatable {
40
+ case idle
41
+ case connecting
42
+ case listening
43
+ case thinking
44
+ }
45
+
46
+ @Published var state: State = .idle
47
+ @Published var lastTranscript: String?
48
+ @Published var lastResponse: String?
49
+ @Published var audibleFeedbackEnabled: Bool = false
50
+
51
+ /// Recently executed actions — shown as playback in the HUD bottom bar
52
+ @Published var recentActions: [[String: Any]] = []
53
+
54
/// Frame history for undo — stores pre-move frames of windows touched by the last turn.
struct FrameSnapshot {
    /// Window ID the frame was captured for.
    let wid: UInt32
    /// Process ID copied from the matching `DesktopModel` window entry.
    let pid: Int32
    /// Window bounds as reported by the CG window list at capture time.
    let frame: WindowFrame
}
private(set) var frameHistory: [FrameSnapshot] = []

/// Snapshot current frames for all windows that are about to be moved.
/// Stores frames in CG/AX coordinates (top-left origin) for direct use with batchRestoreWindows.
///
/// Any previously recorded history is discarded first. A window is skipped when it is
/// unknown to `DesktopModel`, missing from the CG window list, or has unreadable bounds.
///
/// - Parameter wids: Window IDs to capture; snapshots are appended in this order.
func snapshotFrames(wids: [UInt32]) {
    frameHistory.removeAll()
    // Nothing to capture — skip the (expensive) system-wide window list query.
    guard !wids.isEmpty else { return }
    guard let windowList = CGWindowListCopyWindowInfo([.optionAll, .excludeDesktopElements], kCGNullWindowID) as? [[String: Any]] else { return }

    // Index the bounds of the requested windows in a single pass instead of
    // rescanning the whole window list once per requested wid.
    let requested = Set(wids)
    var boundsByWid: [UInt32: NSDictionary] = [:]
    for info in windowList {
        guard let num = info[kCGWindowNumber as String] as? UInt32,
              requested.contains(num),
              boundsByWid[num] == nil,
              let dict = info[kCGWindowBounds as String] as? NSDictionary else { continue }
        boundsByWid[num] = dict
    }

    for wid in wids {
        guard let entry = DesktopModel.shared.windows[wid],
              let dict = boundsByWid[wid] else { continue }
        var rect = CGRect.zero
        guard CGRectMakeWithDictionaryRepresentation(dict, &rect) else { continue }
        let frame = WindowFrame(x: rect.origin.x, y: rect.origin.y, w: rect.width, h: rect.height)
        frameHistory.append(FrameSnapshot(wid: wid, pid: entry.pid, frame: frame))
    }
}

/// Discards every recorded frame snapshot.
func clearFrameHistory() {
    frameHistory.removeAll()
}
85
+
86
+ /// Running chat log — visible in the voice chat panel. Persists across turns.
87
+ @Published private(set) var chatLog: [VoiceChatEntry] = []
88
+ private let maxChatEntries = 50
89
+
90
+ private var turnCount = 0
91
+ @Published private(set) var conversationHistory: [[String: String]] = []
92
+ private let maxHistoryTurns = 10
93
+
94
+ // Long-running worker process
95
+ private var workerProcess: Process?
96
+ private var workerStdin: FileHandle?
97
+ private var workerBuffer = ""
98
+ private let workerQueue = DispatchQueue(label: "com.lattices.handsoff-worker", qos: .userInitiated)
99
+ private var lastCueAt: Date = .distantPast
100
+
101
+ /// JSONL log for full turn data — ~/.lattices/handsoff.jsonl
102
+ private let turnLogPath = NSHomeDirectory() + "/.lattices/handsoff.jsonl"
103
+
104
+ private init() {}
105
+
106
+ // MARK: - Chat Log
107
+
108
/// Adds an entry to the chat log, trimming the oldest entries beyond `maxChatEntries`.
///
/// The entry (and its timestamp) is created immediately; the log itself is
/// mutated asynchronously on the main queue.
///
/// - Parameters:
///   - role: Who produced the text.
///   - text: The line to show in the log.
///   - detail: Optional extra data attached to the entry.
func appendChat(_ role: VoiceChatEntry.Role, text: String, detail: String? = nil) {
    let newEntry = VoiceChatEntry(timestamp: Date(), role: role, text: text, detail: detail)
    DispatchQueue.main.async {
        self.chatLog.append(newEntry)
        let overflow = self.chatLog.count - self.maxChatEntries
        guard overflow > 0 else { return }
        self.chatLog.removeFirst(overflow)
    }
}
117
+
118
/// Empties the chat log; the mutation is performed asynchronously on the main queue.
func clearChatLog() {
    DispatchQueue.main.async {
        self.chatLog.removeAll()
    }
}
121
+
122
+ // MARK: - Lifecycle
123
+
124
/// Entry point for hands-off mode: makes sure the worker process is running.
func start() {
    startWorker()
}
127
+
128
/// Turns audible feedback on or off.
///
/// Enabling it also makes sure the worker process is running; disabling it
/// only updates the flag.
func setAudibleFeedbackEnabled(_ enabled: Bool) {
    audibleFeedbackEnabled = enabled
    guard enabled else { return }
    startWorker()
}
134
+
135
/// Asks the worker to play a cached cue for `phrase`.
///
/// Does nothing when audible feedback is disabled, or when the previous cue
/// was requested less than 0.2 seconds ago.
func playCachedCue(_ phrase: String) {
    guard audibleFeedbackEnabled else { return }

    let requestedAt = Date()
    let sinceLastCue = requestedAt.timeIntervalSince(lastCueAt)
    guard sinceLastCue >= 0.2 else { return }

    lastCueAt = requestedAt
    startWorker()
    let command: [String: Any] = ["cmd": "play_cached", "text": phrase]
    sendToWorker(command)
}
143
+
144
/// Append a full turn record to the JSONL log at `turnLogPath`.
///
/// The record holds a timestamp, the turn counter, the transcript, the turn
/// duration, a fresh desktop snapshot and — when the response carries a `data`
/// object — its `actions`, `spoken` and `_meta` values. Logging is best-effort:
/// failures are reported through `DiagnosticLog` and never propagate.
///
/// - Parameters:
///   - transcript: What the user said this turn.
///   - response: The worker's decoded JSON response.
///   - turnMs: Turn duration in milliseconds.
private func logTurn(transcript: String, response: [String: Any], turnMs: Int) {
    var record: [String: Any] = [
        "ts": ISO8601DateFormatter().string(from: Date()),
        "turn": turnCount,
        "transcript": transcript,
        "turnMs": turnMs,
        "snapshot": buildSnapshot(),
    ]
    if let data = response["data"] as? [String: Any] {
        record["actions"] = data["actions"]
        record["spoken"] = data["spoken"]
        record["meta"] = data["_meta"]
    }

    // JSONSerialization raises an Objective-C exception (not a Swift error) for
    // non-JSON values, so validate before serializing.
    guard JSONSerialization.isValidJSONObject(record),
          var payload = try? JSONSerialization.data(withJSONObject: record) else {
        DiagnosticLog.shared.warn("HandsOff: turn record is not valid JSON, skipping log")
        return
    }
    payload.append(0x0A) // newline terminator — one JSON object per line

    let fileManager = FileManager.default
    let logDirectory = URL(fileURLWithPath: turnLogPath).deletingLastPathComponent()
    do {
        // The log directory may not exist yet on a fresh install.
        try fileManager.createDirectory(at: logDirectory, withIntermediateDirectories: true)

        if let handle = FileHandle(forWritingAtPath: turnLogPath) {
            defer { try? handle.close() }
            // Throwing variants: the legacy seek/write calls raise uncatchable
            // Objective-C exceptions on I/O failure.
            try handle.seekToEnd()
            try handle.write(contentsOf: payload)
        } else if !fileManager.createFile(atPath: turnLogPath, contents: payload) {
            DiagnosticLog.shared.warn("HandsOff: could not create turn log at \(turnLogPath)")
        }
    } catch {
        DiagnosticLog.shared.warn("HandsOff: failed to write turn log — \(error.localizedDescription)")
    }
}
172
+
173
/// Launches the long-running bun worker unless one is already running.
///
/// Wires the worker's stdin/stdout/stderr to pipes, forwards stdout chunks to
/// `handleWorkerOutput`, logs stderr lines, and schedules a restart two seconds
/// after the worker exits. Returns without starting anything when no bun
/// executable or worker script can be found, or when the launch fails.
private func startWorker() {
    if workerProcess?.isRunning == true, workerStdin != nil {
        return
    }

    let bunPaths = [
        NSHomeDirectory() + "/.bun/bin/bun",
        "/usr/local/bin/bun",
        "/opt/homebrew/bin/bun",
    ]
    guard let bunPath = bunPaths.first(where: { FileManager.default.isExecutableFile(atPath: $0) }) else {
        DiagnosticLog.shared.warn("HandsOff: bun not found, worker disabled")
        return
    }

    // NOTE(review): this points at a development checkout under ~/dev/lattices rather
    // than the installed package — confirm this is intended for released builds.
    let scriptPath = NSHomeDirectory() + "/dev/lattices/bin/handsoff-worker.ts"
    guard FileManager.default.fileExists(atPath: scriptPath) else {
        DiagnosticLog.shared.warn("HandsOff: worker script not found at \(scriptPath)")
        return
    }

    let proc = Process()
    proc.executableURL = URL(fileURLWithPath: bunPath)
    proc.arguments = ["run", scriptPath]
    proc.currentDirectoryURL = URL(fileURLWithPath: NSHomeDirectory() + "/dev/lattices")

    var env = ProcessInfo.processInfo.environment
    env.removeValue(forKey: "CLAUDECODE")
    proc.environment = env

    let inPipe = Pipe()
    let outPipe = Pipe()
    let errPipe = Pipe()
    proc.standardInput = inPipe
    proc.standardOutput = outPipe
    proc.standardError = errPipe

    let outReader = outPipe.fileHandleForReading
    let errReader = errPipe.fileHandleForReading

    // Handlers are installed BEFORE launching so a worker that exits immediately
    // is still observed.

    // Read stdout for responses
    outReader.readabilityHandler = { [weak self] handle in
        let data = handle.availableData
        guard !data.isEmpty else {
            // Empty data means EOF. Detach, otherwise the handler keeps firing
            // with empty reads after the worker has gone away.
            handle.readabilityHandler = nil
            return
        }
        // Lossy decoding: a chunk that ends mid-character degrades to a replacement
        // character instead of the whole chunk (and its JSON lines) being dropped.
        self?.handleWorkerOutput(String(decoding: data, as: UTF8.self))
    }

    // Log stderr
    errReader.readabilityHandler = { handle in
        let data = handle.availableData
        guard !data.isEmpty else {
            handle.readabilityHandler = nil
            return
        }
        let text = String(decoding: data, as: UTF8.self)
        for line in text.components(separatedBy: "\n") where !line.isEmpty {
            DiagnosticLog.shared.info("HandsOff worker: \(line)")
        }
    }

    // Handle worker crash → restart
    proc.terminationHandler = { [weak self] finished in
        let status = finished.terminationStatus
        DispatchQueue.main.async {
            guard let self else { return }
            // Only react if the process that exited is still the current worker;
            // otherwise we would wipe out (and duplicate) its replacement.
            guard self.workerProcess === finished else {
                DiagnosticLog.shared.info("HandsOff: stale worker exited (code \(status)), already replaced")
                return
            }
            DiagnosticLog.shared.warn("HandsOff: worker exited (code \(status)), restarting in 2s")
            self.workerProcess = nil
            self.workerStdin = nil
            // A turn that was in flight can no longer complete; release the UI
            // instead of leaving it stuck in `.thinking`.
            self.pendingCallback = nil
            if self.state == .thinking {
                self.state = .idle
            }
            DispatchQueue.main.asyncAfter(deadline: .now() + 2) { [weak self] in
                self?.startWorker()
            }
        }
    }

    do {
        try proc.run()
    } catch {
        outReader.readabilityHandler = nil
        errReader.readabilityHandler = nil
        DiagnosticLog.shared.warn("HandsOff: failed to start worker — \(error)")
        return
    }

    workerProcess = proc
    workerStdin = inPipe.fileHandleForWriting

    // Ping to verify
    sendToWorker(["cmd": "ping"])
    DiagnosticLog.shared.info("HandsOff: worker started (pid \(proc.processIdentifier))")
}
250
+
251
+ // MARK: - Worker communication
252
+
253
+ private var pendingCallback: (([String: Any]) -> Void)?
254
+
255
/// Serializes `dict` as one JSON line and writes it to the worker's stdin.
///
/// The write happens asynchronously on `workerQueue`. The command is dropped
/// (with a diagnostic) when it cannot be encoded as JSON, when no worker stdin
/// is available, or when the write fails.
///
/// - Parameter dict: The command to send; must be a valid JSON object.
private func sendToWorker(_ dict: [String: Any]) {
    // JSONSerialization raises an Objective-C exception for non-JSON values,
    // which `try?` cannot intercept — validate first.
    guard JSONSerialization.isValidJSONObject(dict),
          var payload = try? JSONSerialization.data(withJSONObject: dict) else {
        DiagnosticLog.shared.warn("HandsOff: dropping worker command — payload is not valid JSON")
        return
    }
    payload.append(0x0A) // newline terminator — the protocol is JSON lines

    workerQueue.async { [weak self] in
        guard let stdin = self?.workerStdin else { return }
        do {
            // The throwing variant reports a closed pipe as an error; the legacy
            // `write(_:)` raises an uncatchable Objective-C exception instead.
            try stdin.write(contentsOf: payload)
        } catch {
            DiagnosticLog.shared.warn("HandsOff: failed to write to worker — \(error.localizedDescription)")
        }
    }
}
263
+
264
/// Sends a command to the worker and registers `callback` as the single pending callback.
///
/// The callback replaces any previously registered one; `handleWorkerOutput`
/// invokes it with the next decoded JSON line it receives.
private func sendToWorkerWithCallback(_ dict: [String: Any], callback: @escaping ([String: Any]) -> Void) {
    // Register first so the callback is in place before the command goes out.
    self.pendingCallback = callback
    self.sendToWorker(dict)
}
268
+
269
/// Consumes a chunk of worker stdout and processes every complete JSON line in it.
///
/// Chunks are accumulated in `workerBuffer`; text after the last newline is kept
/// for the next call. For each line that decodes to a JSON object, the pending
/// callback (if any) is taken, chat entries are prepared, and a single main-queue
/// block applies all published-state changes, runs any actions and returns the
/// state to `.idle`. The callback is invoked last, on the calling thread.
private func handleWorkerOutput(_ str: String) {
    workerBuffer += str
    var completeLines = workerBuffer.components(separatedBy: "\n")
    // The final piece is whatever followed the last newline — possibly a partial line.
    workerBuffer = completeLines.popLast() ?? ""

    for rawLine in completeLines {
        let trimmed = rawLine.trimmingCharacters(in: .whitespacesAndNewlines)
        if trimmed.isEmpty { continue }
        guard let payload = trimmed.data(using: .utf8),
              let json = try? JSONSerialization.jsonObject(with: payload) as? [String: Any]
        else { continue }

        DiagnosticLog.shared.info("HandsOff: worker response → \(trimmed)")

        // Parse everything on the background thread, then do ONE main-queue dispatch
        // to update all @Published properties atomically. Scattered dispatches cause
        // Combine deadlocks (os_unfair_lock contention with SwiftUI rendering).
        let body = json["data"] as? [String: Any]
        let spoken = body?["spoken"] as? String
        let actions = body?["actions"] as? [[String: Any]]
        let hasActions = !(actions ?? []).isEmpty

        let callback = pendingCallback
        pendingCallback = nil

        // Build chat entries off-main
        var chatEntries: [(VoiceChatEntry.Role, String)] = []
        if let spoken {
            chatEntries.append((.assistant, spoken))
        }
        var summaries: [String] = []
        for action in actions ?? [] {
            guard let intent = action["intent"] as? String else { continue }
            let slots = action["slots"] as? [String: Any] ?? [:]
            let target = slots["app"] as? String ?? slots["query"] as? String ?? ""
            let position = slots["position"] as? String ?? ""
            let parts = [intent, target, position].filter { !$0.isEmpty }
            summaries.append(parts.joined(separator: " "))
        }
        if !summaries.isEmpty {
            chatEntries.append((.system, summaries.joined(separator: ", ")))
        }

        // Single dispatch — all @Published mutations in one block
        DispatchQueue.main.async { [weak self] in
            guard let self else { return }
            if let spoken {
                self.lastResponse = spoken
            }
            for (role, text) in chatEntries {
                let entry = VoiceChatEntry(timestamp: Date(), role: role, text: text, detail: nil)
                self.chatLog.append(entry)
            }
            let overflow = self.chatLog.count - self.maxChatEntries
            if overflow > 0 {
                self.chatLog.removeFirst(overflow)
            }
            if let actions, hasActions {
                self.recentActions = actions
                self.executeActions(actions)
            }
            self.state = .idle
        }

        callback?(json)
    }
}
328
+
329
+ // MARK: - Toggle
330
+
331
/// Advances the session in response to the hotkey, based on the current state:
/// idle starts listening, connecting cancels, listening finishes the recording,
/// and thinking ignores the toggle.
func toggle() {
    switch state {
    case .idle:
        beginListening()
    case .connecting:
        cancel()
    case .listening:
        finishListening()
    case .thinking:
        DiagnosticLog.shared.info("HandsOff: busy, ignoring toggle")
    }
}
343
+
344
/// Returns the session to `.idle` and records the cancellation in the diagnostic log.
func cancel() {
    self.state = .idle
    DiagnosticLog.shared.info("HandsOff: cancelled")
}
348
+
349
+ // MARK: - Voice capture
350
+
351
/// Starts dictation immediately when Vox is connected; otherwise enters
/// `.connecting`, asks Vox to connect, and begins polling for the connection
/// after half a second (up to five further attempts).
private func beginListening() {
    let vox = VoxClient.shared

    guard vox.connectionState != .connected else {
        startDictation()
        return
    }

    state = .connecting
    vox.connect()
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
        self?.retryListenIfConnected(attempts: 5)
    }
}
365
+
366
/// Polls for the Vox connection and starts dictation once it is available.
///
/// The poll stops as soon as the session is no longer `.connecting`, so a
/// `cancel()` issued while waiting is honoured, and overlapping retry chains
/// cannot start dictation twice. When the attempts run out, the session returns
/// to `.idle` and an error sound is played.
///
/// - Parameter attempts: Remaining retries, spaced 0.5 seconds apart.
private func retryListenIfConnected(attempts: Int) {
    // The user cancelled (or another chain already moved on) while we were waiting.
    guard state == .connecting else {
        DiagnosticLog.shared.info("HandsOff: connection wait abandoned")
        return
    }

    if VoxClient.shared.connectionState == .connected {
        startDictation()
    } else if attempts > 0 {
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.5) { [weak self] in
            self?.retryListenIfConnected(attempts: attempts - 1)
        }
    } else {
        state = .idle
        DiagnosticLog.shared.warn("HandsOff: Vox not available")
        playSound("Basso")
    }
}
379
+
380
/// Enters `.listening`, plays the start cue and opens a Vox session.
///
/// Progress events only update diagnostics and `lastTranscript`. When the
/// session completes, a non-empty transcript is added to the chat log and
/// handed to `processTurn`; an empty transcript or a failure returns the
/// session to `.idle`.
private func startDictation() {
    state = .listening
    lastTranscript = nil
    playSound("Tink")

    DiagnosticLog.shared.info("HandsOff: listening...")

    // Vox live session: startSession opens the mic, events flow on the start call ID.
    // No partial transcripts — Vox transcribes after recording stops.
    VoxClient.shared.startSession(
        onProgress: { [weak self] event, data in
            DispatchQueue.main.async {
                if event == "session.state" {
                    let sessionState = data["state"] as? String ?? ""
                    DiagnosticLog.shared.info("HandsOff: session → \(sessionState)")
                }
                if event == "session.final", let finalText = data["text"] as? String {
                    self?.lastTranscript = finalText
                }
            }
        },
        completion: { [weak self] result in
            DispatchQueue.main.async {
                guard let self else { return }
                switch result {
                case .failure(let error):
                    self.state = .idle
                    DiagnosticLog.shared.warn("HandsOff: session error — \(error.localizedDescription)")
                    self.playSound("Basso")

                case .success(let data):
                    let heard = data["text"] as? String ?? ""
                    guard !heard.isEmpty else {
                        self.state = .idle
                        DiagnosticLog.shared.info("HandsOff: no speech detected")
                        return
                    }
                    self.lastTranscript = heard
                    DiagnosticLog.shared.info("HandsOff: heard → '\(heard)'")
                    self.appendChat(.user, text: heard)
                    self.processTurn(heard)
                }
            }
        }
    )
}
425
+
426
/// Ends the current recording: plays the cue and stops the Vox session.
/// Has no effect unless the session is in `.listening`.
func finishListening() {
    if state != .listening { return }
    playSound("Tink")
    VoxClient.shared.stopSession()
}
431
+
432
+ // MARK: - Turn processing (delegates to worker)
433
+
434
/// Sends one user turn to the worker and records the outcome.
///
/// Moves the session to `.thinking`, bumps the turn counter, and sends the
/// transcript together with a desktop snapshot and the conversation history.
/// When the worker answers, the turn is written to the JSONL log and — if the
/// response carries a `data` object — appended to `conversationHistory`, which
/// is capped at `maxHistoryTurns` user/assistant pairs.
///
/// - Parameter transcript: What the user said.
private func processTurn(_ transcript: String) {
    state = .thinking
    turnCount += 1

    let turnStart = Date()
    let turnNumber = turnCount
    DiagnosticLog.shared.info("HandsOff: ⏱ turn \(turnNumber) — '\(transcript)'")

    // Send turn to worker — it handles ack, inference, TTS, everything in parallel
    let turnCmd: [String: Any] = [
        "cmd": "turn",
        "transcript": transcript,
        "snapshot": buildSnapshot(),
        "history": conversationHistory,
    ]

    sendToWorkerWithCallback(turnCmd) { [weak self] response in
        // Measure before hopping threads so queueing delay is not counted.
        let turnMs = Int(Date().timeIntervalSince(turnStart) * 1000)

        // The callback is invoked from the worker-output reader, off the main
        // thread. `conversationHistory` is @Published and logging rebuilds the
        // desktop snapshot (AppKit state), so do the bookkeeping on main.
        DispatchQueue.main.async {
            guard let self else { return }

            DiagnosticLog.shared.info("HandsOff: ⏱ turn \(turnNumber) complete — \(turnMs)ms")

            // Log full turn to JSONL
            self.logTurn(transcript: transcript, response: response, turnMs: turnMs)

            // Record history
            guard let data = response["data"] as? [String: Any] else { return }
            let responseStr = (try? String(data: JSONSerialization.data(withJSONObject: data), encoding: .utf8)) ?? ""
            self.conversationHistory.append(["role": "user", "content": transcript])
            self.conversationHistory.append(["role": "assistant", "content": responseStr])

            let historyLimit = self.maxHistoryTurns * 2
            if self.conversationHistory.count > historyLimit {
                self.conversationHistory = Array(self.conversationHistory.suffix(historyLimit))
            }
        }
    }
}
472
+
473
+ // MARK: - Desktop snapshot (full context — all windows, all screens)
474
+
475
/// Builds the desktop context dictionary sent to the worker and written to the turn log.
///
/// The snapshot contains every window (in the order `DesktopModel` returns
/// them), all screens, terminal instances with their running commands, the
/// Stage Manager preferences, and — when present — tmux sessions and the active
/// layer.
private func buildSnapshot() -> [String: Any] {
    let allWindows = DesktopModel.shared.allWindows()

    // Stage Manager preferences
    let windowManagerDefaults = UserDefaults(suiteName: "com.apple.WindowManager")
    let smEnabled = windowManagerDefaults?.bool(forKey: "GloballyEnabled") ?? false
    let grouping = windowManagerDefaults?.integer(forKey: "AppWindowGroupingBehavior") ?? 0

    // All windows — no filtering. Order is front-to-back (Z-order).
    var windowList: [[String: Any]] = []
    for (zIndex, window) in allWindows.enumerated() {
        let frameText = "\(Int(window.frame.x)),\(Int(window.frame.y)) \(Int(window.frame.w))x\(Int(window.frame.h))"
        var entry: [String: Any] = [
            "wid": window.wid,
            "app": window.app,
            "title": window.title,
            "frame": frameText,
            "onScreen": window.isOnScreen,
            "zIndex": zIndex, // 0 = frontmost
        ]
        if let session = window.latticesSession {
            entry["session"] = session
        }
        if !window.spaceIds.isEmpty {
            entry["spaces"] = window.spaceIds
        }
        windowList.append(entry)
    }

    // All screens (1-based index)
    var screens: [[String: Any]] = []
    for (offset, screen) in NSScreen.screens.enumerated() {
        let described: [String: Any] = [
            "index": offset + 1,
            "width": Int(screen.frame.width),
            "height": Int(screen.frame.height),
            "isMain": screen == NSScreen.main,
            "visibleWidth": Int(screen.visibleFrame.width),
            "visibleHeight": Int(screen.visibleFrame.height),
        ]
        screens.append(described)
    }

    // Layers — only reported when the active index points at an existing layer
    var layerInfo: [String: Any]?
    let layerStore = SessionLayerStore.shared
    if layerStore.activeIndex >= 0 && layerStore.activeIndex < layerStore.layers.count {
        let activeLayer = layerStore.layers[layerStore.activeIndex]
        layerInfo = ["name": activeLayer.name, "index": layerStore.activeIndex]
    }

    // Terminal enrichment — cwd, running commands, claude, tmux sessions
    var terminalList: [[String: Any]] = []
    for inst in ProcessModel.shared.synthesizeTerminals() {
        var entry: [String: Any] = [
            "tty": inst.tty,
            "hasClaude": inst.hasClaude,
            "displayName": inst.displayName,
            "isActiveTab": inst.isActiveTab,
        ]
        if let cwd = inst.cwd { entry["cwd"] = cwd }
        if let app = inst.app { entry["app"] = app.rawValue }
        if let session = inst.tmuxSession { entry["tmuxSession"] = session }
        if let wid = inst.windowId { entry["windowId"] = Int(wid) }
        if let title = inst.tabTitle { entry["tabTitle"] = title }

        // Shells and login wrappers are noise; keep only what the user is running.
        let userProcesses = inst.processes.filter {
            !["zsh", "bash", "fish", "login", "-zsh", "-bash"].contains($0.comm)
        }
        if !userProcesses.isEmpty {
            entry["runningCommands"] = userProcesses.map { proc in
                var cmd: [String: Any] = ["command": proc.comm]
                if let cwd = proc.cwd { cmd["cwd"] = cwd }
                return cmd
            }
        }
        terminalList.append(entry)
    }

    // Tmux sessions
    var tmuxList: [[String: Any]] = []
    for session in TmuxModel.shared.sessions {
        tmuxList.append([
            "name": session.name,
            "windowCount": session.windowCount,
            "attached": session.attached,
        ])
    }

    let onScreenCount = allWindows.filter { $0.isOnScreen }.count
    var snapshot: [String: Any] = [
        "stageManager": smEnabled,
        "smGrouping": grouping == 0 ? "all-at-once" : "one-at-a-time",
        "windows": windowList,
        "terminals": terminalList,
        "screens": screens,
        "windowCount": allWindows.count,
        "onScreenCount": onScreenCount,
    ]
    if !tmuxList.isEmpty {
        snapshot["tmuxSessions"] = tmuxList
    }
    if let layerInfo {
        snapshot["currentLayer"] = layerInfo
    }

    return snapshot
}
571
+
572
+ // MARK: - Action execution
573
+
574
+ /// Hard cap on simultaneous actions. Rearranging 20+ windows is never right.
575
+ /// distribute is exempt because it's a single intent that handles all windows safely.
576
+ private static let maxActions = 6
577
+
578
/// Executes the worker's actions through `PhraseMatcher`.
///
/// Batches with more than `maxActions` non-`distribute` actions are refused
/// outright. Otherwise the current frames of the windows about to move are
/// snapshotted for undo, tile actions sharing a position are spread out via
/// `distributeTileActions`, and each action is executed in turn; a failing
/// action is logged and does not stop the rest.
///
/// - Parameter actions: Action dictionaries with an `intent` string and an
///   optional `slots` dictionary.
private func executeActions(_ actions: [[String: Any]]) {
    // Guard: refuse to execute bulk operations that would be disorienting.
    // Checked before snapshotting so a refused batch does not wipe the undo
    // history of the previous turn.
    let nonDistributeCount = actions.filter { ($0["intent"] as? String) != "distribute" }.count
    guard nonDistributeCount <= Self.maxActions else {
        DiagnosticLog.shared.warn(
            "HandsOff: BLOCKED — \(nonDistributeCount) actions exceeds limit of \(Self.maxActions). " +
            "Skipping execution to avoid disorienting window rearrangement."
        )
        return
    }

    // Snapshot frames of all windows about to be moved (for undo)
    let movingIntents: Set<String> = ["tile_window", "swap", "distribute", "move_to_display"]
    var movingWids: [UInt32] = []
    var swapBWids: [UInt32] = []
    for action in actions {
        let slots = action["slots"] as? [String: Any] ?? [:]
        if let intent = action["intent"] as? String, movingIntents.contains(intent),
           let wid = (slots["wid"] as? NSNumber)?.uint32Value
               ?? (slots["wid_a"] as? NSNumber)?.uint32Value {
            movingWids.append(wid)
        }
        // Also grab wid_b from swap actions
        if let widB = (slots["wid_b"] as? NSNumber)?.uint32Value {
            swapBWids.append(widB)
        }
    }
    snapshotFrames(wids: movingWids + swapBWids)

    // Smart distribution: when multiple tile_window actions target the same
    // position, subdivide that region instead of stacking windows on top of each other.
    for action in distributeTileActions(actions) {
        guard let intent = action["intent"] as? String else { continue }
        let slots = action["slots"] as? [String: Any] ?? [:]

        var jsonSlots: [String: JSON] = [:]
        for (key, value) in slots {
            if let converted = Self.jsonSlot(from: value) {
                jsonSlots[key] = converted
            }
        }

        let match = IntentMatch(
            intentName: intent,
            slots: jsonSlots,
            confidence: 0.95,
            matchedPhrase: "hands-off"
        )

        do {
            _ = try PhraseMatcher.shared.execute(match)
            DiagnosticLog.shared.success("HandsOff: \(intent) executed")
        } catch {
            DiagnosticLog.shared.warn("HandsOff: \(intent) failed — \(error.localizedDescription)")
        }
    }
}

/// Converts one decoded slot value to `JSON`, or returns nil for unsupported types.
///
/// Values decoded by `JSONSerialization` are `NSNumber`s, and a boolean
/// `NSNumber` also casts successfully to `Int`. Booleans are therefore
/// recognised by their CoreFoundation type before the integer cast is tried;
/// otherwise `true`/`false` would be forwarded as the integers 1/0.
private static func jsonSlot(from value: Any) -> JSON? {
    if let text = value as? String {
        return .string(text)
    }
    if let number = value as? NSNumber, CFGetTypeID(number) == CFBooleanGetTypeID() {
        return .bool(number.boolValue)
    }
    if let integer = value as? Int {
        return .int(integer)
    }
    return nil
}
637
+
638
/// When multiple tile_window actions target the same position, distribute them
/// within that region using the sub-positions from `subdividePosition`.
///
/// Non-tile actions (and tile actions without a string `position` slot) come
/// first, in their original order. Tile groups follow in the order their
/// position first appeared, so the result is the same on every run.
///
/// - Parameter actions: The worker's action dictionaries.
/// - Returns: The actions with the `position` slot rewritten for grouped tiles.
private func distributeTileActions(_ actions: [[String: Any]]) -> [[String: Any]] {
    // Group tile_window actions by position, remembering first-seen order:
    // iterating the dictionary directly would yield a different order per process.
    var positionOrder: [String] = []
    var tileGroups: [String: [[String: Any]]] = [:]
    var otherActions: [[String: Any]] = []

    for action in actions {
        if (action["intent"] as? String) == "tile_window",
           let slots = action["slots"] as? [String: Any],
           let position = slots["position"] as? String {
            if tileGroups[position] == nil {
                positionOrder.append(position)
            }
            tileGroups[position, default: []].append(action)
        } else {
            otherActions.append(action)
        }
    }

    var result = otherActions

    for position in positionOrder {
        guard let group = tileGroups[position] else { continue }

        if group.count == 1 {
            // Single window — keep as-is
            result.append(group[0])
            continue
        }

        // Multiple windows targeting the same position — subdivide
        let subPositions = subdividePosition(position, count: group.count)
        for (action, subPosition) in zip(group, subPositions) {
            var modified = action
            var slots = (action["slots"] as? [String: Any]) ?? [:]
            slots["position"] = subPosition
            modified["slots"] = slots
            result.append(modified)
            DiagnosticLog.shared.info("HandsOff: distributed \(position) → \(subPosition) for window \(slots["wid"] ?? "?")")
        }
    }

    return result
}
678
+
679
/// Subdivide a tile position for N windows.
///
/// - `left` / `right` / `top` / `bottom`: two windows split the half in two;
///   three or more use the four-cell table for that half, so windows only start
///   sharing a cell once there are more windows than cells.
/// - `maximize` / `center`: windows cycle through the four quarters, then the
///   two halves.
/// - Any other position cannot be subdivided and is simply repeated.
///
/// - Parameters:
///   - position: The tile position every window in the group asked for.
///   - count: Number of windows in the group.
/// - Returns: Exactly `count` positions (empty when `count` is not positive).
private func subdividePosition(_ position: String, count: Int) -> [String] {
    // Zero, one or a nonsensical count: nothing to subdivide.
    guard count > 1 else {
        return Array(repeating: position, count: max(count, 0))
    }

    // Two windows in a half → split the half in two
    let pairSubs: [String: [String]] = [
        "left": ["top-left", "bottom-left"],
        "right": ["top-right", "bottom-right"],
        "top": ["top-left", "top-right"],
        "bottom": ["bottom-left", "bottom-right"],
    ]
    // More than two windows in a half → four cells
    let quadSubs: [String: [String]] = [
        "left": ["top-first-fourth", "top-second-fourth", "bottom-first-fourth", "bottom-second-fourth"],
        "right": ["top-third-fourth", "top-last-fourth", "bottom-third-fourth", "bottom-last-fourth"],
        "top": ["top-first-fourth", "top-second-fourth", "top-third-fourth", "top-last-fourth"],
        "bottom": ["bottom-first-fourth", "bottom-second-fourth", "bottom-third-fourth", "bottom-last-fourth"],
    ]
    // Full screen → grid
    let fullSubs = ["top-left", "top-right", "bottom-left", "bottom-right", "left", "right"]

    let subs: [String]
    if let pair = pairSubs[position], let quad = quadSubs[position] {
        // Use the two-slot table only when it can hold every window; a third
        // window would otherwise land exactly on top of the first.
        subs = count > pair.count ? quad : pair
    } else if position == "maximize" || position == "center" {
        subs = fullSubs
    } else {
        // Can't subdivide further — just repeat the position
        return Array(repeating: position, count: count)
    }

    // Distribute windows across available sub-positions, wrapping when there
    // are more windows than cells.
    return (0..<count).map { subs[$0 % subs.count] }
}
727
+
728
+ // MARK: - Sound
729
+
730
/// Plays the named system sound; does nothing when no sound with that name exists.
private func playSound(_ name: NSSound.Name) {
    guard let sound = NSSound(named: name) else { return }
    sound.play()
}
733
+ }