@kata-sh/cli 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +156 -0
  3. package/dist/app-paths.d.ts +4 -0
  4. package/dist/app-paths.js +6 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +56 -0
  7. package/dist/loader.d.ts +2 -0
  8. package/dist/loader.js +95 -0
  9. package/dist/resource-loader.d.ts +18 -0
  10. package/dist/resource-loader.js +50 -0
  11. package/dist/wizard.d.ts +15 -0
  12. package/dist/wizard.js +159 -0
  13. package/package.json +50 -21
  14. package/pkg/dist/modes/interactive/theme/dark.json +85 -0
  15. package/pkg/dist/modes/interactive/theme/light.json +84 -0
  16. package/pkg/dist/modes/interactive/theme/theme-schema.json +335 -0
  17. package/pkg/dist/modes/interactive/theme/theme.d.ts +78 -0
  18. package/pkg/dist/modes/interactive/theme/theme.d.ts.map +1 -0
  19. package/pkg/dist/modes/interactive/theme/theme.js +949 -0
  20. package/pkg/dist/modes/interactive/theme/theme.js.map +1 -0
  21. package/pkg/package.json +8 -0
  22. package/scripts/postinstall.js +45 -0
  23. package/src/resources/AGENTS.md +108 -0
  24. package/src/resources/KATA-WORKFLOW.md +661 -0
  25. package/src/resources/agents/researcher.md +29 -0
  26. package/src/resources/agents/scout.md +56 -0
  27. package/src/resources/agents/worker.md +31 -0
  28. package/src/resources/extensions/ask-user-questions.ts +200 -0
  29. package/src/resources/extensions/bg-shell/index.ts +2758 -0
  30. package/src/resources/extensions/browser-tools/BROWSER-TOOLS-V2-PROPOSAL.md +1277 -0
  31. package/src/resources/extensions/browser-tools/core.js +1057 -0
  32. package/src/resources/extensions/browser-tools/index.ts +4916 -0
  33. package/src/resources/extensions/browser-tools/package.json +20 -0
  34. package/src/resources/extensions/context7/index.ts +428 -0
  35. package/src/resources/extensions/context7/package.json +11 -0
  36. package/src/resources/extensions/get-secrets-from-user.ts +352 -0
  37. package/src/resources/extensions/github/formatters.ts +207 -0
  38. package/src/resources/extensions/github/gh-api.ts +537 -0
  39. package/src/resources/extensions/github/index.ts +778 -0
  40. package/src/resources/extensions/kata/activity-log.ts +88 -0
  41. package/src/resources/extensions/kata/auto.ts +2786 -0
  42. package/src/resources/extensions/kata/commands.ts +355 -0
  43. package/src/resources/extensions/kata/crash-recovery.ts +85 -0
  44. package/src/resources/extensions/kata/dashboard-overlay.ts +516 -0
  45. package/src/resources/extensions/kata/docs/preferences-reference.md +103 -0
  46. package/src/resources/extensions/kata/doctor.ts +683 -0
  47. package/src/resources/extensions/kata/files.ts +730 -0
  48. package/src/resources/extensions/kata/gitignore.ts +165 -0
  49. package/src/resources/extensions/kata/guided-flow.ts +976 -0
  50. package/src/resources/extensions/kata/index.ts +556 -0
  51. package/src/resources/extensions/kata/metrics.ts +397 -0
  52. package/src/resources/extensions/kata/observability-validator.ts +408 -0
  53. package/src/resources/extensions/kata/package.json +11 -0
  54. package/src/resources/extensions/kata/paths.ts +346 -0
  55. package/src/resources/extensions/kata/preferences.ts +695 -0
  56. package/src/resources/extensions/kata/prompt-loader.ts +50 -0
  57. package/src/resources/extensions/kata/prompts/complete-milestone.md +25 -0
  58. package/src/resources/extensions/kata/prompts/complete-slice.md +27 -0
  59. package/src/resources/extensions/kata/prompts/discuss.md +151 -0
  60. package/src/resources/extensions/kata/prompts/doctor-heal.md +29 -0
  61. package/src/resources/extensions/kata/prompts/execute-task.md +64 -0
  62. package/src/resources/extensions/kata/prompts/guided-complete-slice.md +1 -0
  63. package/src/resources/extensions/kata/prompts/guided-discuss-milestone.md +3 -0
  64. package/src/resources/extensions/kata/prompts/guided-discuss-slice.md +59 -0
  65. package/src/resources/extensions/kata/prompts/guided-execute-task.md +1 -0
  66. package/src/resources/extensions/kata/prompts/guided-plan-milestone.md +23 -0
  67. package/src/resources/extensions/kata/prompts/guided-plan-slice.md +1 -0
  68. package/src/resources/extensions/kata/prompts/guided-research-slice.md +11 -0
  69. package/src/resources/extensions/kata/prompts/guided-resume-task.md +1 -0
  70. package/src/resources/extensions/kata/prompts/plan-milestone.md +47 -0
  71. package/src/resources/extensions/kata/prompts/plan-slice.md +63 -0
  72. package/src/resources/extensions/kata/prompts/queue.md +85 -0
  73. package/src/resources/extensions/kata/prompts/reassess-roadmap.md +48 -0
  74. package/src/resources/extensions/kata/prompts/replan-slice.md +39 -0
  75. package/src/resources/extensions/kata/prompts/research-milestone.md +37 -0
  76. package/src/resources/extensions/kata/prompts/research-slice.md +28 -0
  77. package/src/resources/extensions/kata/prompts/run-uat.md +109 -0
  78. package/src/resources/extensions/kata/prompts/system.md +341 -0
  79. package/src/resources/extensions/kata/session-forensics.ts +550 -0
  80. package/src/resources/extensions/kata/skill-discovery.ts +137 -0
  81. package/src/resources/extensions/kata/state.ts +509 -0
  82. package/src/resources/extensions/kata/templates/context.md +76 -0
  83. package/src/resources/extensions/kata/templates/decisions.md +8 -0
  84. package/src/resources/extensions/kata/templates/milestone-summary.md +73 -0
  85. package/src/resources/extensions/kata/templates/plan.md +133 -0
  86. package/src/resources/extensions/kata/templates/preferences.md +15 -0
  87. package/src/resources/extensions/kata/templates/project.md +31 -0
  88. package/src/resources/extensions/kata/templates/reassessment.md +28 -0
  89. package/src/resources/extensions/kata/templates/requirements.md +81 -0
  90. package/src/resources/extensions/kata/templates/research.md +46 -0
  91. package/src/resources/extensions/kata/templates/roadmap.md +118 -0
  92. package/src/resources/extensions/kata/templates/slice-context.md +58 -0
  93. package/src/resources/extensions/kata/templates/slice-summary.md +99 -0
  94. package/src/resources/extensions/kata/templates/state.md +19 -0
  95. package/src/resources/extensions/kata/templates/task-plan.md +52 -0
  96. package/src/resources/extensions/kata/templates/task-summary.md +57 -0
  97. package/src/resources/extensions/kata/templates/uat.md +54 -0
  98. package/src/resources/extensions/kata/tests/activity-log-prune.test.ts +327 -0
  99. package/src/resources/extensions/kata/tests/auto-preflight.test.ts +97 -0
  100. package/src/resources/extensions/kata/tests/auto-supervisor.test.mjs +53 -0
  101. package/src/resources/extensions/kata/tests/complete-milestone.test.ts +317 -0
  102. package/src/resources/extensions/kata/tests/cost-projection.test.ts +160 -0
  103. package/src/resources/extensions/kata/tests/derive-state-deps.test.ts +477 -0
  104. package/src/resources/extensions/kata/tests/derive-state.test.ts +1013 -0
  105. package/src/resources/extensions/kata/tests/doctor.test.ts +718 -0
  106. package/src/resources/extensions/kata/tests/idle-recovery.test.ts +490 -0
  107. package/src/resources/extensions/kata/tests/metrics-io.test.ts +254 -0
  108. package/src/resources/extensions/kata/tests/metrics.test.ts +217 -0
  109. package/src/resources/extensions/kata/tests/must-have-parser.test.ts +309 -0
  110. package/src/resources/extensions/kata/tests/parsers.test.ts +1257 -0
  111. package/src/resources/extensions/kata/tests/plan-milestone.test.ts +185 -0
  112. package/src/resources/extensions/kata/tests/plan-quality-validator.test.ts +386 -0
  113. package/src/resources/extensions/kata/tests/reassess-prompt.test.ts +208 -0
  114. package/src/resources/extensions/kata/tests/replan-slice.test.ts +686 -0
  115. package/src/resources/extensions/kata/tests/requirements.test.ts +151 -0
  116. package/src/resources/extensions/kata/tests/resolve-ts-hooks.mjs +17 -0
  117. package/src/resources/extensions/kata/tests/resolve-ts.mjs +11 -0
  118. package/src/resources/extensions/kata/tests/run-uat.test.ts +383 -0
  119. package/src/resources/extensions/kata/tests/unit-runtime.test.ts +388 -0
  120. package/src/resources/extensions/kata/tests/workspace-index.test.ts +118 -0
  121. package/src/resources/extensions/kata/tests/worktree.test.ts +222 -0
  122. package/src/resources/extensions/kata/types.ts +159 -0
  123. package/src/resources/extensions/kata/unit-runtime.ts +163 -0
  124. package/src/resources/extensions/kata/workspace-index.ts +203 -0
  125. package/src/resources/extensions/kata/worktree.ts +182 -0
  126. package/src/resources/extensions/mac-tools/index.ts +852 -0
  127. package/src/resources/extensions/mac-tools/swift-cli/Package.swift +22 -0
  128. package/src/resources/extensions/mac-tools/swift-cli/Sources/main.swift +1318 -0
  129. package/src/resources/extensions/search-the-web/cache.ts +78 -0
  130. package/src/resources/extensions/search-the-web/format.ts +258 -0
  131. package/src/resources/extensions/search-the-web/http.ts +238 -0
  132. package/src/resources/extensions/search-the-web/index.ts +68 -0
  133. package/src/resources/extensions/search-the-web/tool-fetch-page.ts +519 -0
  134. package/src/resources/extensions/search-the-web/tool-llm-context.ts +404 -0
  135. package/src/resources/extensions/search-the-web/tool-search.ts +503 -0
  136. package/src/resources/extensions/search-the-web/url-utils.ts +91 -0
  137. package/src/resources/extensions/shared/confirm-ui.ts +126 -0
  138. package/src/resources/extensions/shared/interview-ui.ts +822 -0
  139. package/src/resources/extensions/shared/next-action-ui.ts +235 -0
  140. package/src/resources/extensions/shared/progress-widget.ts +282 -0
  141. package/src/resources/extensions/shared/thinking-widget.ts +107 -0
  142. package/src/resources/extensions/shared/ui.ts +400 -0
  143. package/src/resources/extensions/shared/wizard-ui.ts +551 -0
  144. package/src/resources/extensions/slash-commands/audit.ts +92 -0
  145. package/src/resources/extensions/slash-commands/create-extension.ts +375 -0
  146. package/src/resources/extensions/slash-commands/create-slash-command.ts +280 -0
  147. package/src/resources/extensions/slash-commands/index.ts +12 -0
  148. package/src/resources/extensions/slash-commands/kata-run.ts +34 -0
  149. package/src/resources/extensions/subagent/agents.ts +126 -0
  150. package/src/resources/extensions/subagent/index.ts +1293 -0
  151. package/src/resources/skills/debug-like-expert/SKILL.md +231 -0
  152. package/src/resources/skills/debug-like-expert/references/debugging-mindset.md +253 -0
  153. package/src/resources/skills/debug-like-expert/references/hypothesis-testing.md +373 -0
  154. package/src/resources/skills/debug-like-expert/references/investigation-techniques.md +337 -0
  155. package/src/resources/skills/debug-like-expert/references/verification-patterns.md +425 -0
  156. package/src/resources/skills/debug-like-expert/references/when-to-research.md +361 -0
  157. package/src/resources/skills/frontend-design/SKILL.md +45 -0
  158. package/src/resources/skills/swiftui/SKILL.md +208 -0
  159. package/src/resources/skills/swiftui/references/animations.md +921 -0
  160. package/src/resources/skills/swiftui/references/architecture.md +1561 -0
  161. package/src/resources/skills/swiftui/references/layout-system.md +1186 -0
  162. package/src/resources/skills/swiftui/references/navigation.md +1492 -0
  163. package/src/resources/skills/swiftui/references/networking-async.md +214 -0
  164. package/src/resources/skills/swiftui/references/performance.md +1706 -0
  165. package/src/resources/skills/swiftui/references/platform-integration.md +204 -0
  166. package/src/resources/skills/swiftui/references/state-management.md +1443 -0
  167. package/src/resources/skills/swiftui/references/swiftdata.md +297 -0
  168. package/src/resources/skills/swiftui/references/testing-debugging.md +247 -0
  169. package/src/resources/skills/swiftui/references/uikit-appkit-interop.md +218 -0
  170. package/src/resources/skills/swiftui/workflows/add-feature.md +191 -0
  171. package/src/resources/skills/swiftui/workflows/build-new-app.md +311 -0
  172. package/src/resources/skills/swiftui/workflows/debug-swiftui.md +192 -0
  173. package/src/resources/skills/swiftui/workflows/optimize-performance.md +197 -0
  174. package/src/resources/skills/swiftui/workflows/ship-app.md +203 -0
  175. package/src/resources/skills/swiftui/workflows/write-tests.md +235 -0
  176. package/dist/commands/task.d.ts +0 -9
  177. package/dist/commands/task.d.ts.map +0 -1
  178. package/dist/commands/task.js +0 -129
  179. package/dist/commands/task.js.map +0 -1
  180. package/dist/commands/task.test.d.ts +0 -2
  181. package/dist/commands/task.test.d.ts.map +0 -1
  182. package/dist/commands/task.test.js +0 -169
  183. package/dist/commands/task.test.js.map +0 -1
  184. package/dist/e2e/task-e2e.test.d.ts +0 -2
  185. package/dist/e2e/task-e2e.test.d.ts.map +0 -1
  186. package/dist/e2e/task-e2e.test.js +0 -173
  187. package/dist/e2e/task-e2e.test.js.map +0 -1
  188. package/dist/index.d.ts +0 -3
  189. package/dist/index.d.ts.map +0 -1
  190. package/dist/index.js +0 -93
  191. package/dist/index.js.map +0 -1
  192. package/dist/slug.d.ts +0 -2
  193. package/dist/slug.d.ts.map +0 -1
  194. package/dist/slug.js +0 -12
  195. package/dist/slug.js.map +0 -1
  196. package/dist/slug.test.d.ts +0 -2
  197. package/dist/slug.test.d.ts.map +0 -1
  198. package/dist/slug.test.js +0 -32
  199. package/dist/slug.test.js.map +0 -1
@@ -0,0 +1,1318 @@
1
+ import Foundation
2
+ import ApplicationServices
3
+ import AppKit
4
+ import ScreenCaptureKit
5
+ import UniformTypeIdentifiers
6
+
7
+ // MARK: - JSON Protocol Types
8
+
9
/// A single command decoded from the caller's JSON input.
struct CommandRequest: Decodable {
    /// Name of the command to dispatch.
    let command: String
    /// Optional arguments for the command, keyed by parameter name.
    let params: [String: AnyCodable]?
}
13
+
14
/// Result envelope encoded back to the caller.
///
/// Exactly one of `data` / `error` is populated by the two factory helpers:
/// `ok(_:)` sets `data`, `fail(_:)` sets `error`.
struct CommandResponse: Encodable {
    let success: Bool
    let data: AnyCodable?
    let error: String?

    /// Builds a successful response whose payload is `data` wrapped in `AnyCodable`.
    static func ok(_ data: Any) -> CommandResponse {
        let payload = AnyCodable(data)
        return CommandResponse(success: true, data: payload, error: nil)
    }

    /// Builds a failed response carrying `message` as the error text.
    static func fail(_ message: String) -> CommandResponse {
        return CommandResponse(success: false, data: nil, error: message)
    }
}
27
+
28
/// Type-erased Codable wrapper for heterogeneous JSON values.
///
/// Decoding yields one of `NSNull`, `Bool`, `Int`, `Double`, `String`, `[Any]`
/// or `[String: Any]` (tried in that order). Encoding accepts those types plus
/// nested `AnyCodable` values and the remaining common numeric types; any other
/// value is encoded as its `String(describing:)` text.
struct AnyCodable: Codable {
    let value: Any

    init(_ value: Any) {
        self.value = value
    }

    /// Decodes a single JSON value by probing each supported type in turn.
    /// - Throws: `DecodingError.dataCorrupted` when no supported type matches.
    init(from decoder: Decoder) throws {
        let container = try decoder.singleValueContainer()
        if container.decodeNil() {
            value = NSNull()
        } else if let b = try? container.decode(Bool.self) {
            value = b
        } else if let i = try? container.decode(Int.self) {
            value = i
        } else if let d = try? container.decode(Double.self) {
            value = d
        } else if let s = try? container.decode(String.self) {
            value = s
        } else if let a = try? container.decode([AnyCodable].self) {
            value = a.map(\.value)
        } else if let dict = try? container.decode([String: AnyCodable].self) {
            value = dict.mapValues(\.value)
        } else {
            throw DecodingError.dataCorruptedError(in: container, debugDescription: "Unsupported JSON type")
        }
    }

    /// Encodes the wrapped value as a single JSON value.
    func encode(to encoder: Encoder) throws {
        var container = encoder.singleValueContainer()
        switch value {
        case is NSNull:
            try container.encodeNil()
        case let b as Bool:
            try container.encode(b)
        case let i as Int:
            try container.encode(i)
        case let i as Int64:
            try container.encode(i)
        case let i as Int32:
            try container.encode(i)
        case let i as UInt32:
            try container.encode(i)
        case let d as Double:
            try container.encode(d)
        case let s as String:
            try container.encode(s)
        case let a as [Any]:
            try container.encode(a.map { AnyCodable($0) })
        case let dict as [String: Any]:
            try container.encode(dict.mapValues { AnyCodable($0) })
        // The cases below come after every original case so that values which
        // already matched above keep encoding exactly as before.
        case let wrapped as AnyCodable:
            // An already-wrapped value must be unwrapped, not stringified.
            try container.encode(wrapped)
        case let f as Float:
            try container.encode(f)
        case let g as CGFloat:
            try container.encode(Double(g))
        case let u as UInt:
            try container.encode(u)
        case let u as UInt64:
            try container.encode(u)
        case let i as Int16:
            try container.encode(i)
        case let i as Int8:
            try container.encode(i)
        case let u as UInt16:
            try container.encode(u)
        case let u as UInt8:
            try container.encode(u)
        default:
            // Last resort: keep the response encodable by emitting a description.
            try container.encode(String(describing: value))
        }
    }
}
85
+
86
+ // MARK: - Debug Logging (stderr only)
87
+
88
/// Writes `message` to standard error as a single `[mac-agent]`-prefixed line.
func debug(_ message: String) {
    let line = "[mac-agent] \(message)\n"
    let bytes = Data(line.utf8)
    FileHandle.standardError.write(bytes)
}
91
+
92
+ // MARK: - Command Handlers
93
+
94
/// Liveness check: always succeeds with `{"status": "ok"}`.
func handlePing() -> CommandResponse {
    let payload = ["status": "ok"]
    return CommandResponse.ok(payload)
}
97
+
98
/// Reports whether this process is trusted for Accessibility (`AXIsProcessTrusted`)
/// and whether screen-capture access preflights successfully
/// (`CGPreflightScreenCaptureAccess`).
func handleCheckPermissions() -> CommandResponse {
    var status: [String: Any] = [:]
    status["accessibilityEnabled"] = AXIsProcessTrusted()
    status["screenRecordingEnabled"] = CGPreflightScreenCaptureAccess()
    return .ok(status)
}
106
+
107
+ // MARK: - App Lifecycle Commands
108
+
109
/// Lists running applications.
///
/// Apps with a `.regular` activation policy are always included; `.accessory`
/// apps are included only when the `includeBackground` parameter is `true`.
/// Each entry carries `name`, `bundleId`, `pid` and `isActive`.
func handleListApps(_ params: [String: AnyCodable]?) -> CommandResponse {
    let includeBackground = (params?["includeBackground"]?.value as? Bool) ?? false

    let entries: [[String: Any]] = NSWorkspace.shared.runningApplications
        .filter { app -> Bool in
            switch app.activationPolicy {
            case .regular:
                return true
            case .accessory:
                return includeBackground
            default:
                return false
            }
        }
        .map { app -> [String: Any] in
            [
                "name": app.localizedName ?? "Unknown",
                "bundleId": app.bundleIdentifier ?? "",
                "pid": Int(app.processIdentifier),
                "isActive": app.isActive
            ]
        }

    return .ok(entries)
}
129
+
130
/// Looks up a running application from the `name` and/or `bundleId` parameters.
///
/// Returns the first running app whose bundle identifier equals `bundleId`
/// or whose localized name equals `name` ignoring case; `nil` when neither
/// parameter is supplied or nothing matches.
func findRunningApp(params: [String: AnyCodable]?) -> NSRunningApplication? {
    let wantedBundleId = params?["bundleId"]?.value as? String
    let wantedName = (params?["name"]?.value as? String)?.lowercased()

    if wantedBundleId == nil && wantedName == nil {
        return nil
    }

    return NSWorkspace.shared.runningApplications.first { candidate in
        if let wantedBundleId = wantedBundleId, candidate.bundleIdentifier == wantedBundleId {
            return true
        }
        if let wantedName = wantedName, candidate.localizedName?.lowercased() == wantedName {
            return true
        }
        return false
    }
}
148
+
149
/// Launches an application identified by `bundleId` (preferred) or `name`.
///
/// - With `bundleId`: resolves the app URL through `NSWorkspace` and opens it,
///   blocking until the completion handler fires.
/// - With `name` only: runs `/usr/bin/open -a <name>` and then looks the app up
///   in the running-applications list.
///
/// On success the response contains `launched`, `name`, `bundleId` and `pid`
/// (`pid` is 0 when the running application could not be identified).
func handleLaunchApp(_ params: [String: AnyCodable]?) -> CommandResponse {
    let name = params?["name"]?.value as? String
    let bundleId = params?["bundleId"]?.value as? String

    guard name != nil || bundleId != nil else {
        return .fail("launchApp requires 'name' or 'bundleId' parameter")
    }

    // Try bundle ID first if provided
    if let bundleId = bundleId {
        guard let appURL = NSWorkspace.shared.urlForApplication(withBundleIdentifier: bundleId) else {
            return .fail("App not found with bundleId: \(bundleId)")
        }

        let config = NSWorkspace.OpenConfiguration()
        config.activates = true
        let semaphore = DispatchSemaphore(value: 0)
        var launchedApp: NSRunningApplication?
        var launchError: Error?

        // openApplication is asynchronous; block this thread until it reports back.
        NSWorkspace.shared.openApplication(at: appURL, configuration: config) { app, error in
            launchedApp = app
            launchError = error
            semaphore.signal()
        }
        semaphore.wait()

        if let error = launchError {
            return .fail("Failed to launch app with bundleId '\(bundleId)': \(error.localizedDescription)")
        }

        return .ok([
            "launched": true,
            "name": launchedApp?.localizedName ?? "Unknown",
            "bundleId": bundleId,
            "pid": Int(launchedApp?.processIdentifier ?? 0)
        ] as [String: Any])
    }

    // Launch by name using /usr/bin/open -a
    guard let name = name else {
        return .fail("launchApp requires 'name' or 'bundleId' parameter")
    }

    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/usr/bin/open")
    process.arguments = ["-a", name]
    let errPipe = Pipe()
    process.standardError = errPipe

    let errData: Data
    do {
        try process.run()
        // Drain stderr BEFORE waiting for exit: if the child fills the pipe
        // buffer while nobody reads it, it blocks and waitUntilExit never returns.
        errData = errPipe.fileHandleForReading.readDataToEndOfFile()
        process.waitUntilExit()
    } catch {
        return .fail("Failed to launch '\(name)': \(error.localizedDescription)")
    }

    if process.terminationStatus != 0 {
        let errMsg = String(data: errData, encoding: .utf8)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "Unknown error"
        return .fail("App not found: \(name). \(errMsg)")
    }

    // Give the app a moment to appear in running apps, then find it
    Thread.sleep(forTimeInterval: 0.5)
    let loweredName = name.lowercased()
    let launched = NSWorkspace.shared.runningApplications.first {
        $0.localizedName?.lowercased() == loweredName
    }

    return .ok([
        "launched": true,
        "name": launched?.localizedName ?? name,
        "bundleId": launched?.bundleIdentifier ?? "",
        "pid": Int(launched?.processIdentifier ?? 0)
    ] as [String: Any])
}
226
+
227
/// Brings a running application (selected by `name` or `bundleId`) to the front.
///
/// Fails when neither parameter is given, when no matching app is running,
/// or when `activate(options:)` returns `false`.
func handleActivateApp(_ params: [String: AnyCodable]?) -> CommandResponse {
    let requestedName = params?["name"]?.value as? String
    let requestedBundleId = params?["bundleId"]?.value as? String

    if requestedName == nil && requestedBundleId == nil {
        return .fail("activateApp requires 'name' or 'bundleId' parameter")
    }

    guard let target = findRunningApp(params: params) else {
        return .fail("App not running: \(requestedName ?? requestedBundleId ?? "unknown")")
    }

    let didActivate = target.activate(options: .activateIgnoringOtherApps)
    let displayName = target.localizedName ?? "Unknown"

    guard didActivate else {
        return .fail("Failed to activate app: \(displayName)")
    }

    return .ok([
        "activated": true,
        "name": displayName
    ] as [String: Any])
}
250
+
251
/// Asks a running application (selected by `name` or `bundleId`) to terminate.
///
/// Fails when neither parameter is given, when no matching app is running,
/// or when `terminate()` returns `false`.
func handleQuitApp(_ params: [String: AnyCodable]?) -> CommandResponse {
    let requestedName = params?["name"]?.value as? String
    let requestedBundleId = params?["bundleId"]?.value as? String

    if requestedName == nil && requestedBundleId == nil {
        return .fail("quitApp requires 'name' or 'bundleId' parameter")
    }

    guard let target = findRunningApp(params: params) else {
        return .fail("App not running: \(requestedName ?? requestedBundleId ?? "unknown")")
    }

    // Capture the name before terminating so the response can still report it.
    let appName = target.localizedName ?? "Unknown"

    guard target.terminate() else {
        return .fail("Failed to quit app: \(appName). The app may have unsaved changes or refused to terminate.")
    }

    return .ok([
        "quit": true,
        "name": appName
    ] as [String: Any])
}
275
+
276
+ // MARK: - AX Element Helpers
277
+
278
/// Resolves the `app` parameter (an app name or bundle ID) to a running application.
///
/// - Returns: The first running app whose bundle identifier equals the parameter
///   or whose localized name equals it ignoring case, together with the
///   identifier that was requested. The identifier is `""` when the parameter
///   is missing, not a string, or empty.
func resolveApp(_ params: [String: AnyCodable]?) -> (app: NSRunningApplication?, identifier: String) {
    guard let requested = params?["app"]?.value as? String, !requested.isEmpty else {
        return (nil, "")
    }

    let loweredRequest = requested.lowercased()
    let match = NSWorkspace.shared.runningApplications.first { candidate in
        candidate.bundleIdentifier == requested
            || candidate.localizedName?.lowercased() == loweredRequest
    }
    return (match, requested)
}
290
+
291
/// Get child AXUIElements of a given element.
///
/// Uses AXUIElementCopyAttributeValues (plural, indexed) as primary path,
/// falling back to AXUIElementCopyAttributeValue for kAXChildrenAttribute.
/// The indexed request is sized from the element's reported child count
/// (never less than 100), so elements with more than 100 children are not
/// silently cut short.
/// Returns empty array on failure (leaf elements have no children — not an error).
func getChildren(_ element: AXUIElement) -> [AXUIElement] {
    let childrenAttribute = kAXChildrenAttribute as CFString

    // Ask how many children exist; if that query fails, keep the historical batch of 100.
    var reportedCount: CFIndex = 0
    let countErr = AXUIElementGetAttributeValueCount(element, childrenAttribute, &reportedCount)
    let batchSize: CFIndex = countErr == .success ? max(reportedCount, 100) : 100

    // Primary: AXUIElementCopyAttributeValues (plural) — handles edge cases in some apps
    var values: CFArray?
    let pluralErr = AXUIElementCopyAttributeValues(element, childrenAttribute, 0, batchSize, &values)
    if pluralErr == .success, let cfArray = values {
        return (cfArray as [AnyObject]).compactMap { item -> AXUIElement? in
            // Verify the CF type before the cast instead of force-casting blindly.
            guard CFGetTypeID(item) == AXUIElementGetTypeID() else { return nil }
            return (item as! AXUIElement)
        }
    }

    // Fallback: AXUIElementCopyAttributeValue (singular)
    var value: CFTypeRef?
    let singularErr = AXUIElementCopyAttributeValue(element, childrenAttribute, &value)
    if singularErr == .success, let children = value as? [AXUIElement] {
        return children
    }

    return []
}
313
+
314
/// Builds the standard JSON-friendly dictionary describing an AXUIElement.
///
/// Keys are only present when the attribute could be read:
/// `role`, `title`, `description`, `identifier` (strings), `value`
/// (the string itself, a number's `stringValue`, or the name of the value's
/// dynamic type for anything else), and `enabled` / `focused` (booleans).
func getElementAttributes(_ element: AXUIElement) -> [String: Any] {
    // Reads one attribute, returning nil unless the AX call reports success.
    func copyValue(_ attribute: String) -> CFTypeRef? {
        var raw: CFTypeRef?
        let status = AXUIElementCopyAttributeValue(element, attribute as CFString, &raw)
        return status == .success ? raw : nil
    }

    var result: [String: Any] = [:]

    let stringFields: [(key: String, attribute: String)] = [
        ("role", kAXRoleAttribute),
        ("title", kAXTitleAttribute),
        ("description", kAXDescriptionAttribute),
        ("identifier", "AXIdentifier")
    ]
    for field in stringFields {
        if let text = copyValue(field.attribute) as? String {
            result[field.key] = text
        }
    }

    // AXValue: plain strings pass through, numbers are stringified, and any
    // other payload is represented by its type name only.
    if let raw = copyValue(kAXValueAttribute) {
        if let text = raw as? String {
            result["value"] = text
        } else if let number = raw as? NSNumber {
            result["value"] = number.stringValue
        } else {
            result["value"] = String(describing: type(of: raw))
        }
    }

    let boolFields: [(key: String, attribute: String)] = [
        ("enabled", kAXEnabledAttribute),
        ("focused", kAXFocusedAttribute)
    ]
    for field in boolFields {
        if let number = copyValue(field.attribute) as? NSNumber {
            result[field.key] = number.boolValue
        }
    }

    return result
}
359
+
360
/// Depth-first, pre-order search for AXUIElements matching the given criteria.
///
/// A criterion that is `nil` is ignored; an element matches when every non-nil
/// criterion matches, so with no criteria at all every visited element matches.
/// With `matchType == "exact"` strings are compared for equality; any other
/// `matchType` performs a case-insensitive substring test.
///
/// The search stops as soon as the number of matches reaches `maxCount`
/// (reported as `truncated`). Children are explored only while the current
/// depth is below `maxDepth`; the root is at depth 0.
///
/// Returns (matches, totalVisited, truncated).
func findMatchingElements(
    root: AXUIElement,
    role: String?,
    title: String?,
    value: String?,
    identifier: String?,
    matchType: String,
    maxDepth: Int,
    maxCount: Int
) -> (matches: [[String: Any]], totalVisited: Int, truncated: Bool) {
    let exactMatch = matchType == "exact"

    func satisfies(_ actual: String?, _ expected: String?) -> Bool {
        guard let expected = expected else { return true } // no criterion = matches
        guard let actual = actual else { return false }
        return exactMatch
            ? actual == expected
            : actual.lowercased().contains(expected.lowercased())
    }

    var found: [[String: Any]] = []
    var visited = 0
    var hitLimit = false

    // Explicit stack; children are pushed in reverse so they pop in their
    // original order, giving the same pre-order traversal as recursion.
    var pending: [(element: AXUIElement, depth: Int)] = [(element: root, depth: 0)]

    while let next = pending.popLast() {
        visited += 1

        let attrs = getElementAttributes(next.element)
        let isMatch = satisfies(attrs["role"] as? String, role)
            && satisfies(attrs["title"] as? String, title)
            && satisfies(attrs["value"] as? String, value)
            && satisfies(attrs["identifier"] as? String, identifier)

        if isMatch {
            found.append(attrs)
            if found.count >= maxCount {
                hitLimit = true
                break
            }
        }

        if next.depth < maxDepth {
            for child in getChildren(next.element).reversed() {
                pending.append((element: child, depth: next.depth + 1))
            }
        }
    }

    return (found, visited, hitLimit)
}
426
+
427
+ // MARK: - Element Discovery Commands
428
+
429
/// Searches an application's accessibility tree for matching elements.
///
/// Parameters read from `params`:
/// - `app` (required): app name or bundle ID.
/// - `role`, `title`, `value`, `identifier`: optional match criteria.
/// - `matchType`: defaults to `"contains"`.
/// - `maxDepth`: defaults to 5. `maxCount`: defaults to 200.
///
/// Responds with `elements`, `totalVisited` and `truncated`.
func handleFindElements(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("findElements requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    // Reads an integer parameter that may have been decoded as Int or Double.
    // `Int(_: Double)` traps on NaN, infinity and out-of-range values (for
    // example a JSON `1e300`), so those are clamped or defaulted rather than
    // converted blindly.
    func intParam(_ key: String, fallback: Int) -> Int {
        let raw = params?[key]?.value
        if let exact = raw as? Int { return exact }
        guard let number = raw as? Double, !number.isNaN else { return fallback }
        if number >= Double(Int.max) { return Int.max }
        if number <= Double(Int.min) { return Int.min }
        return Int(number)
    }

    let pid = app.processIdentifier
    let appElement = AXUIElementCreateApplication(pid)

    let role = params?["role"]?.value as? String
    let title = params?["title"]?.value as? String
    let value = params?["value"]?.value as? String
    let identifierParam = params?["identifier"]?.value as? String
    let matchType = (params?["matchType"]?.value as? String) ?? "contains"

    let maxDepth = intParam("maxDepth", fallback: 5)
    let maxCount = intParam("maxCount", fallback: 200)

    let (matches, totalVisited, truncated) = findMatchingElements(
        root: appElement,
        role: role,
        title: title,
        value: value,
        identifier: identifierParam,
        matchType: matchType,
        maxDepth: maxDepth,
        maxCount: maxCount
    )

    return .ok([
        "elements": matches,
        "totalVisited": totalVisited,
        "truncated": truncated
    ] as [String: Any])
}
475
+
476
/// Returns a nested representation of an application's accessibility tree.
///
/// Parameters read from `params`:
/// - `app` (required): app name or bundle ID.
/// - `maxDepth`: defaults to 5. The app element's direct children are depth 1.
/// - `maxCount`: maximum number of elements to include; defaults to 200.
///
/// Responds with `tree` (the app element's children as nested nodes),
/// `totalElements` (the number of nodes actually included) and `truncated`
/// (true when an element was skipped because `maxCount` had been reached).
func handleGetTree(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("getTree requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    // Reads an integer parameter that may have been decoded as Int or Double.
    // `Int(_: Double)` traps on NaN, infinity and out-of-range values (for
    // example a JSON `1e300`), so those are clamped or defaulted rather than
    // converted blindly.
    func intParam(_ key: String, fallback: Int) -> Int {
        let raw = params?[key]?.value
        if let exact = raw as? Int { return exact }
        guard let number = raw as? Double, !number.isNaN else { return fallback }
        if number >= Double(Int.max) { return Int.max }
        if number <= Double(Int.min) { return Int.min }
        return Int(number)
    }

    let pid = app.processIdentifier
    let appElement = AXUIElementCreateApplication(pid)

    let maxDepth = intParam("maxDepth", fallback: 5)
    let maxCount = intParam("maxCount", fallback: 200)

    var totalElements = 0
    var truncated = false

    func buildTree(_ element: AXUIElement, depth: Int) -> [String: Any]? {
        guard !truncated else { return nil }

        // Check the limit BEFORE counting, so totalElements only ever counts
        // nodes that end up in the tree (it previously reported maxCount + 1).
        if totalElements >= maxCount {
            truncated = true
            return nil
        }
        totalElements += 1

        let attrs = getElementAttributes(element)
        var node: [String: Any] = [:]
        // Only the descriptive attributes are copied; enabled/focused are left out.
        for key in ["role", "title", "value", "description", "identifier"] {
            if let v = attrs[key] { node[key] = v }
        }

        if depth < maxDepth {
            var childNodes: [[String: Any]] = []
            for child in getChildren(element) {
                guard !truncated else { break }
                if let childNode = buildTree(child, depth: depth + 1) {
                    childNodes.append(childNode)
                }
            }
            if !childNodes.isEmpty {
                node["children"] = childNodes
            }
        }

        return node
    }

    // Build tree from the app element's children (the app element itself is the root context)
    var tree: [[String: Any]] = []
    for child in getChildren(appElement) {
        guard !truncated else { break }
        if let node = buildTree(child, depth: 1) {
            tree.append(node)
        }
    }

    return .ok([
        "tree": tree,
        "totalElements": totalElements,
        "truncated": truncated
    ] as [String: Any])
}
552
+
553
+ // MARK: - AXValue Unpacking and Attribute Reading
554
+
555
/// Converts an `AXValue` wrapper (CGPoint, CGSize, CGRect or CFRange) into a
/// JSON-serializable dictionary tagged with a `"type"` key.
///
/// - Returns: `nil` when `value` is not an `AXValue`, or when extracting the wrapped
///   struct fails; a `"type": "unknown"` dictionary for any other AXValue kind.
func unpackAXValue(_ value: CFTypeRef) -> [String: Any]? {
    // Anything that is not an AXValue is not ours to decode.
    if CFGetTypeID(value) != AXValueGetTypeID() { return nil }

    // Safe after the type-ID check above.
    let wrapped = value as! AXValue
    let kind = AXValueGetType(wrapped)

    switch kind {
    case .cgPoint:
        var p = CGPoint.zero
        guard AXValueGetValue(wrapped, .cgPoint, &p) else { return nil }
        return ["type": "CGPoint", "x": Double(p.x), "y": Double(p.y)]

    case .cgSize:
        var s = CGSize.zero
        guard AXValueGetValue(wrapped, .cgSize, &s) else { return nil }
        return ["type": "CGSize", "width": Double(s.width), "height": Double(s.height)]

    case .cgRect:
        var r = CGRect.zero
        guard AXValueGetValue(wrapped, .cgRect, &r) else { return nil }
        return [
            "type": "CGRect",
            "x": Double(r.origin.x),
            "y": Double(r.origin.y),
            "width": Double(r.size.width),
            "height": Double(r.size.height)
        ]

    case .cfRange:
        var span = CFRange(location: 0, length: 0)
        guard AXValueGetValue(wrapped, .cfRange, &span) else { return nil }
        return ["type": "CFRange", "location": span.location, "length": span.length]

    default:
        return ["type": "unknown", "description": String(describing: kind)]
    }
}
591
+
592
/// Fetches one accessibility attribute from `element` and maps it to a
/// JSON-serializable value.
///
/// Mapping, in order of precedence:
/// string → `String`; boolean number → `Bool`; whole number → `Int`; other number →
/// `Double`; AXValue → dictionary from `unpackAXValue`; array of elements →
/// `{type: "elementArray", count}`; single element → `{type: "element", role}`;
/// anything else → its `String(describing:)` text.
///
/// - Returns: `nil` when the attribute cannot be read or has no value.
func readElementAttribute(_ element: AXUIElement, attribute: String) -> Any? {
    var raw: CFTypeRef?
    guard AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success,
          let v = raw else {
        return nil
    }

    if let text = v as? String {
        return text
    }

    if let number = v as? NSNumber {
        // CFBoolean bridges to NSNumber, so booleans must be detected by type ID first.
        if CFGetTypeID(number) == CFBooleanGetTypeID() {
            return number.boolValue
        }
        // A value with no fractional part is reported as an integer.
        guard number.doubleValue == Double(number.intValue) else {
            return number.doubleValue
        }
        return number.intValue
    }

    if let geometry = unpackAXValue(v) {
        return geometry
    }

    if let list = v as? [AXUIElement] {
        return ["type": "elementArray", "count": list.count] as [String: Any]
    }

    if CFGetTypeID(v) == AXUIElementGetTypeID() {
        let referenced = v as! AXUIElement
        var roleValue: CFTypeRef?
        // Result deliberately ignored: a failed lookup leaves `roleValue` nil → "unknown".
        AXUIElementCopyAttributeValue(referenced, kAXRoleAttribute as CFString, &roleValue)
        return ["type": "element", "role": (roleValue as? String) ?? "unknown"]
    }

    // Last resort: textual description of whatever CF object came back.
    return String(describing: v)
}
637
+
638
+ // MARK: - Interaction Commands
639
+
640
/// Handles the `clickElement` command: locates the first element matching the given
/// criteria in the target app and performs `AXPress` on it.
///
/// Parameters read from `params`: `app` (required, resolved through `resolveApp(_:)`),
/// at least one of `role` / `title` / `value` / `identifier`, and an optional
/// `matchType` (default `"contains"`).
///
/// The element is located with a single `findFirstAXUIElement` walk (depth limit 10),
/// the same lookup `handleTypeText` and `handleReadAttribute` use. A separate
/// attribute-only search followed by a second walk to re-acquire the handle would
/// traverse the accessibility tree twice and could disagree if the UI changed in between.
///
/// - Returns: `.ok` with `clicked: true` and the element's attributes read after the
///   press; `.fail` for missing parameters, an app that is not running, no matching
///   element, or a failed press (the message then lists the element's available actions).
func handleClickElement(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("clickElement requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    let appElement = AXUIElementCreateApplication(app.processIdentifier)

    let role = params?["role"]?.value as? String
    let title = params?["title"]?.value as? String
    let value = params?["value"]?.value as? String
    let identifierParam = params?["identifier"]?.value as? String
    let matchType = (params?["matchType"]?.value as? String) ?? "contains"

    guard role != nil || title != nil || value != nil || identifierParam != nil else {
        return .fail("clickElement requires at least one element criterion (role, title, value, or identifier)")
    }

    // One DFS yields the live AXUIElement handle needed to perform the action.
    guard let element = findFirstAXUIElement(
        root: appElement,
        role: role,
        title: title,
        value: value,
        identifier: identifierParam,
        matchType: matchType,
        maxDepth: 10
    ) else {
        var criteria: [String] = []
        if let r = role { criteria.append("role=\(r)") }
        if let t = title { criteria.append("title=\(t)") }
        if let v = value { criteria.append("value=\(v)") }
        if let i = identifierParam { criteria.append("identifier=\(i)") }
        return .fail("No element found matching criteria: \(criteria.joined(separator: ", ")) in app '\(identifier)'")
    }

    let pressErr = AXUIElementPerformAction(element, kAXPressAction as CFString)
    if pressErr == .success {
        // Attributes are read after the click so callers can inspect the resulting state.
        let postAttrs = getElementAttributes(element)
        return .ok([
            "clicked": true,
            "element": postAttrs
        ] as [String: Any])
    }

    // Only the failure path needs the action list; a failed lookup reports "none".
    var actionNames: CFArray?
    _ = AXUIElementCopyActionNames(element, &actionNames)
    let actions = (actionNames as? [String]) ?? []

    return .fail("AXPress action failed (error \(pressErr.rawValue)) on element matching criteria. Available actions: \(actions.isEmpty ? "none" : actions.joined(separator: ", "))")
}
718
+
719
/// Handles the `typeText` command: sets the AXValue attribute of the first matching
/// element to the supplied text and reports the value read back afterwards.
///
/// Parameters read from `params`: `app` (required, resolved through `resolveApp(_:)`),
/// `text` (required string), at least one of `role` / `title` / `value` /
/// `identifier`, and an optional `matchType` (default `"contains"`).
///
/// - Returns: `.ok` with `typed: true`, the read-back `value` (`NSNull` when it cannot
///   be read) and the element's attributes; `.fail` for missing parameters, an app
///   that is not running, no matching element, or a rejected write.
func handleTypeText(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    if identifier.isEmpty {
        return .fail("typeText requires 'app' parameter (app name or bundleId)")
    }
    guard let runningApp = app else {
        return .fail("App not running: \(identifier)")
    }
    guard let text = params?["text"]?.value as? String else {
        return .fail("typeText requires 'text' parameter (string to type)")
    }

    let appElement = AXUIElementCreateApplication(runningApp.processIdentifier)

    let role = params?["role"]?.value as? String
    let title = params?["title"]?.value as? String
    let value = params?["value"]?.value as? String
    let identifierParam = params?["identifier"]?.value as? String
    let matchType = (params?["matchType"]?.value as? String) ?? "contains"

    let labelled: [(String, String?)] = [
        ("role", role),
        ("title", title),
        ("value", value),
        ("identifier", identifierParam)
    ]
    let suppliedCriteria = labelled.compactMap { pair in
        pair.1.map { "\(pair.0)=\($0)" }
    }

    if suppliedCriteria.isEmpty {
        return .fail("typeText requires at least one element criterion (role, title, value, or identifier)")
    }

    guard let element = findFirstAXUIElement(
        root: appElement,
        role: role,
        title: title,
        value: value,
        identifier: identifierParam,
        matchType: matchType,
        maxDepth: 10
    ) else {
        return .fail("No element found matching criteria: \(suppliedCriteria.joined(separator: ", ")) in app '\(identifier)'")
    }

    // Write the text straight into the element's value attribute.
    let setErr = AXUIElementSetAttributeValue(element, kAXValueAttribute as CFString, text as CFTypeRef)
    guard setErr == .success else {
        return .fail("Failed to set AXValue on element (error \(setErr.rawValue)). The element may be read-only or not support text input.")
    }

    // Read the attribute back so the caller can verify what the element now holds.
    var fetched: CFTypeRef?
    let readErr = AXUIElementCopyAttributeValue(element, kAXValueAttribute as CFString, &fetched)
    let echoed: Any = { () -> Any in
        guard readErr == .success, let current = fetched else { return NSNull() }
        if let s = current as? String { return s }
        if let n = current as? NSNumber { return n.stringValue }
        return String(describing: current)
    }()

    return .ok([
        "typed": true,
        "value": echoed,
        "element": getElementAttributes(element)
    ] as [String: Any])
}
790
+
791
/// Handles the `readAttribute` command: reads one attribute (`attribute`) or several
/// (`attributes`) from the first element matching the given criteria.
///
/// Parameters read from `params`: `app` (required, resolved through `resolveApp(_:)`),
/// `attribute` (string) and/or `attributes` (array; non-string entries are dropped),
/// at least one of `role` / `title` / `value` / `identifier`, and an optional
/// `matchType` (default `"contains"`). When both `attribute` and `attributes` are
/// given, the single-attribute form wins.
///
/// - Returns: `.ok` with `value` (single mode) or `values` (multi mode) plus the
///   element's attributes; unreadable attributes are reported as `NSNull`. `.fail`
///   for missing parameters, an app that is not running, or no matching element.
func handleReadAttribute(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    if identifier.isEmpty {
        return .fail("readAttribute requires 'app' parameter (app name or bundleId)")
    }
    guard let runningApp = app else {
        return .fail("App not running: \(identifier)")
    }

    // Accept either a single attribute name or a list of names.
    let singleAttr = params?["attribute"]?.value as? String
    let multiAttrs = (params?["attributes"]?.value as? [Any])?.compactMap { $0 as? String }

    guard singleAttr != nil || !(multiAttrs ?? []).isEmpty else {
        return .fail("readAttribute requires 'attribute' (string) or 'attributes' (array of strings) parameter")
    }

    let appElement = AXUIElementCreateApplication(runningApp.processIdentifier)

    let role = params?["role"]?.value as? String
    let title = params?["title"]?.value as? String
    let value = params?["value"]?.value as? String
    let identifierParam = params?["identifier"]?.value as? String
    let matchType = (params?["matchType"]?.value as? String) ?? "contains"

    let labelled: [(String, String?)] = [
        ("role", role),
        ("title", title),
        ("value", value),
        ("identifier", identifierParam)
    ]
    let suppliedCriteria = labelled.compactMap { pair in
        pair.1.map { "\(pair.0)=\($0)" }
    }

    if suppliedCriteria.isEmpty {
        return .fail("readAttribute requires at least one element criterion (role, title, value, or identifier)")
    }

    guard let element = findFirstAXUIElement(
        root: appElement,
        role: role,
        title: title,
        value: value,
        identifier: identifierParam,
        matchType: matchType,
        maxDepth: 10
    ) else {
        return .fail("No element found matching criteria: \(suppliedCriteria.joined(separator: ", ")) in app '\(identifier)'")
    }

    let elementAttrs = getElementAttributes(element)

    // Single-attribute form takes precedence.
    if let name = singleAttr {
        let single: Any = readElementAttribute(element, attribute: name) ?? NSNull()
        return .ok([
            "value": single,
            "element": elementAttrs
        ] as [String: Any])
    }

    // Multi-attribute form: one entry per requested name (a repeated name keeps the last read).
    if let names = multiAttrs {
        let values = names.reduce(into: [String: Any]()) { collected, name in
            collected[name] = readElementAttribute(element, attribute: name) ?? NSNull()
        }
        return .ok([
            "values": values,
            "element": elementAttrs
        ] as [String: Any])
    }

    return .fail("Internal error: no attribute specified")
}
869
+
870
/// Handles the `getFocusedElement` command: asks the target app's accessibility
/// element for its focused UI element.
///
/// Parameters read from `params`: `app` (required, resolved through `resolveApp(_:)`).
///
/// - Returns: `.ok` with `focused: true` and either the focused element's attributes
///   or, when the returned object is not an AXUIElement, its textual description;
///   `.fail` when the app parameter is missing, the app is not running, or the
///   accessibility query fails (the message includes the raw AX error code and
///   suggests `findElements` / `getTree` as alternatives).
func handleGetFocusedElement(_ params: [String: AnyCodable]?) -> CommandResponse {
    let (app, identifier) = resolveApp(params)

    guard !identifier.isEmpty else {
        return .fail("getFocusedElement requires 'app' parameter (app name or bundleId)")
    }
    guard let app = app else {
        return .fail("App not running: \(identifier)")
    }

    let appElement = AXUIElementCreateApplication(app.processIdentifier)

    var focusedValue: CFTypeRef?
    let err = AXUIElementCopyAttributeValue(appElement, kAXFocusedUIElementAttribute as CFString, &focusedValue)

    if err == .success, let focused = focusedValue {
        // Verify the type ID before the forced cast to AXUIElement.
        if CFGetTypeID(focused) == AXUIElementGetTypeID() {
            let focusedElement = focused as! AXUIElement
            return .ok([
                "focused": true,
                "element": getElementAttributes(focusedElement)
            ] as [String: Any])
        }
        return .ok(["focused": true, "value": String(describing: focused)] as [String: Any])
    }

    // AXError -25212 is kAXErrorNoValue (kAXErrorNotImplemented is -25208); the
    // message names the code accordingly.
    return .fail("getFocusedElement failed (AX error \(err.rawValue)). " +
        "This is a known macOS limitation: kAXFocusedUIElementAttribute returns error -25212 (noValue) " +
        "when called from a CLI process that is not the frontmost app. " +
        "Workaround: use findElements with role/title criteria to locate specific elements, " +
        "or use getTree to discover the element hierarchy.")
}
907
+
908
/// Depth-first, pre-order search for the first element satisfying every supplied
/// criterion; returns the live `AXUIElement` handle so callers can act on it.
///
/// - Parameters:
///   - root: Element the search starts at (depth 0); the root itself may match.
///   - role, title, value, identifier: Optional criteria. A `nil` criterion is not
///     checked; a supplied criterion fails when the element lacks that attribute as
///     a string.
///   - matchType: `"exact"` compares with `==`; any other value performs a
///     case-insensitive substring match.
///   - maxDepth: Children are only visited while the current depth is below this value.
/// - Returns: The first matching element, or `nil` when nothing matches or no
///   criterion was supplied.
func findFirstAXUIElement(
    root: AXUIElement,
    role: String?,
    title: String?,
    value: String?,
    identifier: String?,
    matchType: String,
    maxDepth: Int
) -> AXUIElement? {
    // With no criteria at all, nothing can ever match.
    let anyCriterionGiven = !(role == nil && title == nil && value == nil && identifier == nil)

    // Compares one attribute against its (optional) criterion.
    func fieldMatches(_ actual: String?, against wanted: String?) -> Bool {
        switch (wanted, actual) {
        case (nil, _):
            return true
        case (_?, nil):
            return false
        case let (w?, a?):
            if matchType == "exact" {
                return a == w
            }
            return a.lowercased().contains(w.lowercased())
        }
    }

    func search(from node: AXUIElement, level: Int) -> AXUIElement? {
        let attrs = getElementAttributes(node)

        let isHit = anyCriterionGiven
            && fieldMatches(attrs["role"] as? String, against: role)
            && fieldMatches(attrs["title"] as? String, against: title)
            && fieldMatches(attrs["value"] as? String, against: value)
            && fieldMatches(attrs["identifier"] as? String, against: identifier)
        if isHit {
            return node
        }

        guard level < maxDepth else { return nil }

        for child in getChildren(node) {
            if let hit = search(from: child, level: level + 1) {
                return hit
            }
        }
        return nil
    }

    return search(from: root, level: 0)
}
959
+
960
+ // MARK: - Window Commands
961
+
962
/// Handles the `listWindows` command: lists the on-screen windows owned by the
/// requested app, using `CGWindowListCopyWindowInfo`.
///
/// Parameters read from `params`: `app` (required) — matched against each running
/// application's bundle identifier (exact) or localized name (case-insensitive);
/// the first running application satisfying either test is used.
///
/// - Returns: `.ok` with `windows` (each entry has `windowId`, `title`, `bounds`,
///   `isOnScreen`, `layer`), `app` and `pid`; the list is empty when the window list
///   cannot be retrieved. `.fail` when the parameter is missing or the app is not running.
func handleListWindows(_ params: [String: AnyCodable]?) -> CommandResponse {
    guard let appIdentifier = params?["app"]?.value as? String, !appIdentifier.isEmpty else {
        return .fail("listWindows requires 'app' parameter (app name or bundleId)")
    }

    // Resolve the identifier to a running application.
    let wantedName = appIdentifier.lowercased()
    let resolved = NSWorkspace.shared.runningApplications.first { candidate in
        candidate.bundleIdentifier == appIdentifier
            || candidate.localizedName?.lowercased() == wantedName
    }

    guard let app = resolved else {
        return .fail("App not running: \(appIdentifier)")
    }

    let targetPid = Int(app.processIdentifier)
    let displayName = app.localizedName ?? appIdentifier

    let listOptions: CGWindowListOption = [.optionOnScreenOnly, .excludeDesktopElements]
    guard let windowList = CGWindowListCopyWindowInfo(listOptions, kCGNullWindowID) as? [[String: Any]] else {
        return .ok(["windows": [] as [Any], "app": displayName, "pid": targetPid] as [String: Any])
    }

    // Keep only windows owned by the target process; every such window is reported.
    let windows = windowList.compactMap { info -> [String: Any]? in
        guard let ownerPid = info[kCGWindowOwnerPID as String] as? Int, ownerPid == targetPid else {
            return nil
        }

        var bounds: [String: Any] = [:]
        if let rect = info[kCGWindowBounds as String] as? [String: Any] {
            bounds = [
                "x": rect["X"] as? Double ?? 0.0,
                "y": rect["Y"] as? Double ?? 0.0,
                "width": rect["Width"] as? Double ?? 0.0,
                "height": rect["Height"] as? Double ?? 0.0
            ]
        }

        return [
            "windowId": info[kCGWindowNumber as String] as? Int ?? 0,
            "title": info[kCGWindowName as String] as? String ?? "",
            "bounds": bounds,
            "isOnScreen": info[kCGWindowIsOnscreen as String] as? Bool ?? true,
            "layer": info[kCGWindowLayer as String] as? Int ?? 0
        ]
    }

    return .ok(["windows": windows, "app": displayName, "pid": targetPid] as [String: Any])
}
1029
+
1030
/// Handles the `getWindowInfo` command: returns details for one window, looked up by
/// its window number among all windows (including off-screen ones).
///
/// Parameters read from `params`: `windowId` (required number). JSON numbers may
/// surface as `Int` or `Double`; a `Double` is truncated toward zero. A `Double` that
/// cannot be represented as `Int` (NaN, infinity, out of range) is rejected with an
/// error instead of trapping in `Int(_:)`.
///
/// - Returns: `.ok` with `windowId`, `title`, `bounds`, `ownerName`, `ownerPid`,
///   `layer`, `isOnScreen`, `alpha` and `memoryUsage`; `.fail` when the parameter is
///   missing or invalid, the window list cannot be retrieved, or no window has that ID.
func handleGetWindowInfo(_ params: [String: AnyCodable]?) -> CommandResponse {
    let windowIdValue = params?["windowId"]?.value
    let windowId: Int

    if let intVal = windowIdValue as? Int {
        windowId = intVal
    } else if let doubleVal = windowIdValue as? Double {
        guard let truncatedId = Int(exactly: doubleVal.rounded(.towardZero)) else {
            return .fail("getWindowInfo 'windowId' is out of range: \(doubleVal)")
        }
        windowId = truncatedId
    } else {
        return .fail("getWindowInfo requires 'windowId' parameter (number)")
    }

    // `.optionAll` includes off-screen windows.
    guard let windowList = CGWindowListCopyWindowInfo([.optionAll], kCGNullWindowID) as? [[String: Any]] else {
        return .fail("Failed to retrieve window list from CGWindowListCopyWindowInfo")
    }

    guard let win = windowList.first(where: { ($0[kCGWindowNumber as String] as? Int) == windowId }) else {
        return .fail("Window not found: \(windowId)")
    }

    var bounds: [String: Any] = [:]
    if let boundsDict = win[kCGWindowBounds as String] as? [String: Any] {
        bounds = [
            "x": boundsDict["X"] as? Double ?? 0.0,
            "y": boundsDict["Y"] as? Double ?? 0.0,
            "width": boundsDict["Width"] as? Double ?? 0.0,
            "height": boundsDict["Height"] as? Double ?? 0.0
        ]
    }

    // Missing keys fall back to neutral defaults rather than failing the request.
    let result: [String: Any] = [
        "windowId": windowId,
        "title": win[kCGWindowName as String] as? String ?? "",
        "bounds": bounds,
        "ownerName": win[kCGWindowOwnerName as String] as? String ?? "",
        "ownerPid": win[kCGWindowOwnerPID as String] as? Int ?? 0,
        "layer": win[kCGWindowLayer as String] as? Int ?? 0,
        "isOnScreen": win[kCGWindowIsOnscreen as String] as? Bool ?? false,
        "alpha": win[kCGWindowAlpha as String] as? Double ?? 1.0,
        "memoryUsage": win[kCGWindowMemoryUsage as String] as? Int ?? 0
    ]
    return .ok(result)
}
1086
+
1087
+ // MARK: - Screenshot Commands
1088
+
1089
/// Handles the `screenshotWindow` command: captures one window with ScreenCaptureKit
/// and returns it as base64-encoded JPEG or PNG data.
///
/// Parameters read from `params`:
/// - `windowId`: required number (`Int` or `Double`; a `Double` is truncated toward
///   zero). Values that do not fit a `UInt32` are rejected with an error instead of
///   trapping in the integer conversion.
/// - `format`: `"jpeg"` (default) or `"png"`.
/// - `quality`: JPEG compression quality, default 0.8. Accepted as `Double` or `Int`
///   (JSON `1` may decode as an integer) and clamped to 0...1.
/// - `retina`: when true, capture at `.best` resolution instead of `.nominal`.
///
/// - Returns: `.ok` with `imageData`, `format`, `width` and `height`; `.fail` when
///   Screen Recording permission is missing, a parameter is invalid, the window is
///   not found, or capture/encoding fails.
func handleScreenshotWindow(_ params: [String: AnyCodable]?) -> CommandResponse {
    // Check Screen Recording permission first
    guard CGPreflightScreenCaptureAccess() else {
        return .fail("Screen Recording permission not granted. " +
            "Go to System Settings → Privacy & Security → Screen Recording and enable this app. " +
            "You may need to add the terminal or shell that runs mac-agent.")
    }

    // Parse windowId (JSON numbers may surface as Int or Double) without trapping.
    let windowIdValue = params?["windowId"]?.value
    let windowId: UInt32

    if let intVal = windowIdValue as? Int {
        guard let id = UInt32(exactly: intVal) else {
            return .fail("screenshotWindow 'windowId' is out of range: \(intVal)")
        }
        windowId = id
    } else if let doubleVal = windowIdValue as? Double {
        guard let id = UInt32(exactly: doubleVal.rounded(.towardZero)) else {
            return .fail("screenshotWindow 'windowId' is out of range: \(doubleVal)")
        }
        windowId = id
    } else {
        return .fail("screenshotWindow requires 'windowId' parameter (number)")
    }

    // Parse optional parameters
    let format = (params?["format"]?.value as? String) ?? "jpeg"
    let qualityValue = params?["quality"]?.value
    let requestedQuality: Double
    if let q = qualityValue as? Double {
        requestedQuality = q
    } else if let q = qualityValue as? Int {
        requestedQuality = Double(q)
    } else {
        requestedQuality = 0.8
    }
    let quality = min(max(requestedQuality, 0.0), 1.0)
    let retina = (params?["retina"]?.value as? Bool) ?? false

    guard format == "jpeg" || format == "png" else {
        return .fail("Unsupported format '\(format)'. Use 'jpeg' or 'png'.")
    }

    debug("screenshotWindow: windowId=\(windowId) format=\(format) quality=\(quality) retina=\(retina)")

    // Get available windows via SCShareableContent. The async call is bridged to this
    // synchronous handler by blocking on a semaphore until the task finishes.
    let semaphore = DispatchSemaphore(value: 0)
    var scContent: SCShareableContent?
    var scError: Error?

    Task {
        do {
            scContent = try await SCShareableContent.current
        } catch {
            scError = error
        }
        semaphore.signal()
    }
    semaphore.wait()

    if let error = scError {
        return .fail("Failed to get shareable content: \(error.localizedDescription)")
    }

    guard let content = scContent else {
        return .fail("SCShareableContent returned nil")
    }

    // Find the window matching windowId
    guard let targetWindow = content.windows.first(where: { $0.windowID == windowId }) else {
        debug("screenshotWindow: Window not found. Available window IDs: \(content.windows.prefix(20).map { $0.windowID })")
        return .fail("Window not found with ID \(windowId). Use 'listWindows' to get valid window IDs.")
    }

    debug("screenshotWindow: Found window '\(targetWindow.title ?? "untitled")' (\(targetWindow.frame.width)x\(targetWindow.frame.height))")

    // Configure capture: output dimensions follow the window frame.
    let config = SCStreamConfiguration()
    config.captureResolution = retina ? .best : .nominal
    config.width = Int(targetWindow.frame.width)
    config.height = Int(targetWindow.frame.height)

    // Capture the image (same semaphore bridge as above).
    let captureSemaphore = DispatchSemaphore(value: 0)
    var capturedImage: CGImage?
    var captureError: Error?

    let captureStart = CFAbsoluteTimeGetCurrent()

    Task {
        do {
            capturedImage = try await SCScreenshotManager.captureImage(
                contentFilter: SCContentFilter(desktopIndependentWindow: targetWindow),
                configuration: config
            )
        } catch {
            captureError = error
        }
        captureSemaphore.signal()
    }
    captureSemaphore.wait()

    let captureDuration = CFAbsoluteTimeGetCurrent() - captureStart
    debug("screenshotWindow: Capture took \(String(format: "%.3f", captureDuration))s")

    if let error = captureError {
        return .fail("Screenshot capture failed: \(error.localizedDescription)")
    }

    guard let image = capturedImage else {
        return .fail("Screenshot capture returned nil image for window \(windowId)")
    }

    let imageWidth = image.width
    let imageHeight = image.height
    debug("screenshotWindow: Captured image \(imageWidth)x\(imageHeight)")

    // Encode to JPEG or PNG using CGImageDestination
    let imageData = NSMutableData()
    let uti = (format == "png") ? UTType.png.identifier as CFString : UTType.jpeg.identifier as CFString

    guard let destination = CGImageDestinationCreateWithData(imageData as CFMutableData, uti, 1, nil) else {
        return .fail("Failed to create image destination for encoding")
    }

    // The lossy-quality option only applies to JPEG output.
    var options: [CFString: Any] = [:]
    if format == "jpeg" {
        options[kCGImageDestinationLossyCompressionQuality] = quality
    }

    CGImageDestinationAddImage(destination, image, options as CFDictionary)

    guard CGImageDestinationFinalize(destination) else {
        return .fail("Failed to encode image to \(format)")
    }

    // Base64 encode
    let base64String = (imageData as Data).base64EncodedString()
    debug("screenshotWindow: Encoded \(format) data size: \(base64String.count) chars (\(imageData.length) bytes raw)")

    return .ok([
        "imageData": base64String,
        "format": format,
        "width": imageWidth,
        "height": imageHeight
    ] as [String: Any])
}
1228
+
1229
+ // MARK: - Command Dispatch
1230
+
1231
/// Routes a decoded request to the handler registered for its command name.
///
/// - Parameter request: The decoded command; `request.params` is forwarded to every
///   handler except `ping` and `checkPermissions`, which take no arguments.
/// - Returns: The handler's response, or a failure naming the command when it is unknown.
func dispatch(_ request: CommandRequest) -> CommandResponse {
    debug("Dispatching command: \(request.command)")

    let params = request.params

    switch request.command {
    // Diagnostics
    case "ping":              return handlePing()
    case "checkPermissions":  return handleCheckPermissions()

    // App lifecycle
    case "listApps":          return handleListApps(params)
    case "launchApp":         return handleLaunchApp(params)
    case "activateApp":       return handleActivateApp(params)
    case "quitApp":           return handleQuitApp(params)

    // Windows and screenshots
    case "listWindows":       return handleListWindows(params)
    case "getWindowInfo":     return handleGetWindowInfo(params)
    case "screenshotWindow":  return handleScreenshotWindow(params)

    // Accessibility tree queries and interaction
    case "findElements":      return handleFindElements(params)
    case "getTree":           return handleGetTree(params)
    case "clickElement":      return handleClickElement(params)
    case "typeText":          return handleTypeText(params)
    case "readAttribute":     return handleReadAttribute(params)
    case "getFocusedElement": return handleGetFocusedElement(params)

    default:
        return .fail("Unknown command: \(request.command)")
    }
}
1269
+
1270
+ // MARK: - Main Entry Point
1271
+
1272
/// One-shot entry point: reads a JSON command from stdin, dispatches it, and writes
/// exactly one JSON response via `writeResponse`.
func main() {
    // Touching NSApplication.shared initializes the application object before any
    // ScreenCaptureKit call is made (ScreenCaptureKit needs the WindowServer connection).
    _ = NSApplication.shared

    // The whole request is expected on stdin, terminated by EOF.
    let payload = FileHandle.standardInput.readDataToEndOfFile()
    if payload.isEmpty {
        writeResponse(CommandResponse.fail("No input received on stdin"))
        return
    }

    // Decode, dispatch, respond. A decoding failure becomes a failure response.
    let outcome: CommandResponse
    do {
        let request = try JSONDecoder().decode(CommandRequest.self, from: payload)
        outcome = dispatch(request)
    } catch {
        outcome = CommandResponse.fail("Invalid JSON input: \(error.localizedDescription)")
    }
    writeResponse(outcome)
}
1302
+
1303
/// Serializes `response` as a single line of JSON (sorted keys) on stdout.
///
/// If encoding the response fails, a failure object describing the encoding error is
/// written instead. That fallback is produced with `JSONSerialization`, so quotes,
/// backslashes or control characters in the error description are escaped and the
/// output remains valid JSON; a fixed literal is used if even that fails.
func writeResponse(_ response: CommandResponse) {
    let output = FileHandle.standardOutput
    let newline = Data("\n".utf8)

    let encoder = JSONEncoder()
    encoder.outputFormatting = [.sortedKeys]

    do {
        let data = try encoder.encode(response)
        output.write(data)
        output.write(newline)
    } catch {
        // Last-resort fallback: never interpolate the message into JSON by hand.
        let payload: [String: Any] = [
            "success": false,
            "error": "Failed to encode response: \(error.localizedDescription)"
        ]
        let fallback = (try? JSONSerialization.data(withJSONObject: payload, options: [.sortedKeys]))
            ?? Data(#"{"error":"Failed to encode response","success":false}"#.utf8)
        output.write(fallback)
        output.write(newline)
    }
}
1317
+
1318
+ main()