zig-mobile-runner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/CHANGELOG.md +484 -0
  2. package/CONTRIBUTING.md +42 -0
  3. package/FEATURES.md +112 -0
  4. package/LICENSE +21 -0
  5. package/README.md +255 -0
  6. package/SECURITY.md +34 -0
  7. package/build.zig +38 -0
  8. package/build.zig.zon +7 -0
  9. package/clients/README.md +144 -0
  10. package/clients/go/README.md +24 -0
  11. package/clients/go/examples/fake-session/main.go +93 -0
  12. package/clients/go/go.mod +3 -0
  13. package/clients/go/zmr/client.go +432 -0
  14. package/clients/kotlin/README.md +35 -0
  15. package/clients/kotlin/build.gradle.kts +35 -0
  16. package/clients/kotlin/settings.gradle.kts +15 -0
  17. package/clients/kotlin/src/main/kotlin/dev/zmr/FakeSession.kt +86 -0
  18. package/clients/kotlin/src/main/kotlin/dev/zmr/ZmrClient.kt +67 -0
  19. package/clients/python/README.md +29 -0
  20. package/clients/python/examples/fake_session.py +48 -0
  21. package/clients/python/pyproject.toml +13 -0
  22. package/clients/python/zmr_client.py +202 -0
  23. package/clients/rust/Cargo.lock +107 -0
  24. package/clients/rust/Cargo.toml +10 -0
  25. package/clients/rust/README.md +19 -0
  26. package/clients/rust/examples/fake_session.rs +70 -0
  27. package/clients/rust/src/lib.rs +461 -0
  28. package/clients/swift/Package.swift +16 -0
  29. package/clients/swift/README.md +36 -0
  30. package/clients/swift/Sources/ZMRClient/ZMRClient.swift +114 -0
  31. package/clients/swift/Sources/ZMRFakeSession/main.swift +86 -0
  32. package/clients/typescript/README.md +34 -0
  33. package/clients/typescript/examples/fake-session.mjs +36 -0
  34. package/clients/typescript/index.d.ts +144 -0
  35. package/clients/typescript/index.mjs +192 -0
  36. package/clients/typescript/package.json +8 -0
  37. package/docs/adr/0001-agent-native-runner-boundary.md +31 -0
  38. package/docs/adr/0002-app-local-zmr-contract.md +39 -0
  39. package/docs/adr/0003-ios-simulator-xctest-shim.md +41 -0
  40. package/docs/adr/0004-benchmark-claims-and-baseline-collection.md +37 -0
  41. package/docs/adr/README.md +12 -0
  42. package/docs/ai-agents.md +156 -0
  43. package/docs/app-integration.md +316 -0
  44. package/docs/benchmarking.md +275 -0
  45. package/docs/client-installation.md +141 -0
  46. package/docs/clients.md +98 -0
  47. package/docs/config.md +175 -0
  48. package/docs/demo.md +259 -0
  49. package/docs/dsl.md +57 -0
  50. package/docs/install.md +233 -0
  51. package/docs/market-positioning.md +70 -0
  52. package/docs/npm.md +359 -0
  53. package/docs/protocol-fixtures/README.md +8 -0
  54. package/docs/protocol-fixtures/core-session.requests.jsonl +8 -0
  55. package/docs/protocol-fixtures/core-session.responses.jsonl +8 -0
  56. package/docs/protocol-versioning.md +65 -0
  57. package/docs/protocol.md +560 -0
  58. package/docs/publication.md +77 -0
  59. package/docs/release-audit.md +99 -0
  60. package/docs/release-candidate.md +111 -0
  61. package/docs/release-evidence.md +188 -0
  62. package/docs/release-notes-template.md +58 -0
  63. package/docs/roadmap.md +334 -0
  64. package/docs/scenario-authoring.md +88 -0
  65. package/docs/shipping.md +170 -0
  66. package/docs/trace-privacy.md +88 -0
  67. package/docs/troubleshooting.md +256 -0
  68. package/examples/android-app-auth-probe.json +89 -0
  69. package/examples/android-app-error-state.json +13 -0
  70. package/examples/android-app-login-smoke.json +192 -0
  71. package/examples/android-app-onboarding.json +12 -0
  72. package/examples/android-app-referral-deep-link.json +12 -0
  73. package/examples/android-shim-smoke.json +19 -0
  74. package/examples/demo-failure.json +12 -0
  75. package/examples/demo-fake.json +14 -0
  76. package/examples/ios-dev-client-open-link.json +26 -0
  77. package/examples/ios-dev-client-route-snapshot.json +24 -0
  78. package/examples/ios-shim-smoke.json +23 -0
  79. package/examples/ios-smoke.json +9 -0
  80. package/go.work +3 -0
  81. package/npm/agents.mjs +183 -0
  82. package/npm/app-config.mjs +95 -0
  83. package/npm/build-zmr.mjs +21 -0
  84. package/npm/commands.mjs +104 -0
  85. package/npm/generated-files.mjs +50 -0
  86. package/npm/index.mjs +75 -0
  87. package/npm/init-app.mjs +80 -0
  88. package/npm/package-scripts.mjs +72 -0
  89. package/npm/postinstall.mjs +21 -0
  90. package/npm/scaffold.mjs +179 -0
  91. package/npm/scenarios.mjs +93 -0
  92. package/npm/setup.mjs +69 -0
  93. package/npm/wizard.mjs +117 -0
  94. package/npm/zmr.mjs +23 -0
  95. package/package.json +114 -0
  96. package/prebuilds/darwin-arm64/zmr +0 -0
  97. package/prebuilds/darwin-x64/zmr +0 -0
  98. package/prebuilds/linux-arm64/zmr +0 -0
  99. package/prebuilds/linux-x64/zmr +0 -0
  100. package/schemas/README.md +26 -0
  101. package/schemas/action-result.schema.json +27 -0
  102. package/schemas/capabilities-output.schema.json +98 -0
  103. package/schemas/devices-output.schema.json +25 -0
  104. package/schemas/doctor-output.schema.json +51 -0
  105. package/schemas/explain-output.schema.json +51 -0
  106. package/schemas/import-output.schema.json +23 -0
  107. package/schemas/init-output.schema.json +71 -0
  108. package/schemas/json-rpc.schema.json +55 -0
  109. package/schemas/release-manifest.schema.json +43 -0
  110. package/schemas/release-readiness-output.schema.json +127 -0
  111. package/schemas/run-output.schema.json +43 -0
  112. package/schemas/scenario.schema.json +128 -0
  113. package/schemas/schemas-output.schema.json +26 -0
  114. package/schemas/semantic-snapshot.schema.json +116 -0
  115. package/schemas/snapshot.schema.json +60 -0
  116. package/schemas/trace-event.schema.json +14 -0
  117. package/schemas/trace-manifest.schema.json +59 -0
  118. package/schemas/validate-output.schema.json +42 -0
  119. package/schemas/version-output.schema.json +23 -0
  120. package/schemas/zmr-config.schema.json +75 -0
  121. package/scripts/android-emulator.sh +126 -0
  122. package/scripts/assert-ios-physical-ready.sh +213 -0
  123. package/scripts/benchmark-command.sh +307 -0
  124. package/scripts/benchmark.sh +359 -0
  125. package/scripts/benchmark_gate.py +117 -0
  126. package/scripts/benchmark_result_row.py +88 -0
  127. package/scripts/compare-benchmarks.py +288 -0
  128. package/scripts/create-android-demo-app.sh +342 -0
  129. package/scripts/create-ios-demo-app.sh +261 -0
  130. package/scripts/demo-android-real.sh +232 -0
  131. package/scripts/demo-ios-real.sh +270 -0
  132. package/scripts/demo.sh +464 -0
  133. package/scripts/device-matrix.sh +338 -0
  134. package/scripts/ensure-ios-shim-target.rb +237 -0
  135. package/scripts/install-android-shim.sh +281 -0
  136. package/scripts/install-ios-shim.sh +589 -0
  137. package/scripts/pilot-gate.sh +560 -0
  138. package/scripts/release-readiness.py +838 -0
  139. package/scripts/release-readiness.sh +91 -0
  140. package/scripts/run-android-pilot.sh +561 -0
  141. package/scripts/run-ios-pilot.sh +509 -0
  142. package/shims/android/README.md +21 -0
  143. package/shims/android/ZMRShimInstrumentedTest.java +152 -0
  144. package/shims/android/protocol.md +18 -0
  145. package/shims/ios/README.md +50 -0
  146. package/shims/ios/ZMRShim.swift +110 -0
  147. package/shims/ios/ZMRShimUITestCase.swift +475 -0
  148. package/shims/ios/protocol.md +74 -0
  149. package/skills/zmr-mobile-testing/SKILL.md +127 -0
  150. package/src/android.zig +344 -0
  151. package/src/android_device_info.zig +99 -0
  152. package/src/android_emulator.zig +154 -0
  153. package/src/android_screen_recording.zig +112 -0
  154. package/src/android_shell.zig +112 -0
  155. package/src/bundle.zig +124 -0
  156. package/src/bundle_redaction.zig +272 -0
  157. package/src/bundle_tar.zig +123 -0
  158. package/src/cli_devices.zig +97 -0
  159. package/src/cli_doctor.zig +114 -0
  160. package/src/cli_import.zig +70 -0
  161. package/src/cli_info.zig +39 -0
  162. package/src/cli_init.zig +72 -0
  163. package/src/cli_output.zig +467 -0
  164. package/src/cli_run.zig +259 -0
  165. package/src/cli_serve.zig +287 -0
  166. package/src/cli_trace.zig +111 -0
  167. package/src/cli_validate.zig +41 -0
  168. package/src/command.zig +211 -0
  169. package/src/config.zig +305 -0
  170. package/src/config_diagnostics.zig +212 -0
  171. package/src/config_paths.zig +49 -0
  172. package/src/device_registry.zig +37 -0
  173. package/src/doctor.zig +412 -0
  174. package/src/doctor_hints.zig +52 -0
  175. package/src/errors.zig +55 -0
  176. package/src/fake_device.zig +163 -0
  177. package/src/health.zig +28 -0
  178. package/src/importer.zig +343 -0
  179. package/src/importer_json.zig +100 -0
  180. package/src/importer_model.zig +103 -0
  181. package/src/ios.zig +399 -0
  182. package/src/ios_devices.zig +219 -0
  183. package/src/ios_lifecycle.zig +72 -0
  184. package/src/ios_shim.zig +242 -0
  185. package/src/ios_snapshot.zig +20 -0
  186. package/src/json_fields.zig +80 -0
  187. package/src/json_rpc.zig +150 -0
  188. package/src/json_rpc_methods.zig +318 -0
  189. package/src/json_rpc_observation.zig +31 -0
  190. package/src/json_rpc_params.zig +52 -0
  191. package/src/json_rpc_protocol.zig +110 -0
  192. package/src/json_rpc_trace.zig +73 -0
  193. package/src/main.zig +135 -0
  194. package/src/mcp.zig +234 -0
  195. package/src/mcp_protocol.zig +64 -0
  196. package/src/mcp_trace.zig +83 -0
  197. package/src/report.zig +346 -0
  198. package/src/report_html.zig +63 -0
  199. package/src/report_values.zig +27 -0
  200. package/src/run_options.zig +152 -0
  201. package/src/runner.zig +280 -0
  202. package/src/runner_actions.zig +109 -0
  203. package/src/runner_config.zig +6 -0
  204. package/src/runner_diagnostics.zig +268 -0
  205. package/src/runner_events.zig +170 -0
  206. package/src/runner_native.zig +88 -0
  207. package/src/runner_waits.zig +300 -0
  208. package/src/scaffold.zig +472 -0
  209. package/src/scenario.zig +346 -0
  210. package/src/scenario_fields.zig +50 -0
  211. package/src/schema_registry.zig +53 -0
  212. package/src/selector.zig +84 -0
  213. package/src/semantic.zig +171 -0
  214. package/src/trace.zig +315 -0
  215. package/src/trace_json.zig +340 -0
  216. package/src/trace_summary.zig +218 -0
  217. package/src/trace_summary_diagnostic.zig +202 -0
  218. package/src/types.zig +120 -0
  219. package/src/uiautomator.zig +164 -0
  220. package/src/validation.zig +187 -0
  221. package/src/version.zig +22 -0
  222. package/viewer/app.js +373 -0
  223. package/viewer/index.html +126 -0
  224. package/viewer/parser.js +233 -0
  225. package/viewer/styles.css +585 -0
@@ -0,0 +1,838 @@
1
+ #!/usr/bin/env python3
2
+ import json
3
+ import os
4
+ import shlex
5
+ import sys
6
+
7
# Command line: <target> <json-mode> [evidence.jsonl ...]
# json-mode is enabled only by the literal string "1".
if len(sys.argv) < 3:
    # Fail with a readable message instead of an IndexError traceback.
    print(f"usage: {sys.argv[0]} <target> <json-mode:0|1> [evidence.jsonl ...]", file=sys.stderr)
    sys.exit(2)

target, json_mode = sys.argv[1], sys.argv[2] == "1"
evidence_paths = sys.argv[3:]

# rows: one dict per non-blank evidence JSONL line, across all files, in order.
rows = []
# In JSON mode problems are collected here instead of aborting the run.
missing_evidence_files = []
invalid_evidence_lines = []
for evidence_path in evidence_paths:
    if not os.path.isfile(evidence_path):
        if json_mode:
            missing_evidence_files.append(evidence_path)
            continue
        print(f"error: evidence file not found: {evidence_path}", file=sys.stderr)
        sys.exit(2)
    with open(evidence_path, encoding="utf-8") as handle:
        for line_number, line in enumerate(handle, 1):
            line = line.strip()
            if not line:
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError as exc:
                problem = str(exc)
            else:
                # Every consumer below calls row.get(...), so a line that is valid
                # JSON but not an object (list, string, number, null) is rejected
                # here rather than crashing later with AttributeError.
                if isinstance(parsed, dict):
                    rows.append(parsed)
                    continue
                problem = f"expected a JSON object, got {type(parsed).__name__}"
            if json_mode:
                invalid_evidence_lines.append((evidence_path, line_number, problem))
                continue
            print(f"error: invalid evidence JSONL in {evidence_path} at line {line_number}: {problem}", file=sys.stderr)
            sys.exit(2)
33
+
34
def unique_names(names):
    """Return the items of ``names`` as a list, dropping repeats.

    The first occurrence of each item is kept and the original order is
    preserved. ``names`` may be any iterable of hashable items.
    """
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(names))
43
+
44
+
45
# De-duplicated names of evidence rows by status; rows without a "name" are
# reported as "<unnamed>".
failed = unique_names(
    [row.get("name", "<unnamed>") for row in rows if row.get("status") == "failed"]
)
planned = unique_names(
    [row.get("name", "<unnamed>") for row in rows if row.get("status") == "planned"]
)
47
+
48
+
49
def command_flags(row):
    """Extract ``--long`` options from the row's ``command`` string.

    The command is tokenized with ``shlex.split``. Each token starting with
    ``--`` becomes a key in the result:

    * ``--key=value`` maps ``--key`` to ``value``;
    * ``--key value`` maps ``--key`` to ``value`` when the next token does not
      itself start with ``--``;
    * otherwise the flag maps to the string ``"true"``.

    Parsing stops at a bare ``--`` token. Everything after it belongs to a
    wrapped command (for example ``tool --runs 20 -- other --device x``), and
    its options must not be mistaken for options of the recorded command.

    Returns an empty dict when ``command`` is missing, is not a string, or
    cannot be tokenized (for example an unbalanced quote).
    """
    command = row.get("command")
    if not isinstance(command, str):
        return {}
    try:
        parts = shlex.split(command)
    except ValueError:
        return {}
    flags = {}
    index = 0
    while index < len(parts):
        part = parts[index]
        if part == "--":
            # End-of-options marker: the remainder is not ours to interpret.
            break
        if part.startswith("--"):
            if "=" in part:
                key, value = part.split("=", 1)
                flags[key] = value
            elif index + 1 < len(parts) and not parts[index + 1].startswith("--"):
                flags[part] = parts[index + 1]
                index += 1  # the value token has been consumed
            else:
                flags[part] = "true"
        index += 1
    return flags
72
+
73
+
74
def numeric_value(row, field, flag):
    """Read a finite number from ``row[field]``, falling back to a command flag.

    When ``row`` has no value for ``field`` (missing or ``None``), the value of
    ``flag`` parsed from the row's ``command`` string is used instead.

    Returns the value as a ``float``, or ``None`` when nothing usable is found.
    Booleans, NaN and infinities are rejected: ``float(False)`` is ``0.0`` and
    would otherwise satisfy a "<= 0" threshold, and ``json.loads`` accepts the
    ``NaN`` / ``Infinity`` literals by default.
    """
    import math

    value = row.get(field)
    if value is None:
        value = command_flags(row).get(flag)
    if isinstance(value, bool):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large to be represented as a float.
        return None
    return number if math.isfinite(number) else None
82
+
83
+
84
def structured_numeric_value(row, field):
    """Read a finite number from ``row[field]`` only (no command-flag fallback).

    Returns the value as a ``float``, or ``None`` when the field is missing or
    not usable as a number. Booleans, NaN and infinities are rejected:
    ``float(False)`` is ``0.0`` and would otherwise satisfy a "<= 0" threshold,
    and ``json.loads`` accepts the ``NaN`` / ``Infinity`` literals by default.
    """
    import math

    value = row.get(field)
    if isinstance(value, bool):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large to be represented as a float.
        return None
    return number if math.isfinite(number) else None
89
+
90
+
91
def concrete_value(value):
    """Return ``True`` when ``value`` is a real, filled-in string.

    A value counts as concrete when it is a ``str``, is not blank after
    stripping whitespace, and does not start with ``<`` (the marker used by
    template placeholders such as ``<app-id>``).

    Always returns a ``bool``; a blank string yields ``False`` rather than the
    empty string itself.
    """
    if not isinstance(value, str):
        return False
    text = value.strip()
    return bool(text) and not text.startswith("<")
93
+
94
+
95
def concrete_physical_device_value(value):
    """Tell whether ``value`` is a concrete device identifier that is not a simulator alias.

    The value must pass ``concrete_value`` and, compared case-insensitively
    after stripping, must not be ``booted``, ``simulator`` or ``iphonesimulator``.
    """
    simulator_aliases = {"booted", "simulator", "iphonesimulator"}
    if concrete_value(value):
        normalized = value.strip().lower()
        return normalized not in simulator_aliases
    return False
99
+
100
+
101
def pilot_app_id_value(label, row):
    """Find the app id recorded for a pilot evidence row.

    Lookup order: the platform-specific row field, the generic ``appId`` field,
    the platform-specific command flag, then ``--app-id``. The Android names are
    used when ``label`` is ``"Android hardware pilot"``; every other label uses
    the iOS names. Returns the first candidate accepted by ``concrete_value``,
    or ``None``.
    """
    flags = command_flags(row)
    if label == "Android hardware pilot":
        field_key, flag_key = "androidAppId", "--android-app-id"
    else:
        field_key, flag_key = "iosAppId", "--ios-app-id"
    ordered = (
        row.get(field_key),
        row.get("appId"),
        flags.get(flag_key),
        flags.get("--app-id"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
121
+
122
+
123
def pilot_app_root_value(label, row):
    """Find the app root recorded for a pilot evidence row.

    Lookup order: the platform-specific row field, the generic ``appRoot``
    field, the platform-specific command flag, then ``--app-root``. The Android
    names are used when ``label`` is ``"Android hardware pilot"``; every other
    label uses the iOS names. Returns the first candidate accepted by
    ``concrete_value``, or ``None``.
    """
    flags = command_flags(row)
    if label == "Android hardware pilot":
        field_key, flag_key = "androidAppRoot", "--android-app-root"
    else:
        field_key, flag_key = "iosAppRoot", "--ios-app-root"
    ordered = (
        row.get(field_key),
        row.get("appRoot"),
        flags.get(flag_key),
        flags.get("--app-root"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
143
+
144
+
145
def pilot_app_artifact_value(label, row):
    """Find the app artifact recorded for a pilot evidence row.

    For ``"Android hardware pilot"`` this is whatever ``pilot_app_root_value``
    returns. For every other label the lookup order is ``iosAppPath``,
    ``appPath``, ``--ios-app-path``, ``--app-path``; the first candidate
    accepted by ``concrete_value`` is returned, or ``None``.
    """
    if label != "Android hardware pilot":
        flags = command_flags(row)
        ordered = (
            row.get("iosAppPath"),
            row.get("appPath"),
            flags.get("--ios-app-path"),
            flags.get("--app-path"),
        )
        return next((item for item in ordered if concrete_value(item)), None)
    return pilot_app_root_value(label, row)
159
+
160
+
161
def physical_ios_device_value(row):
    """Find an iOS device identifier that is not a simulator alias.

    Lookup order: ``iosDeviceId``, ``deviceId``, ``device`` row fields, then the
    ``--ios-device`` and ``--device`` command flags. Returns the first candidate
    accepted by ``concrete_physical_device_value``, or ``None``.
    """
    flags = command_flags(row)
    ordered = (
        row.get("iosDeviceId"),
        row.get("deviceId"),
        row.get("device"),
        flags.get("--ios-device"),
        flags.get("--device"),
    )
    return next((item for item in ordered if concrete_physical_device_value(item)), None)
174
+
175
+
176
def ios_device_value(row):
    """Find the iOS device identifier recorded for an evidence row.

    Lookup order: ``iosDeviceId``, ``deviceId``, ``device`` row fields, then the
    ``--ios-device`` and ``--device`` command flags. Returns the first candidate
    accepted by ``concrete_value``, or ``None``.
    """
    flags = command_flags(row)
    ordered = (
        row.get("iosDeviceId"),
        row.get("deviceId"),
        row.get("device"),
        flags.get("--ios-device"),
        flags.get("--device"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
189
+
190
+
191
def android_device_value(row):
    """Find the Android device identifier recorded for an evidence row.

    Lookup order: ``androidDeviceId``, ``deviceId``, ``device`` row fields, then
    the ``--android-device`` and ``--device`` command flags. Returns the first
    candidate accepted by ``concrete_value``, or ``None``.
    """
    flags = command_flags(row)
    ordered = (
        row.get("androidDeviceId"),
        row.get("deviceId"),
        row.get("device"),
        flags.get("--android-device"),
        flags.get("--device"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
204
+
205
+
206
def pilot_thresholds_pass(label, row):
    """Decide whether a pilot evidence row meets every pilot requirement.

    Required: structured ``runs`` >= 20, ``minPassRate`` >= 100 and
    ``maxFailures`` <= 0; an app id, app root and app artifact; and, for the
    three known pilot labels, the matching device identifier. Returns a bool.
    """
    runs = structured_numeric_value(row, "runs")
    min_pass_rate = structured_numeric_value(row, "minPassRate")
    max_failures = structured_numeric_value(row, "maxFailures")

    # Comparisons are written as "not (x >= limit)" so that a NaN value is
    # rejected exactly as a failed ">=" / "<=" test would reject it.
    if runs is None or not (runs >= 20):
        return False
    if min_pass_rate is None or not (min_pass_rate >= 100):
        return False
    if max_failures is None or not (max_failures <= 0):
        return False

    for lookup in (pilot_app_id_value, pilot_app_root_value, pilot_app_artifact_value):
        if lookup(label, row) is None:
            return False

    if label == "Android hardware pilot":
        return android_device_value(row) is not None
    if label == "iOS simulator hardware pilot":
        return ios_device_value(row) is not None
    if label == "iOS physical hardware pilot":
        return physical_ios_device_value(row) is not None
    # Other labels carry no device requirement.
    return True
229
+
230
+
231
def pilot_threshold_reason(label, row):
    """Describe which pilot requirements a row fails to meet.

    Returns ``"requires "`` followed by a comma-separated list of the unmet
    requirements, in a fixed order: numeric thresholds, app id / root /
    artifact, then the device identifier for the given pilot label.
    """
    unmet = []

    # (field, message when absent, out-of-range test, message when out of range)
    numeric_rules = (
        ("runs", "structured runs evidence present", lambda n: n < 20, "runs >= 20"),
        ("minPassRate", "structured minPassRate evidence present", lambda n: n < 100, "minPassRate >= 100"),
        ("maxFailures", "structured maxFailures evidence present", lambda n: n > 0, "maxFailures <= 0"),
    )
    for field, absent_message, out_of_range, range_message in numeric_rules:
        number = structured_numeric_value(row, field)
        if number is None:
            unmet.append(absent_message)
        elif out_of_range(number):
            unmet.append(range_message)

    presence_rules = (
        (pilot_app_id_value, "appId present"),
        (pilot_app_root_value, "app root evidence present"),
        (pilot_app_artifact_value, "app artifact evidence present"),
    )
    for lookup, message in presence_rules:
        if lookup(label, row) is None:
            unmet.append(message)

    device_rules = (
        ("Android hardware pilot", android_device_value, "Android device identifier present"),
        ("iOS simulator hardware pilot", ios_device_value, "iOS simulator device identifier present"),
        ("iOS physical hardware pilot", physical_ios_device_value, "physical device identifier present"),
    )
    for pilot_label, lookup, message in device_rules:
        if label == pilot_label and lookup(row) is None:
            unmet.append(message)

    return "requires " + ", ".join(unmet)
261
+
262
+
263
def benchmark_thresholds_pass(row):
    """Decide whether a benchmark comparison row meets every requirement.

    The configured thresholds (row field or command flag) must be at least as
    strict as: pass rate >= 100, failures <= 0, mean and p95 speedup >= 1.25.
    The row must name a candidate, a baseline and a results path, carry
    same-context evidence, report >= 20 candidate and baseline runs, and its
    measured values must meet the configured thresholds. Returns a bool.
    """
    min_candidate_pass_rate = numeric_value(row, "minCandidatePassRate", "--min-candidate-pass-rate")
    max_candidate_failures = numeric_value(row, "maxCandidateFailures", "--max-candidate-failures")
    min_mean_speedup = numeric_value(row, "minMeanSpeedup", "--min-mean-speedup")
    min_p95_speedup = numeric_value(row, "minP95Speedup", "--min-p95-speedup")
    candidate_pass_rate = structured_numeric_value(row, "candidatePassRate")
    candidate_failures = structured_numeric_value(row, "candidateFailures")
    candidate_runs = structured_numeric_value(row, "candidateRuns")
    baseline_runs = structured_numeric_value(row, "baselineRuns")
    mean_speedup = structured_numeric_value(row, "meanSpeedup")
    p95_speedup = structured_numeric_value(row, "p95Speedup")

    numbers = (
        min_candidate_pass_rate,
        max_candidate_failures,
        min_mean_speedup,
        min_p95_speedup,
        candidate_pass_rate,
        candidate_failures,
        candidate_runs,
        baseline_runs,
        mean_speedup,
        p95_speedup,
    )
    # Any absent number fails the gate; past this point all are floats.
    if any(number is None for number in numbers):
        return False

    strict_enough = (
        min_candidate_pass_rate >= 100
        and max_candidate_failures <= 0
        and min_mean_speedup >= 1.25
        and min_p95_speedup >= 1.25
    )
    if not strict_enough:
        return False

    if benchmark_candidate_value(row) is None:
        return False
    if benchmark_baseline_value(row) is None:
        return False
    if benchmark_results_value(row) is None:
        return False
    if not benchmark_same_context_pass(row):
        return False

    return (
        candidate_runs >= 20
        and baseline_runs >= 20
        and candidate_pass_rate >= min_candidate_pass_rate
        and candidate_failures <= max_candidate_failures
        and mean_speedup >= min_mean_speedup
        and p95_speedup >= min_p95_speedup
    )
300
+
301
+
302
def benchmark_same_context_pass(row):
    """Check that a benchmark row carries complete same-context evidence.

    ``sameContext`` must be exactly ``True`` and ``context`` must be a dict
    whose ``platform``, ``device``, ``appId``, ``scenario`` and ``appBuild``
    entries are all accepted by ``concrete_value``. Returns a bool.
    """
    context = row.get("context")
    if row.get("sameContext") is not True or not isinstance(context, dict):
        return False
    for field in ("platform", "device", "appId", "scenario", "appBuild"):
        if not concrete_value(context.get(field)):
            return False
    return True
310
+
311
+
312
def benchmark_candidate_value(row):
    """Find the benchmark candidate name recorded for an evidence row.

    Lookup order: ``candidate``, ``candidateName`` row fields, then the
    ``--candidate`` command flag. Returns the first candidate accepted by
    ``concrete_value``, or ``None``.
    """
    flags = command_flags(row)
    ordered = (
        row.get("candidate"),
        row.get("candidateName"),
        flags.get("--candidate"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
323
+
324
+
325
def benchmark_baseline_value(row):
    """Find the benchmark baseline name recorded for an evidence row.

    Lookup order: ``baseline``, ``baselineName`` row fields, then the
    ``--baseline`` command flag. Returns the first candidate accepted by
    ``concrete_value``, or ``None``.
    """
    flags = command_flags(row)
    ordered = (
        row.get("baseline"),
        row.get("baselineName"),
        flags.get("--baseline"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
336
+
337
+
338
def benchmark_results_value(row):
    """Find the benchmark results path recorded for an evidence row.

    Lookup order: ``results``, ``resultsPath`` row fields, then the
    ``--results`` command flag. Returns the first candidate accepted by
    ``concrete_value``, or ``None``.
    """
    flags = command_flags(row)
    ordered = (
        row.get("results"),
        row.get("resultsPath"),
        flags.get("--results"),
    )
    return next((item for item in ordered if concrete_value(item)), None)
349
+
350
+
351
def benchmark_threshold_reason(row):
    """Describe which benchmark comparison requirements a row fails to meet.

    Returns ``"requires "`` followed by a comma-separated list of the unmet
    requirements, in a fixed order. A measured-versus-configured requirement
    is only reported when the configured threshold itself is available.
    """
    min_candidate_pass_rate = numeric_value(row, "minCandidatePassRate", "--min-candidate-pass-rate")
    max_candidate_failures = numeric_value(row, "maxCandidateFailures", "--max-candidate-failures")
    min_mean_speedup = numeric_value(row, "minMeanSpeedup", "--min-mean-speedup")
    min_p95_speedup = numeric_value(row, "minP95Speedup", "--min-p95-speedup")
    candidate_pass_rate = structured_numeric_value(row, "candidatePassRate")
    candidate_failures = structured_numeric_value(row, "candidateFailures")
    candidate_runs = structured_numeric_value(row, "candidateRuns")
    baseline_runs = structured_numeric_value(row, "baselineRuns")
    mean_speedup = structured_numeric_value(row, "meanSpeedup")
    p95_speedup = structured_numeric_value(row, "p95Speedup")

    def misses_floor(measured, floor):
        # Unmet when a floor is configured and the measurement is absent or below it.
        return floor is not None and (measured is None or measured < floor)

    def exceeds_ceiling(measured, ceiling):
        # Unmet when a ceiling is configured and the measurement is absent or above it.
        return ceiling is not None and (measured is None or measured > ceiling)

    # (unmet?, message) pairs, evaluated and reported in this order.
    checks = [
        (min_candidate_pass_rate is None or min_candidate_pass_rate < 100, "minCandidatePassRate >= 100"),
        (max_candidate_failures is None or max_candidate_failures > 0, "maxCandidateFailures <= 0"),
        (min_mean_speedup is None or min_mean_speedup < 1.25, "minMeanSpeedup >= 1.25"),
        (min_p95_speedup is None or min_p95_speedup < 1.25, "minP95Speedup >= 1.25"),
        (benchmark_candidate_value(row) is None, "candidate name present"),
        (benchmark_baseline_value(row) is None, "baseline name present"),
        (benchmark_results_value(row) is None, "results path present"),
        (not benchmark_same_context_pass(row), "same benchmark context evidence present"),
        (candidate_runs is None or candidate_runs < 20, "candidateRuns >= 20"),
        (baseline_runs is None or baseline_runs < 20, "baselineRuns >= 20"),
        (misses_floor(candidate_pass_rate, min_candidate_pass_rate), "candidatePassRate >= minCandidatePassRate"),
        (exceeds_ceiling(candidate_failures, max_candidate_failures), "candidateFailures <= maxCandidateFailures"),
        (misses_floor(mean_speedup, min_mean_speedup), "meanSpeedup >= minMeanSpeedup"),
        (misses_floor(p95_speedup, min_p95_speedup), "p95Speedup >= minP95Speedup"),
    ]
    return "requires " + ", ".join(message for is_unmet, message in checks if is_unmet)
424
+
425
+
426
def row_satisfies(label, row):
    """Decide whether an evidence row satisfies the requirement named ``label``.

    The row must have status ``"passed"``. Pilot labels additionally need
    ``pilot_thresholds_pass``, ``"physical iOS readiness"`` needs a physical
    iOS device identifier, and ``"competitive benchmark comparison"`` needs
    ``benchmark_thresholds_pass``. Any other label is satisfied by the passed
    status alone.
    """
    if row.get("status") != "passed":
        return False
    if label == "competitive benchmark comparison":
        return benchmark_thresholds_pass(row)
    if label == "physical iOS readiness":
        return physical_ios_device_value(row) is not None
    pilot_labels = {
        "Android hardware pilot",
        "iOS simulator hardware pilot",
        "iOS physical hardware pilot",
    }
    if label in pilot_labels:
        return pilot_thresholds_pass(label, row)
    return True
440
+
441
+
442
def has_passed_evidence(label, names):
    """Return ``True`` when any loaded row named in ``names`` satisfies ``label``.

    ``names`` may be a single name or a collection of accepted names.
    """
    accepted = (names,) if isinstance(names, str) else names
    for row in rows:
        if row.get("name") in accepted and row_satisfies(label, row):
            return True
    return False
446
+
447
+
448
def requirement_status(label, names):
    """Summarize how the loaded evidence rows stand against one requirement.

    ``names`` may be a single evidence name or a collection of accepted names.
    Rows whose ``name`` is accepted are examined, and the first applicable
    outcome wins, in this priority: ``satisfied``, ``failed``, ``planned``,
    ``insufficient`` (passed but not meeting the requirement), ``missing``.

    Returns a dict with ``name`` and ``status``, plus ``evidenceName`` for
    every outcome except ``missing`` and ``reason`` for every outcome except
    ``satisfied``.
    """
    accepted = (names,) if isinstance(names, str) else names
    matches = [row for row in rows if row.get("name") in accepted]

    def first_match(predicate):
        # Earliest matching row for which predicate holds, else None.
        return next((row for row in matches if predicate(row)), None)

    def with_status(wanted):
        return first_match(lambda row: row.get("status") == wanted)

    satisfying = first_match(lambda row: row_satisfies(label, row))
    if satisfying is not None:
        return {
            "name": label,
            "status": "satisfied",
            "evidenceName": satisfying.get("name", ""),
        }

    failed_row = with_status("failed")
    if failed_row is not None:
        return {
            "name": label,
            "status": "failed",
            "evidenceName": failed_row.get("name", ""),
            "reason": "evidence row failed",
        }

    planned_row = with_status("planned")
    if planned_row is not None:
        return {
            "name": label,
            "status": "planned",
            "evidenceName": planned_row.get("name", ""),
            "reason": "evidence row is planned but not executed",
        }

    passed_row = with_status("passed")
    if passed_row is not None:
        pilot_labels = {
            "Android hardware pilot",
            "iOS simulator hardware pilot",
            "iOS physical hardware pilot",
        }
        if label in pilot_labels:
            reason = pilot_threshold_reason(label, passed_row)
        elif label == "physical iOS readiness":
            reason = "requires concrete physical device identifier evidence"
        elif label == "competitive benchmark comparison":
            reason = benchmark_threshold_reason(passed_row)
        else:
            reason = "passed evidence row does not satisfy this requirement"
        return {
            "name": label,
            "status": "insufficient",
            "evidenceName": passed_row.get("name", ""),
            "reason": reason,
        }

    return {
        "name": label,
        "status": "missing",
        "reason": "no matching passed evidence row",
    }
499
+
500
+
501
# Names of every evidence row whose status is "passed".
passed_names = set(row.get("name") for row in rows if row.get("status") == "passed")

# (requirement label, accepted evidence name or names); always required.
requirements = [
    ("local release gate", "local release gate"),
    ("public Android demo", ("public Android emulator demo", "public Android demo app build")),
    ("public iOS simulator demo", "public iOS simulator demo"),
]

# "production" and "market-claim" targets also need device pilots.
if target == "production" or target == "market-claim":
    requirements += [
        ("physical iOS readiness", "physical iOS readiness"),
        ("Android hardware pilot", "Android hardware pilot"),
        ("iOS simulator hardware pilot", "iOS simulator hardware pilot"),
        ("iOS physical hardware pilot", "iOS physical hardware pilot"),
    ]

# "market-claim" additionally needs a benchmark comparison.
if target == "market-claim":
    requirements.append(
        ("competitive benchmark comparison", ("competitive benchmark comparison", "benchmark comparison"))
    )

# Problems collected while loading evidence, rendered as labels.
missing_file_labels = [f"evidence file not found: {path}" for path in missing_evidence_files]
invalid_evidence_labels = [
    f"invalid evidence JSONL in {path} at line {line}: {error}"
    for path, line, error in invalid_evidence_lines
]
evidence_issue_labels = missing_file_labels + invalid_evidence_labels

requirement_results = [requirement_status(label, names) for label, names in requirements]

missing = list(evidence_issue_labels)
missing.extend(item["name"] for item in requirement_results if item.get("status") == "missing")

insufficient = [
    item["name"] for item in requirement_results if item.get("status") == "insufficient"
]

failed_evidence_labels = [f"failed evidence: {name}" for name in failed]
planned_evidence_labels = [f"planned evidence: {name}" for name in planned]

# Anything listed here blocks readiness: evidence problems, failed or planned
# rows, and every requirement that is not satisfied.
blocked = list(evidence_issue_labels)
blocked.extend(failed_evidence_labels)
blocked.extend(planned_evidence_labels)
blocked.extend(item["name"] for item in requirement_results if item.get("status") != "satisfied")

ok = len(blocked) == 0
status = "ready" if ok else "blocked"
544
+
545
def grouped_simulator_pilot_command(evidence_out):
    """Build the suggested ``zmr-pilot-gate --android --ios`` command line.

    The command is a template with placeholder paths and ids; ``evidence_out``
    is inserted as given after ``--evidence-out``.
    """
    pieces = [
        "zmr-pilot-gate --android --ios",
        "--android-app-root /path/to/mobile-app",
        "--android-app-id <android-app-id>",
        "--android-device <android-serial>",
        "--ios-app-root /path/to/mobile-app",
        "--ios-app-path /path/to/mobile-app/build/Debug-iphonesimulator/Sample.app",
        "--ios-app-id <ios-app-id>",
        "--ios-device booted",
        "--ios-shim /path/to/mobile-app/.zmr/ios-shim",
        "--runs 20",
        "--min-pass-rate 100",
        "--max-failures 0",
        f"--evidence-out {evidence_out}",
    ]
    return " ".join(pieces)
561
+
562
+
563
def physical_ios_pilot_command(evidence_out):
    """Build the suggested ``zmr-pilot-gate --ios`` command line for a physical device.

    The command is a template with placeholder paths and ids; ``evidence_out``
    is inserted as given after ``--evidence-out``.
    """
    pieces = [
        "zmr-pilot-gate --ios",
        "--ios-device-type physical",
        "--ios-device <physical-device-id>",
        "--ios-app-root /path/to/mobile-app",
        "--ios-app-path /path/to/mobile-app/build/Release-iphoneos/Sample.ipa",
        "--ios-app-id <ios-app-id>",
        "--ios-shim /path/to/mobile-app/.zmr/ios-shim",
        "--runs 20",
        "--min-pass-rate 100",
        "--max-failures 0",
        f"--evidence-out {evidence_out}",
    ]
    return " ".join(pieces)
577
+
578
+
579
# Placeholder evidence path used in the suggested pilot commands.
default_pilot_evidence = "/path/to/mobile-app/traces/zmr-pilots/evidence.jsonl"

# Suggested commands per requirement label; each value is its own list.
next_step_commands = {}
next_step_commands["local release gate"] = ["./scripts/release-candidate.sh --mode local"]
next_step_commands["public Android demo"] = ["zmr-demo-android --runs 5"]
next_step_commands["public iOS simulator demo"] = ["zmr-demo-ios --runs 5"]
next_step_commands["physical iOS readiness"] = [physical_ios_pilot_command(default_pilot_evidence)]
next_step_commands["Android hardware pilot"] = [grouped_simulator_pilot_command(default_pilot_evidence)]
next_step_commands["iOS simulator hardware pilot"] = [grouped_simulator_pilot_command(default_pilot_evidence)]
next_step_commands["iOS physical hardware pilot"] = [physical_ios_pilot_command(default_pilot_evidence)]
# Benchmark comparison is three steps: candidate run, baseline run, compare.
next_step_commands["competitive benchmark comparison"] = [
    "zmr-benchmark --zmr .zmr/android-smoke.json --platform <platform> --device <device-id> --app-id <app-id> --app-build <build-id-or-artifact> --runs 20 --trace-root traces/bench-comparison/zmr --results traces/bench-comparison/results.jsonl --replace --min-pass-rate 100 --max-failures 0",
    "zmr-benchmark-command --tool <baseline-name> --platform <platform> --device <device-id> --app-id <app-id> --scenario .zmr/android-smoke.json --app-build <build-id-or-artifact> --runs 20 --trace-root traces/bench-comparison/baseline --results traces/bench-comparison/results.jsonl -- <baseline command>",
    "zmr-compare-benchmarks --results traces/bench-comparison/results.jsonl --candidate zmr --baseline <baseline-name> --min-candidate-pass-rate 100 --max-candidate-failures 0 --min-mean-speedup 1.25 --min-p95-speedup 1.25 --out traces/bench-comparison/report.md --evidence-out traces/bench-comparison/evidence.jsonl",
]
595
+
596
+
597
def fallback_next_step_commands(item):
    """Derive suggested commands for a blocked item that has no fixed entry.

    The blocked item is matched by its textual prefix:

    * ``evidence file not found: <path>``: commands that would produce the
      evidence file. Which commands are returned depends on the module-level
      ``target``.
    * ``invalid evidence JSONL in <path> at line ...``: re-run the readiness
      check against that file.
    * ``failed evidence: <name>`` / ``planned evidence: <name>``: the command
      recorded for that evidence row if one exists, otherwise a generic
      readiness re-run.
    * anything else: a generic pilot-gate command.

    Args:
        item: Blocked-item text.

    Returns:
        A non-empty list of command strings.
    """
    missing_prefix = "evidence file not found: "
    invalid_prefix = "invalid evidence JSONL in "

    if item.startswith(missing_prefix):
        evidence_path = item[len(missing_prefix):]
        if target == "dev-preview":
            # The local release gate takes a directory, not a file.
            evidence_dir = os.path.dirname(evidence_path) or "."
            return [f"./scripts/release-candidate.sh --mode local --evidence-dir {shlex.quote(evidence_dir)}"]

        quoted_path = shlex.quote(evidence_path)
        suggestions = [
            grouped_simulator_pilot_command(quoted_path),
            physical_ios_pilot_command(quoted_path),
        ]
        if target == "market-claim":
            suggestions += [
                "zmr-benchmark --zmr .zmr/android-smoke.json --platform <platform> --device <device-id> --app-id <app-id> --app-build <build-id-or-artifact> --runs 20 --trace-root traces/bench-comparison/zmr --results traces/bench-comparison/results.jsonl --replace --min-pass-rate 100 --max-failures 0",
                "zmr-benchmark-command --tool <baseline-name> --platform <platform> --device <device-id> --app-id <app-id> --scenario .zmr/android-smoke.json --app-build <build-id-or-artifact> --runs 20 --trace-root traces/bench-comparison/baseline --results traces/bench-comparison/results.jsonl -- <baseline command>",
                (
                    "zmr-compare-benchmarks --results traces/bench-comparison/results.jsonl --candidate zmr --baseline <baseline-name> --min-candidate-pass-rate 100 --max-candidate-failures 0 --min-mean-speedup 1.25 --min-p95-speedup 1.25 --out traces/bench-comparison/report.md "
                    f"--evidence-out {quoted_path}"
                ),
            ]
        return suggestions

    if item.startswith(invalid_prefix):
        # Drop the trailing " at line N..." detail to recover the file path.
        evidence_path = item[len(invalid_prefix):].split(" at line ", 1)[0]
        return [f"zmr-release-readiness --evidence {shlex.quote(evidence_path)} --target {target} --json"]

    for evidence_status in ("failed", "planned"):
        status_prefix = f"{evidence_status} evidence: "
        if not item.startswith(status_prefix):
            continue
        recorded = evidence_command(evidence_status, item[len(status_prefix):])
        if recorded is None:
            return [f"zmr-release-readiness --target {target} --json"]
        return [recorded]

    # NOTE(review): the unmatched item text itself is passed as the
    # --evidence-out value here. Confirm that is intended.
    return [f"zmr-pilot-gate --android --ios --evidence-out {shlex.quote(item)}"]
633
+
634
+
635
def evidence_command(status, name):
    """Look up the command recorded for an evidence row.

    Scans the module-level ``rows`` for entries whose ``status`` and ``name``
    both match, and returns the first ``command`` that ``concrete_value``
    accepts.

    Args:
        status: Value compared against each row's ``"status"`` entry.
        name: Value compared against each row's ``"name"`` entry.

    Returns:
        The matching row's command, or ``None`` when no matching row carries
        an acceptable command.
    """
    recorded_commands = (
        row.get("command")
        for row in rows
        if row.get("status") == status and row.get("name") == name
    )
    for candidate in recorded_commands:
        if concrete_value(candidate):
            return candidate
    return None
643
+
644
+
645
def make_next_step(requirement, commands, covers=None):
    """Build one next-step record.

    Args:
        requirement: Label of the requirement (or group) the step addresses.
        commands: List of command strings; stored as-is and also joined with
            ``" && "`` into a single-line form.
        covers: Requirement labels this step satisfies. When ``None``, the
            step covers only ``requirement``.

    Returns:
        A dict with keys ``requirement``, ``command``, ``commands`` and
        ``covers``, in that order.
    """
    covered = [requirement] if covers is None else covers
    return dict(
        requirement=requirement,
        command=" && ".join(commands),
        commands=commands,
        covers=covered,
    )
654
+
655
+
656
def append_next_step(next_steps, requirement, commands, covers=None):
    """Build a next-step record with ``make_next_step`` and append it.

    Args:
        next_steps: List that receives the new record (mutated in place).
        requirement: Label forwarded to ``make_next_step``.
        commands: Command list forwarded to ``make_next_step``.
        covers: Optional covered-requirement list forwarded unchanged.
    """
    step = make_next_step(requirement, commands, covers)
    next_steps.append(step)
658
+
659
+
660
def append_grouped_next_steps(blocked_items):
    """Turn blocked items into an ordered list of next-step records.

    Steps are produced in three passes:

    1. Every ``evidence file not found: ...`` item gets one step, which also
       covers the target-dependent requirements that are currently blocked.
    2. Two fixed pairs of requirements are merged into a single step each,
       but only when both members are blocked and neither is already covered.
    3. Each remaining blocked item gets its own step, from
       ``next_step_commands`` when it has a non-empty entry there, otherwise
       from ``fallback_next_step_commands``.

    Args:
        blocked_items: Collection of blocked item labels; iterated in order
            and used for membership tests.

    Returns:
        List of next-step dicts as built by ``make_next_step``.
    """
    missing_prefix = "evidence file not found: "
    steps = []
    covered = set()

    def missing_file_covers(item):
        # Requirements a missing evidence file may stand for, by target.
        candidates = [item]
        if target in ("production", "market-claim"):
            candidates += [
                "physical iOS readiness",
                "Android hardware pilot",
                "iOS simulator hardware pilot",
                "iOS physical hardware pilot",
            ]
        if target == "market-claim":
            candidates.append("competitive benchmark comparison")
        return [name for name in candidates if name in blocked_items]

    def maybe_group(requirements, label, commands):
        # Emit one combined step only when the whole group is blocked and
        # none of its members has been covered by an earlier step.
        present = [name for name in requirements if name in blocked_items]
        if not covered.isdisjoint(present):
            return
        if len(present) == len(requirements):
            append_next_step(steps, label, commands, present)
            covered.update(present)

    # Pass 1: missing evidence files.
    for entry in blocked_items:
        if entry.startswith(missing_prefix):
            commands = fallback_next_step_commands(entry)
            covers = missing_file_covers(entry)
            append_next_step(steps, entry, commands, covers)
            covered.update(covers)

    # Pass 2: fixed requirement pairs that share one command.
    grouped_requirements = (
        (
            ["Android hardware pilot", "iOS simulator hardware pilot"],
            "Android hardware pilot + iOS simulator hardware pilot",
            grouped_simulator_pilot_command,
        ),
        (
            ["physical iOS readiness", "iOS physical hardware pilot"],
            "physical iOS readiness + iOS physical hardware pilot",
            physical_ios_pilot_command,
        ),
    )
    for requirements, label, build_command in grouped_requirements:
        maybe_group(requirements, label, [build_command(default_pilot_evidence)])

    # Pass 3: everything not yet covered.
    for entry in blocked_items:
        if entry not in covered:
            commands = next_step_commands.get(entry) or fallback_next_step_commands(entry)
            append_next_step(steps, entry, commands)
    return steps


# Next-step records for every currently blocked item.
next_steps = append_grouped_next_steps(blocked)
713
+
714
def claim_label(target):
    """Return the human-readable claim name for a readiness target.

    ``"market-claim"`` and ``"dev-preview"`` have dedicated labels; any other
    target is rendered as ``"<target> claim"``.
    """
    known_labels = (
        ("market-claim", "market claim"),
        ("dev-preview", "developer-preview claim"),
    )
    for known_target, label in known_labels:
        if target == known_target:
            return label
    return f"{target} claim"
720
+
721
+
722
def claim_guidance(target, ok, missing, insufficient, invalid_evidence, failed, planned):
    """Produce the recommended public wording and the list of claim limitations.

    Args:
        target: Readiness target being checked.
        ok: Whether the readiness check passed.
        missing: Missing evidence labels. Labels that also appear in
            ``invalid_evidence`` are not reported as missing.
        insufficient: Insufficient evidence labels.
        invalid_evidence: Invalid evidence labels.
        failed: Failed evidence labels.
        planned: Planned (not executed) evidence labels.

    Returns:
        A ``(wording, limitations)`` tuple. When ``ok`` is false the wording
        tells the reader not to publish and lists every non-empty blocker
        category, and ``limitations`` names those categories. When ``ok`` is
        true both values depend only on ``target``.
    """
    if not ok:
        missing_for_guidance = [name for name in missing if name not in invalid_evidence]
        # (limitation text, sentence heading, labels), in reporting order.
        categories = (
            ("missing evidence", "Missing evidence", missing_for_guidance),
            ("insufficient evidence", "Insufficient evidence", insufficient),
            ("invalid evidence", "Invalid evidence", invalid_evidence),
            ("failed evidence", "Failed evidence", failed),
            ("planned evidence is not proof", "Planned evidence is not proof", planned),
        )
        limitations = []
        blockers = []
        for limitation, heading, labels in categories:
            if labels:
                limitations.append(limitation)
                blockers.append(f"{heading}: {', '.join(labels)}.")
        wording = f"Do not publish the {claim_label(target)} yet. {' '.join(blockers)}"
        return wording, limitations

    if target == "dev-preview":
        wording = "ZMR is ready to publish as a public developer preview. Do not describe it as production-stable or competitively better without production and market-claim evidence."
        return wording, ["production-stable", "competitive leadership"]

    if target == "production":
        wording = "ZMR has evidence for production readiness for the checked app/device matrix. Do not make competitive claims without market-claim evidence."
        return wording, ["competitive leadership"]

    wording = "ZMR has evidence for the checked competitive claim. Publish the benchmark report, device state, app path, thresholds, and trace evidence with the claim."
    return wording, []
761
+
762
# Wording and limitations to show alongside the readiness verdict.
recommended_wording, claim_limitations = claim_guidance(
    target,
    ok,
    missing,
    insufficient,
    invalid_evidence_labels,
    failed,
    planned,
)

# Names of the requirements whose status is "satisfied", in result order.
satisfied = [
    entry["name"]
    for entry in requirement_results
    if entry.get("status") == "satisfied"
]

# Full machine-readable report; key order is the order emitted in JSON mode.
result = {
    "ok": ok,
    "target": target,
    "status": status,
    # First evidence path as a single value, next to the full list.
    "evidence": evidence_paths[0],
    "evidenceFiles": evidence_paths,
    # Falsy names are dropped before sorting.
    "passed": sorted(filter(None, passed_names)),
    "satisfied": satisfied,
    "failed": failed,
    "planned": planned,
    "missing": missing,
    "insufficient": insufficient,
    "blocked": blocked,
    "requirements": requirement_results,
    "nextSteps": next_steps,
    "recommendedWording": recommended_wording,
    "claimLimitations": claim_limitations,
}
783
+
784
# Emit the report: compact JSON in JSON mode, otherwise a plain-text summary.
if json_mode:
    print(json.dumps(result, separators=(",", ":")))
else:
    print(f"ZMR release readiness: {status}")
    print(f"target: {target}")

    # A single evidence file is shown inline; several are listed.
    if len(evidence_paths) == 1:
        print(f"evidence: {evidence_paths[0]}")
    else:
        print("evidence:")
        for path in evidence_paths:
            print(f"- {path}")

    if satisfied:
        print("")
        print("Satisfied requirements:")
        for name in satisfied:
            print(f"- {name}")

    if blocked:
        print("")
        print("Blocked requirements:")
        for entry in requirement_results:
            if entry.get("status") == "satisfied":
                continue
            reason = entry.get("reason")
            line = f"- {entry['name']}: {entry['status']}"
            if reason:
                line = f"{line} - {reason}"
            print(line)

    # Simple bullet sections, each printed only when it has entries.
    bullet_sections = (
        ("Missing evidence:", missing),
        ("Failed evidence:", failed),
        ("Planned but not executed:", planned),
    )
    for heading, entries in bullet_sections:
        if entries:
            print("")
            print(heading)
            for name in entries:
                print(f"- {name}")

    print("")
    print(f"Recommended wording: {recommended_wording}")
    if claim_limitations:
        print("Claim limitations:")
        for limitation in claim_limitations:
            print(f"- {limitation}")

    if next_steps:
        print("")
        print("Next steps:")
        for step in next_steps:
            print(f"- {step['requirement']}: {step['command']}")

# Exit status 0 when the readiness check passed, 1 otherwise.
sys.exit(int(not ok))