zeno-mobile-runner 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/CHANGELOG.md +497 -0
  2. package/CONTRIBUTING.md +42 -0
  3. package/FEATURES.md +111 -0
  4. package/LICENSE +21 -0
  5. package/README.md +176 -0
  6. package/SECURITY.md +34 -0
  7. package/build.zig +38 -0
  8. package/build.zig.zon +7 -0
  9. package/clients/README.md +149 -0
  10. package/clients/go/README.md +24 -0
  11. package/clients/go/examples/fake-session/main.go +93 -0
  12. package/clients/go/go.mod +3 -0
  13. package/clients/go/zmr/client.go +432 -0
  14. package/clients/kotlin/README.md +35 -0
  15. package/clients/kotlin/build.gradle.kts +35 -0
  16. package/clients/kotlin/settings.gradle.kts +15 -0
  17. package/clients/kotlin/src/main/kotlin/dev/zmr/FakeSession.kt +86 -0
  18. package/clients/kotlin/src/main/kotlin/dev/zmr/ZmrClient.kt +67 -0
  19. package/clients/python/README.md +29 -0
  20. package/clients/python/examples/fake_session.py +48 -0
  21. package/clients/python/pyproject.toml +13 -0
  22. package/clients/python/zmr_client.py +202 -0
  23. package/clients/rust/Cargo.lock +107 -0
  24. package/clients/rust/Cargo.toml +10 -0
  25. package/clients/rust/README.md +19 -0
  26. package/clients/rust/examples/fake_session.rs +70 -0
  27. package/clients/rust/src/lib.rs +461 -0
  28. package/clients/swift/Package.swift +16 -0
  29. package/clients/swift/README.md +36 -0
  30. package/clients/swift/Sources/ZMRClient/ZMRClient.swift +114 -0
  31. package/clients/swift/Sources/ZMRFakeSession/main.swift +86 -0
  32. package/clients/typescript/README.md +34 -0
  33. package/clients/typescript/examples/fake-session.mjs +36 -0
  34. package/clients/typescript/index.d.ts +144 -0
  35. package/clients/typescript/index.mjs +192 -0
  36. package/clients/typescript/package.json +8 -0
  37. package/docs/adr/0001-agent-native-runner-boundary.md +31 -0
  38. package/docs/adr/0002-app-local-zmr-contract.md +39 -0
  39. package/docs/adr/0003-ios-simulator-xctest-shim.md +41 -0
  40. package/docs/adr/0004-benchmark-claims-and-baseline-collection.md +37 -0
  41. package/docs/adr/README.md +12 -0
  42. package/docs/ai-agents.md +154 -0
  43. package/docs/app-integration.md +330 -0
  44. package/docs/benchmarking.md +273 -0
  45. package/docs/client-installation.md +133 -0
  46. package/docs/clients.md +98 -0
  47. package/docs/config.md +175 -0
  48. package/docs/demo.md +259 -0
  49. package/docs/frameworks.md +72 -0
  50. package/docs/install.md +95 -0
  51. package/docs/npm.md +356 -0
  52. package/docs/protocol-fixtures/README.md +8 -0
  53. package/docs/protocol-fixtures/core-session.requests.jsonl +8 -0
  54. package/docs/protocol-fixtures/core-session.responses.jsonl +8 -0
  55. package/docs/protocol-versioning.md +65 -0
  56. package/docs/protocol.md +560 -0
  57. package/docs/scenario-authoring.md +88 -0
  58. package/docs/trace-privacy.md +88 -0
  59. package/docs/troubleshooting.md +256 -0
  60. package/examples/android-app-auth-probe.json +89 -0
  61. package/examples/android-app-error-state.json +13 -0
  62. package/examples/android-app-login-smoke.json +192 -0
  63. package/examples/android-app-onboarding.json +12 -0
  64. package/examples/android-app-referral-deep-link.json +12 -0
  65. package/examples/android-shim-smoke.json +19 -0
  66. package/examples/demo-failure.json +12 -0
  67. package/examples/demo-fake.json +14 -0
  68. package/examples/ios-dev-client-open-link.json +26 -0
  69. package/examples/ios-dev-client-route-snapshot.json +24 -0
  70. package/examples/ios-shim-smoke.json +23 -0
  71. package/examples/ios-smoke.json +9 -0
  72. package/go.work +3 -0
  73. package/npm/agents.mjs +183 -0
  74. package/npm/app-config.mjs +95 -0
  75. package/npm/build-zmr.mjs +21 -0
  76. package/npm/commands.mjs +104 -0
  77. package/npm/generated-files.mjs +50 -0
  78. package/npm/index.mjs +75 -0
  79. package/npm/init-app.mjs +80 -0
  80. package/npm/package-scripts.mjs +72 -0
  81. package/npm/postinstall.mjs +21 -0
  82. package/npm/scaffold.mjs +179 -0
  83. package/npm/scenarios.mjs +93 -0
  84. package/npm/setup.mjs +69 -0
  85. package/npm/wizard.mjs +117 -0
  86. package/npm/zmr.mjs +23 -0
  87. package/package.json +118 -0
  88. package/schemas/README.md +26 -0
  89. package/schemas/action-result.schema.json +27 -0
  90. package/schemas/capabilities-output.schema.json +98 -0
  91. package/schemas/devices-output.schema.json +25 -0
  92. package/schemas/doctor-output.schema.json +51 -0
  93. package/schemas/explain-output.schema.json +51 -0
  94. package/schemas/import-output.schema.json +23 -0
  95. package/schemas/init-output.schema.json +71 -0
  96. package/schemas/json-rpc.schema.json +55 -0
  97. package/schemas/release-manifest.schema.json +43 -0
  98. package/schemas/release-readiness-output.schema.json +127 -0
  99. package/schemas/run-output.schema.json +43 -0
  100. package/schemas/scenario.schema.json +128 -0
  101. package/schemas/schemas-output.schema.json +26 -0
  102. package/schemas/semantic-snapshot.schema.json +116 -0
  103. package/schemas/snapshot.schema.json +60 -0
  104. package/schemas/trace-event.schema.json +14 -0
  105. package/schemas/trace-manifest.schema.json +59 -0
  106. package/schemas/validate-output.schema.json +42 -0
  107. package/schemas/version-output.schema.json +23 -0
  108. package/schemas/zmr-config.schema.json +75 -0
  109. package/scripts/android-emulator.sh +126 -0
  110. package/scripts/assert-ios-physical-ready.sh +213 -0
  111. package/scripts/benchmark-command.sh +307 -0
  112. package/scripts/benchmark.sh +359 -0
  113. package/scripts/benchmark_gate.py +117 -0
  114. package/scripts/benchmark_result_row.py +88 -0
  115. package/scripts/compare-benchmarks.py +288 -0
  116. package/scripts/create-android-demo-app.sh +342 -0
  117. package/scripts/create-ios-demo-app.sh +261 -0
  118. package/scripts/demo-android-real.sh +232 -0
  119. package/scripts/demo-ios-real.sh +270 -0
  120. package/scripts/demo.sh +464 -0
  121. package/scripts/device-matrix.sh +338 -0
  122. package/scripts/ensure-ios-shim-target.rb +237 -0
  123. package/scripts/install-android-shim.sh +281 -0
  124. package/scripts/install-ios-shim.sh +589 -0
  125. package/scripts/pilot-gate.sh +560 -0
  126. package/scripts/release-readiness.py +838 -0
  127. package/scripts/release-readiness.sh +91 -0
  128. package/scripts/run-android-pilot.sh +561 -0
  129. package/scripts/run-ios-pilot.sh +509 -0
  130. package/shims/android/README.md +21 -0
  131. package/shims/android/ZMRShimInstrumentedTest.java +152 -0
  132. package/shims/android/protocol.md +18 -0
  133. package/shims/ios/README.md +50 -0
  134. package/shims/ios/ZMRShim.swift +110 -0
  135. package/shims/ios/ZMRShimUITestCase.swift +518 -0
  136. package/shims/ios/protocol.md +74 -0
  137. package/skills/zmr-mobile-testing/SKILL.md +127 -0
  138. package/src/android.zig +344 -0
  139. package/src/android_device_info.zig +99 -0
  140. package/src/android_emulator.zig +154 -0
  141. package/src/android_screen_recording.zig +112 -0
  142. package/src/android_shell.zig +112 -0
  143. package/src/bundle.zig +124 -0
  144. package/src/bundle_redaction.zig +272 -0
  145. package/src/bundle_tar.zig +123 -0
  146. package/src/cli_devices.zig +97 -0
  147. package/src/cli_doctor.zig +114 -0
  148. package/src/cli_import.zig +70 -0
  149. package/src/cli_info.zig +39 -0
  150. package/src/cli_init.zig +72 -0
  151. package/src/cli_output.zig +467 -0
  152. package/src/cli_run.zig +259 -0
  153. package/src/cli_serve.zig +287 -0
  154. package/src/cli_trace.zig +111 -0
  155. package/src/cli_validate.zig +41 -0
  156. package/src/command.zig +211 -0
  157. package/src/config.zig +305 -0
  158. package/src/config_diagnostics.zig +212 -0
  159. package/src/config_paths.zig +49 -0
  160. package/src/device_registry.zig +37 -0
  161. package/src/doctor.zig +412 -0
  162. package/src/doctor_hints.zig +52 -0
  163. package/src/errors.zig +55 -0
  164. package/src/fake_device.zig +163 -0
  165. package/src/health.zig +28 -0
  166. package/src/importer.zig +343 -0
  167. package/src/importer_json.zig +100 -0
  168. package/src/importer_model.zig +103 -0
  169. package/src/ios.zig +399 -0
  170. package/src/ios_devices.zig +219 -0
  171. package/src/ios_lifecycle.zig +72 -0
  172. package/src/ios_shim.zig +242 -0
  173. package/src/ios_snapshot.zig +20 -0
  174. package/src/json_fields.zig +80 -0
  175. package/src/json_rpc.zig +150 -0
  176. package/src/json_rpc_methods.zig +318 -0
  177. package/src/json_rpc_observation.zig +31 -0
  178. package/src/json_rpc_params.zig +52 -0
  179. package/src/json_rpc_protocol.zig +110 -0
  180. package/src/json_rpc_trace.zig +73 -0
  181. package/src/main.zig +131 -0
  182. package/src/mcp.zig +234 -0
  183. package/src/mcp_protocol.zig +64 -0
  184. package/src/mcp_trace.zig +83 -0
  185. package/src/report.zig +346 -0
  186. package/src/report_html.zig +63 -0
  187. package/src/report_values.zig +27 -0
  188. package/src/run_options.zig +152 -0
  189. package/src/runner.zig +280 -0
  190. package/src/runner_actions.zig +109 -0
  191. package/src/runner_config.zig +6 -0
  192. package/src/runner_diagnostics.zig +268 -0
  193. package/src/runner_events.zig +170 -0
  194. package/src/runner_native.zig +88 -0
  195. package/src/runner_waits.zig +300 -0
  196. package/src/scaffold.zig +472 -0
  197. package/src/scenario.zig +346 -0
  198. package/src/scenario_fields.zig +50 -0
  199. package/src/schema_registry.zig +53 -0
  200. package/src/selector.zig +84 -0
  201. package/src/semantic.zig +171 -0
  202. package/src/trace.zig +315 -0
  203. package/src/trace_json.zig +340 -0
  204. package/src/trace_summary.zig +218 -0
  205. package/src/trace_summary_diagnostic.zig +202 -0
  206. package/src/types.zig +120 -0
  207. package/src/uiautomator.zig +164 -0
  208. package/src/validation.zig +187 -0
  209. package/src/version.zig +22 -0
  210. package/viewer/app.js +373 -0
  211. package/viewer/index.html +126 -0
  212. package/viewer/parser.js +233 -0
  213. package/viewer/styles.css +585 -0
@@ -0,0 +1,307 @@
#!/usr/bin/env bash
# Bootstrap for the generic command benchmark: locate the package root, make
# sure a writable temp directory exists, and seed every setting from the
# environment so that command-line flags can override it later.
set -euo pipefail

# Follow symlinks until SOURCE names the real script file, so ROOT is derived
# from the script's actual location rather than from a link pointing at it.
SOURCE="${BASH_SOURCE[0]}"
while [[ -h "$SOURCE" ]]; do
  SOURCE_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
  SOURCE="$(readlink "$SOURCE")"
  # A relative link target is relative to the directory that holds the link.
  [[ "$SOURCE" == /* ]] || SOURCE="$SOURCE_DIR/$SOURCE"
done

ROOT="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
CALLER_CWD="$(pwd -P)"

# Some sandboxed environments do not allow writing to the default temp
# directory (/var/folders, /tmp). Fall back to a repo-local TMPDIR whenever the
# current one is unset, empty, or not writable, so adb/xcrun/mktemp/heredocs work.
if ! [[ -n "${TMPDIR:-}" && -w "${TMPDIR:-}" ]]; then
  export TMPDIR="$ROOT/traces/tmp"
  mkdir -p "$TMPDIR"
fi

# Settings with a non-empty fallback.
: "${TOOL:=baseline}"
: "${RUNS:=5}"
if [[ -z "${TRACE_ROOT:-}" ]]; then
  TRACE_ROOT="$CALLER_CWD/traces/bench-command-$(date +%Y%m%d-%H%M%S)"
fi

# Settings that can only be supplied through flags.
RESULTS=""
CWD=""
REPLACE=0

# Optional row metadata and gate thresholds; empty means "not provided".
: "${PLATFORM:=}" "${DEVICE:=}" "${APP_ID:=}" "${SCENARIO:=}" "${APP_BUILD:=}"
: "${MIN_PASS_RATE:=}" "${MAX_FAILURES:=}" "${MAX_MEAN_MS:=}" "${MAX_P95_MS:=}"
39
+
# Print the command-line help for benchmark-command.sh on stdout.
# NOTE(review): the help text is reproduced exactly as it appears in this
# rendering; any original column alignment is not visible here.
usage() {
  cat <<'HELP_TEXT'
Usage:
scripts/benchmark-command.sh --tool <label> [options] -- <command> [args...]

Runs any local command repeatedly and appends normalized benchmark rows that can
be compared with ZMR rows through zmr-compare-benchmarks.

Options:
--tool <label> Baseline tool label, for example runner-a or runner-b.
--runs <n> Number of command runs. Default: 5.
--trace-root <dir> Directory for stdout/stderr logs. Default: traces/bench-command-<timestamp> in the caller directory.
--results <path> Results JSONL path. Defaults to <trace-root>/results.jsonl.
Explicit results paths are appended by default.
--replace Truncate --results before writing.
--cwd <dir> Run the command from this working directory.
--platform <name> Platform context, for example android or ios.
--device <id> Device context shared with candidate rows.
--app-id <id> App id/bundle id context shared with candidate rows.
--scenario <path> Scenario or flow identifier used by this command.
--app-build <id> App build fingerprint, artifact path, or CI build id.
--min-pass-rate <pct> Optional gate minimum.
--max-failures <n> Optional gate maximum.
--max-mean-ms <ms> Optional mean duration maximum.
--max-p95-ms <ms> Optional p95 duration maximum.
-h, --help Show this help.

Example:
zmr-benchmark-command \
--tool runner-a \
--runs 20 \
--trace-root traces/runner-a-login \
--results traces/comparison/results.jsonl \
-- runner-a test .runner-a/login.yaml
HELP_TEXT
}
76
+
# Report a fatal error on stderr, prefixed with "error: ", and terminate the
# current shell with exit status 2.
#   $*  message words, joined with single spaces
die() {
  printf 'error: %s\n' "$*" >&2
  exit 2
}
81
+
# Echo the value supplied for an option, or abort through die when it is
# missing.
#   $1  the option name, used in the error message
#   $2  the candidate value (may be absent)
# An empty candidate, or one that starts with "--", is rejected so that a
# following option is never consumed as the value.
require_value() {
  local option="$1" candidate="${2-}"
  case "$candidate" in
    '' | --*) die "$option requires a value" ;;
  esac
  printf '%s\n' "$candidate"
}
90
+
# Print the given argument vector as one shell-escaped line.
# Each argument is rendered with printf %q and the pieces are joined by the
# first character of IFS (a space unless IFS was changed).
quote_cmd() {
  local -a escaped=()
  local token piece
  for token; do
    printf -v piece '%q' "$token"
    escaped+=("$piece")
  done
  printf '%s\n' "${escaped[*]}"
}
99
+
# Parse options up to the "--" separator; whatever remains in "$@" afterwards
# is the command to benchmark. RESULTS_EXPLICIT records whether --results was
# given, which later decides between appending to and truncating that file.
RESULTS_EXPLICIT=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --tool) TOOL="$(require_value "$1" "${2-}")"; shift 2 ;;
    --runs) RUNS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --trace-root) TRACE_ROOT="$(require_value "$1" "${2-}")"; shift 2 ;;
    --results)
      RESULTS="$(require_value "$1" "${2-}")"
      RESULTS_EXPLICIT=1
      shift 2
      ;;
    --replace) REPLACE=1; shift ;;
    --cwd) CWD="$(require_value "$1" "${2-}")"; shift 2 ;;
    --platform) PLATFORM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --device) DEVICE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-id) APP_ID="$(require_value "$1" "${2-}")"; shift 2 ;;
    --scenario) SCENARIO="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-build) APP_BUILD="$(require_value "$1" "${2-}")"; shift 2 ;;
    --min-pass-rate) MIN_PASS_RATE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-failures) MAX_FAILURES="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-mean-ms) MAX_MEAN_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-p95-ms) MAX_P95_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --) shift; break ;;
    -h | --help) usage; exit 0 ;;
    *) die "unknown argument before --: $1" ;;
  esac
done

[[ -n "$TOOL" ]] || die "--tool cannot be empty"

# Check the digits first, then force base 10: bash arithmetic treats a leading
# zero as octal, so "08" would raise an arithmetic error and "010" would be
# compared as 8 while seq later counts to 10.
[[ "$RUNS" =~ ^[0-9]+$ ]] || die "--runs must be a positive integer"
RUNS=$((10#$RUNS))
[[ "$RUNS" -ge 1 ]] || die "--runs must be a positive integer"

[[ $# -gt 0 ]] || die "command is required after --"
if [[ -n "$CWD" && ! -d "$CWD" ]]; then
  die "--cwd directory not found: $CWD"
fi
184
+
# Abort through die unless the value is empty or a non-negative decimal number
# (digits, optionally followed by a dot and more digits).
#   $1  option name used in the error message
#   $2  value to check; empty means "not provided" and is accepted
validate_optional_number() {
  local label="$1" candidate="$2"
  local decimal_re='^[0-9]+([.][0-9]+)?$'
  [[ -z "$candidate" ]] && return 0
  [[ "$candidate" =~ $decimal_re ]] || die "$label must be a non-negative number"
}
192
+
# Abort through die unless the value is empty or consists only of digits.
#   $1  option name used in the error message
#   $2  value to check; empty means "not provided" and is accepted
validate_optional_integer() {
  local label="$1" candidate="$2"
  local digits_re='^[0-9]+$'
  [[ -z "$candidate" ]] && return 0
  [[ "$candidate" =~ $digits_re ]] || die "$label must be a non-negative integer"
}
200
+
# Reject malformed gate thresholds before any run is started.
validate_optional_number "--min-pass-rate" "$MIN_PASS_RATE"
validate_optional_integer "--max-failures" "$MAX_FAILURES"
validate_optional_integer "--max-mean-ms" "$MAX_MEAN_MS"
validate_optional_integer "--max-p95-ms" "$MAX_P95_MS"

# Prepare the output locations. The results file is truncated when --replace
# was given or when the default path is used; an explicit --results path is
# only created if missing, so new rows are appended to what is already there.
mkdir -p "$TRACE_ROOT"
if [[ -z "$RESULTS" ]]; then
  RESULTS="$TRACE_ROOT/results.jsonl"
fi
mkdir -p "$(dirname "$RESULTS")"
if [[ "$REPLACE" -eq 1 || "$RESULTS_EXPLICIT" -eq 0 ]]; then
  : > "$RESULTS"
else
  touch "$RESULTS"
fi

COMMAND=("$@")
# Rendered once; the same text is echoed below and written into each run dir.
command_line="$(quote_cmd "${COMMAND[@]}")"

# Optional context flags forwarded to benchmark_result_row.py for every run.
metadata_args=()
[[ -z "$PLATFORM" ]] || metadata_args+=(--platform "$PLATFORM")
[[ -z "$DEVICE" ]] || metadata_args+=(--device "$DEVICE")
[[ -z "$APP_ID" ]] || metadata_args+=(--app-id "$APP_ID")
[[ -z "$SCENARIO" ]] || metadata_args+=(--scenario "$SCENARIO")
[[ -z "$APP_BUILD" ]] || metadata_args+=(--app-build "$APP_BUILD")

echo "Benchmark command output: $TRACE_ROOT"
echo "Results: $RESULTS"
echo "Tool: $TOOL"
echo "+ $command_line"

for run in $(seq 1 "$RUNS"); do
  run_dir="$TRACE_ROOT/$TOOL-$run"
  mkdir -p "$run_dir"
  printf '%s\n' "$command_line" > "$run_dir/command.txt"

  # Run the command with stdout/stderr captured per run. A non-zero exit is
  # recorded in command_status instead of aborting the script under `set -e`.
  command_status=0
  start_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  if [[ -n "$CWD" ]]; then
    (cd "$CWD" && "${COMMAND[@]}") > "$run_dir/stdout.log" 2> "$run_dir/stderr.log" || command_status=$?
  else
    "${COMMAND[@]}" > "$run_dir/stdout.log" 2> "$run_dir/stderr.log" || command_status=$?
  fi
  end_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  duration_ms=$((end_ms - start_ms))

  # One invocation for both cases; the metadata flags are appended only when
  # present so an empty array is never expanded under `set -u`.
  row_args=(
    --tool "$TOOL"
    --run "$run"
    --command-status "$command_status"
    --duration-ms "$duration_ms"
    --trace-dir "$run_dir"
  )
  if [[ "${#metadata_args[@]}" -gt 0 ]]; then
    row_args+=("${metadata_args[@]}")
  fi
  # The row script's stdout is appended to the results file.
  "$ROOT/scripts/benchmark_result_row.py" "${row_args[@]}" >> "$RESULTS"
done

# Summarise every row in the results file that carries this tool label.
python3 - "$RESULTS" "$TOOL" <<'PY'
import json
import math
import statistics
import sys

path, tool = sys.argv[1], sys.argv[2]
# Parse each non-blank line exactly once and close the file when done.
with open(path, encoding="utf-8") as handle:
    parsed = (json.loads(line) for line in handle if line.strip())
    rows = [row for row in parsed if row.get("tool") == tool]
durations = [int(row.get("durationMs", 0)) for row in rows]
failures = sum(1 for row in rows if row.get("status") != "ok")
mean = round(statistics.mean(durations)) if durations else 0
p95 = sorted(durations)[max(0, math.ceil(len(durations) * 0.95) - 1)] if durations else 0
print(f"{tool}: runs={len(rows)} failures={failures} meanMs={mean} p95Ms={p95}")
PY

# Run the gate script only when at least one threshold was supplied; as the
# last command, its exit status becomes the script's exit status.
gate_args=()
[[ -z "$MIN_PASS_RATE" ]] || gate_args+=(--min-pass-rate "$MIN_PASS_RATE")
[[ -z "$MAX_FAILURES" ]] || gate_args+=(--max-failures "$MAX_FAILURES")
[[ -z "$MAX_MEAN_MS" ]] || gate_args+=(--max-mean-ms "$MAX_MEAN_MS")
[[ -z "$MAX_P95_MS" ]] || gate_args+=(--max-p95-ms "$MAX_P95_MS")

if [[ "${#gate_args[@]}" -gt 0 ]]; then
  "$ROOT/scripts/benchmark_gate.py" --results "$RESULTS" "${gate_args[@]}"
fi
@@ -0,0 +1,359 @@
#!/usr/bin/env bash
# Bootstrap for the ZMR benchmark: locate the package root, make sure a
# writable temp directory exists, pick the zmr binary, and seed every setting
# from the environment so that command-line flags can override it later.
set -euo pipefail

# Follow symlinks until SOURCE names the real script file, so ROOT is derived
# from the script's actual location rather than from a link pointing at it.
SOURCE="${BASH_SOURCE[0]}"
while [[ -h "$SOURCE" ]]; do
  SOURCE_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
  SOURCE="$(readlink "$SOURCE")"
  # A relative link target is relative to the directory that holds the link.
  [[ "$SOURCE" == /* ]] || SOURCE="$SOURCE_DIR/$SOURCE"
done

ROOT="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
CALLER_CWD="$(pwd -P)"

# Some sandboxed environments do not allow writing to the default temp
# directory (/var/folders, /tmp). Fall back to a repo-local TMPDIR whenever the
# current one is unset, empty, or not writable, so adb/xcrun/mktemp/heredocs work.
if ! [[ -n "${TMPDIR:-}" && -w "${TMPDIR:-}" ]]; then
  export TMPDIR="$ROOT/traces/tmp"
  mkdir -p "$TMPDIR"
fi

# Binary lookup order: ZMR_BIN from the environment, then zmr on PATH, then
# the build output under the package root.
if [[ -z "${ZMR_BIN:-}" ]]; then
  ZMR_BIN="$(command -v zmr 2>/dev/null || printf '%s' "$ROOT/zig-out/bin/zmr")"
fi

: "${RUNS:=5}"
: "${DEVICE:=}"
if [[ -z "${TRACE_ROOT:-}" ]]; then
  TRACE_ROOT="$CALLER_CWD/traces/bench-$(date +%Y%m%d-%H%M%S)"
fi

# Settings that can only be supplied through flags.
RESULTS=""
RESULTS_EXPLICIT=0
REPLACE=0
ZMR_SCENARIO=""

# Options forwarded to zmr, row metadata, and gate thresholds; empty means
# "not provided".
: "${PLATFORM:=}" "${APP_ID:=}" "${ADB:=}" "${ANDROID_SHIM:=}"
: "${XCRUN:=}" "${IOS_SHIM:=}" "${IOS_DEVICE_TYPE:=}" "${APP_BUILD:=}"
: "${MIN_PASS_RATE:=}" "${MAX_FAILURES:=}" "${MAX_MEAN_MS:=}" "${MAX_P95_MS:=}"
44
+
# Print the command-line help for benchmark.sh on stdout.
# The ZMR_BIN entry describes the actual lookup order used at startup: zmr on
# PATH first, then the build output under the package root.
# NOTE(review): apart from that entry the help text is reproduced exactly as it
# appears in this rendering; any original column alignment is not visible here.
usage() {
  cat <<'USAGE'
Usage:
scripts/benchmark.sh --zmr <scenario.json> --device <serial> [--runs 10] [--trace-root <dir>] [--results <path>] [gate options]

Gate options:
--min-pass-rate <pct> Minimum pass rate percentage, for example 100.
--max-failures <n> Maximum allowed failed runs.
--max-mean-ms <ms> Maximum allowed mean run duration.
--max-p95-ms <ms> Maximum allowed p95 run duration.

Output options:
--results <path> Results JSONL path. Defaults to <trace-root>/results.jsonl.
Explicit results paths are appended by default.
--replace Truncate --results before writing.

Forwarded ZMR options:
--platform <android|ios>
--app-id <id>
--adb <path>
--android-shim <path>
--xcrun <path>
--ios-shim <path>
--ios-device-type <simulator|physical>
--app-build <id> App build fingerprint, artifact path, or CI build id for comparison context.

Environment:
ZMR_BIN Path to zmr binary. Defaults to zmr on PATH, falling back to zig-out/bin/zmr in the package root.
RUNS Default run count when --runs is omitted.
DEVICE Default Android serial when --device is omitted.
TRACE_ROOT Default benchmark output root. Otherwise traces/bench-<timestamp> in the caller directory.
PLATFORM, APP_ID, ADB, ANDROID_SHIM, XCRUN, IOS_SHIM, IOS_DEVICE_TYPE, APP_BUILD
Default forwarded ZMR options when matching flags are omitted.
MIN_PASS_RATE, MAX_FAILURES, MAX_MEAN_MS, MAX_P95_MS
Default gate thresholds when matching flags are omitted.
USAGE
}
82
+
# Report a fatal error on stderr, prefixed with "error: ", and terminate the
# current shell with exit status 2.
#   $*  message words, joined with single spaces
die() {
  printf 'error: %s\n' "$*" >&2
  exit 2
}
87
+
# Echo the value supplied for an option, or abort through die when it is
# missing.
#   $1  the option name, used in the error message
#   $2  the candidate value (may be absent)
# An empty candidate, or one that starts with "--", is rejected so that a
# following option is never consumed as the value.
require_value() {
  local option="$1" candidate="${2-}"
  case "$candidate" in
    '' | --*) die "$option requires a value" ;;
  esac
  printf '%s\n' "$candidate"
}
96
+
# Parse all command-line options; anything unrecognised is a usage error.
# RESULTS_EXPLICIT records whether --results was given, which later decides
# between appending to and truncating that file.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --zmr) ZMR_SCENARIO="$(require_value "$1" "${2-}")"; shift 2 ;;
    --device) DEVICE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --runs) RUNS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --trace-root) TRACE_ROOT="$(require_value "$1" "${2-}")"; shift 2 ;;
    --results)
      RESULTS="$(require_value "$1" "${2-}")"
      RESULTS_EXPLICIT=1
      shift 2
      ;;
    --replace) REPLACE=1; shift ;;
    --platform) PLATFORM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-id) APP_ID="$(require_value "$1" "${2-}")"; shift 2 ;;
    --adb) ADB="$(require_value "$1" "${2-}")"; shift 2 ;;
    --android-shim) ANDROID_SHIM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --xcrun) XCRUN="$(require_value "$1" "${2-}")"; shift 2 ;;
    --ios-shim) IOS_SHIM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --ios-device-type) IOS_DEVICE_TYPE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-build) APP_BUILD="$(require_value "$1" "${2-}")"; shift 2 ;;
    --min-pass-rate) MIN_PASS_RATE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-failures) MAX_FAILURES="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-mean-ms) MAX_MEAN_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-p95-ms) MAX_P95_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    -h | --help) usage; exit 0 ;;
    *) die "unknown argument: $1" ;;
  esac
done

# The scenario and the device are mandatory; show the help text alongside the
# error so the caller sees how to supply them.
if [[ -z "$ZMR_SCENARIO" ]]; then
  echo "error: --zmr is required" >&2
  usage >&2
  exit 2
fi

if [[ -z "$DEVICE" ]]; then
  echo "error: --device or DEVICE is required" >&2
  usage >&2
  exit 2
fi

# Check the digits first, then force base 10: bash arithmetic treats a leading
# zero as octal, so "08" would raise an arithmetic error and "010" would be
# compared as 8 while seq later counts to 10.
[[ "$RUNS" =~ ^[0-9]+$ ]] || die "--runs must be a positive integer"
RUNS=$((10#$RUNS))
[[ "$RUNS" -ge 1 ]] || die "--runs must be a positive integer"

if [[ ! -x "$ZMR_BIN" ]]; then
  die "zmr binary is not executable: $ZMR_BIN"
fi
201
+
# Abort through die unless the value is empty or a non-negative decimal number
# (digits, optionally followed by a dot and more digits). Going through die
# gives the message the same "error: " prefix and exit status 2 as every other
# failure in this script.
#   $1  option name used in the error message
#   $2  value to check; empty means "not provided" and is accepted
validate_optional_number() {
  local name="$1"
  local value="$2"
  if [[ -n "$value" && ! "$value" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    die "$name must be a non-negative number"
  fi
}
210
+
# Abort through die unless the value is empty or consists only of digits.
# Going through die gives the message the same "error: " prefix and exit
# status 2 as every other failure in this script.
#   $1  option name used in the error message
#   $2  value to check; empty means "not provided" and is accepted
validate_optional_integer() {
  local name="$1"
  local value="$2"
  if [[ -n "$value" && ! "$value" =~ ^[0-9]+$ ]]; then
    die "$name must be a non-negative integer"
  fi
}
219
+
# Reject malformed gate thresholds before any run is started.
validate_optional_number "--min-pass-rate" "$MIN_PASS_RATE"
validate_optional_integer "--max-failures" "$MAX_FAILURES"
validate_optional_integer "--max-mean-ms" "$MAX_MEAN_MS"
validate_optional_integer "--max-p95-ms" "$MAX_P95_MS"

# The iOS device type is optional, but when given it must be one of the two
# known values. The failure goes through die so it carries the same "error: "
# prefix as the script's other usage errors.
case "$IOS_DEVICE_TYPE" in
  '' | simulator | physical) ;;
  *) die "--ios-device-type must be simulator or physical" ;;
esac

# Prepare the output locations. The results file is truncated when --replace
# was given or when the default path is used; an explicit --results path is
# only created if missing, so new rows are appended to what is already there.
mkdir -p "$TRACE_ROOT"
if [[ -z "$RESULTS" ]]; then
  RESULTS="$TRACE_ROOT/results.jsonl"
fi
mkdir -p "$(dirname "$RESULTS")"
if [[ "$REPLACE" -eq 1 || "$RESULTS_EXPLICIT" -eq 0 ]]; then
  : > "$RESULTS"
else
  touch "$RESULTS"
fi
239
+
# Execute one `zmr run` of the configured scenario and append its result row.
#   $1  tool label recorded in the row and used in the trace directory name
#   $2  run number
# Reads the script-level settings (ZMR_BIN, ZMR_SCENARIO, DEVICE, TRACE_ROOT,
# RESULTS and the optional forwarded/metadata values).
# Returns the exit status of the zmr invocation.
run_one() {
  local tool="$1"
  local run="$2"
  local command_status=0
  local start_ms end_ms duration_ms trace_dir
  local -a run_cmd row_args

  trace_dir="$TRACE_ROOT/$tool-$run"
  mkdir -p "$trace_dir"

  # zmr command line: fixed head, optional forwarded flags, trace dir last.
  run_cmd=("$ZMR_BIN" run "$ZMR_SCENARIO" --device "$DEVICE")
  [[ -z "$PLATFORM" ]] || run_cmd+=(--platform "$PLATFORM")
  [[ -z "$APP_ID" ]] || run_cmd+=(--app-id "$APP_ID")
  [[ -z "$ADB" ]] || run_cmd+=(--adb "$ADB")
  [[ -z "$ANDROID_SHIM" ]] || run_cmd+=(--android-shim "$ANDROID_SHIM")
  [[ -z "$XCRUN" ]] || run_cmd+=(--xcrun "$XCRUN")
  [[ -z "$IOS_SHIM" ]] || run_cmd+=(--ios-shim "$IOS_SHIM")
  [[ -z "$IOS_DEVICE_TYPE" ]] || run_cmd+=(--ios-device-type "$IOS_DEVICE_TYPE")
  run_cmd+=(--trace-dir "$trace_dir")

  # Time the run in wall-clock milliseconds; a failing run is recorded in
  # command_status rather than aborting.
  start_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  "${run_cmd[@]}" || command_status=$?
  end_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  duration_ms=$((end_ms - start_ms))

  # Result row arguments: mandatory fields first, then whichever context
  # values are set, in the order platform, device, app id, scenario, build.
  row_args=(
    --tool "$tool"
    --run "$run"
    --command-status "$command_status"
    --duration-ms "$duration_ms"
    --trace-dir "$trace_dir"
  )
  [[ -z "$PLATFORM" ]] || row_args+=(--platform "$PLATFORM")
  [[ -z "$DEVICE" ]] || row_args+=(--device "$DEVICE")
  [[ -z "$APP_ID" ]] || row_args+=(--app-id "$APP_ID")
  [[ -z "$ZMR_SCENARIO" ]] || row_args+=(--scenario "$ZMR_SCENARIO")
  [[ -z "$APP_BUILD" ]] || row_args+=(--app-build "$APP_BUILD")

  # The row script's stdout is appended to the results file.
  "$ROOT/scripts/benchmark_result_row.py" "${row_args[@]}" >> "$RESULTS"

  return "$command_status"
}
315
+
# Run the scenario RUNS times. A failing run must not stop the benchmark, so
# the status returned by run_one is deliberately discarded here; the outcome
# is carried by the row that run_one appends to the results file.
for run in $(seq 1 "$RUNS"); do
  run_one zmr "$run" || true
done

# Summarise every row in the results file, grouped by tool label.
python3 - "$RESULTS" <<'PY'
import json
import math
import statistics
import sys
from collections import defaultdict

path = sys.argv[1]
# Read inside a context manager so the results file is closed after parsing.
with open(path, encoding="utf-8") as handle:
    rows = [json.loads(line) for line in handle if line.strip()]
by_tool = defaultdict(list)
for row in rows:
    by_tool[row["tool"]].append(row)

for tool, items in sorted(by_tool.items()):
    durations = [item["durationMs"] for item in items]
    failures = sum(1 for item in items if item["status"] != "ok")
    mean = round(statistics.mean(durations)) if durations else 0
    p95 = sorted(durations)[max(0, math.ceil(len(durations) * 0.95) - 1)] if durations else 0
    print(f"{tool}: runs={len(items)} failures={failures} meanMs={mean} p95Ms={p95}")

print(f"results={path}")
PY

# Run the gate script only when at least one threshold was supplied; as the
# last command, its exit status becomes the script's exit status.
gate_args=()
[[ -z "$MIN_PASS_RATE" ]] || gate_args+=(--min-pass-rate "$MIN_PASS_RATE")
[[ -z "$MAX_FAILURES" ]] || gate_args+=(--max-failures "$MAX_FAILURES")
[[ -z "$MAX_MEAN_MS" ]] || gate_args+=(--max-mean-ms "$MAX_MEAN_MS")
[[ -z "$MAX_P95_MS" ]] || gate_args+=(--max-p95-ms "$MAX_P95_MS")

if [[ "${#gate_args[@]}" -gt 0 ]]; then
  "$ROOT/scripts/benchmark_gate.py" --results "$RESULTS" "${gate_args[@]}"
fi