zeno-mobile-runner 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +497 -0
- package/CONTRIBUTING.md +42 -0
- package/FEATURES.md +111 -0
- package/LICENSE +21 -0
- package/README.md +176 -0
- package/SECURITY.md +34 -0
- package/build.zig +38 -0
- package/build.zig.zon +7 -0
- package/clients/README.md +149 -0
- package/clients/go/README.md +24 -0
- package/clients/go/examples/fake-session/main.go +93 -0
- package/clients/go/go.mod +3 -0
- package/clients/go/zmr/client.go +432 -0
- package/clients/kotlin/README.md +35 -0
- package/clients/kotlin/build.gradle.kts +35 -0
- package/clients/kotlin/settings.gradle.kts +15 -0
- package/clients/kotlin/src/main/kotlin/dev/zmr/FakeSession.kt +86 -0
- package/clients/kotlin/src/main/kotlin/dev/zmr/ZmrClient.kt +67 -0
- package/clients/python/README.md +29 -0
- package/clients/python/examples/fake_session.py +48 -0
- package/clients/python/pyproject.toml +13 -0
- package/clients/python/zmr_client.py +202 -0
- package/clients/rust/Cargo.lock +107 -0
- package/clients/rust/Cargo.toml +10 -0
- package/clients/rust/README.md +19 -0
- package/clients/rust/examples/fake_session.rs +70 -0
- package/clients/rust/src/lib.rs +461 -0
- package/clients/swift/Package.swift +16 -0
- package/clients/swift/README.md +36 -0
- package/clients/swift/Sources/ZMRClient/ZMRClient.swift +114 -0
- package/clients/swift/Sources/ZMRFakeSession/main.swift +86 -0
- package/clients/typescript/README.md +34 -0
- package/clients/typescript/examples/fake-session.mjs +36 -0
- package/clients/typescript/index.d.ts +144 -0
- package/clients/typescript/index.mjs +192 -0
- package/clients/typescript/package.json +8 -0
- package/docs/adr/0001-agent-native-runner-boundary.md +31 -0
- package/docs/adr/0002-app-local-zmr-contract.md +39 -0
- package/docs/adr/0003-ios-simulator-xctest-shim.md +41 -0
- package/docs/adr/0004-benchmark-claims-and-baseline-collection.md +37 -0
- package/docs/adr/README.md +12 -0
- package/docs/ai-agents.md +154 -0
- package/docs/app-integration.md +330 -0
- package/docs/benchmarking.md +273 -0
- package/docs/client-installation.md +133 -0
- package/docs/clients.md +98 -0
- package/docs/config.md +175 -0
- package/docs/demo.md +259 -0
- package/docs/frameworks.md +72 -0
- package/docs/install.md +95 -0
- package/docs/npm.md +356 -0
- package/docs/protocol-fixtures/README.md +8 -0
- package/docs/protocol-fixtures/core-session.requests.jsonl +8 -0
- package/docs/protocol-fixtures/core-session.responses.jsonl +8 -0
- package/docs/protocol-versioning.md +65 -0
- package/docs/protocol.md +560 -0
- package/docs/scenario-authoring.md +88 -0
- package/docs/trace-privacy.md +88 -0
- package/docs/troubleshooting.md +256 -0
- package/examples/android-app-auth-probe.json +89 -0
- package/examples/android-app-error-state.json +13 -0
- package/examples/android-app-login-smoke.json +192 -0
- package/examples/android-app-onboarding.json +12 -0
- package/examples/android-app-referral-deep-link.json +12 -0
- package/examples/android-shim-smoke.json +19 -0
- package/examples/demo-failure.json +12 -0
- package/examples/demo-fake.json +14 -0
- package/examples/ios-dev-client-open-link.json +26 -0
- package/examples/ios-dev-client-route-snapshot.json +24 -0
- package/examples/ios-shim-smoke.json +23 -0
- package/examples/ios-smoke.json +9 -0
- package/go.work +3 -0
- package/npm/agents.mjs +183 -0
- package/npm/app-config.mjs +95 -0
- package/npm/build-zmr.mjs +21 -0
- package/npm/commands.mjs +104 -0
- package/npm/generated-files.mjs +50 -0
- package/npm/index.mjs +75 -0
- package/npm/init-app.mjs +80 -0
- package/npm/package-scripts.mjs +72 -0
- package/npm/postinstall.mjs +21 -0
- package/npm/scaffold.mjs +179 -0
- package/npm/scenarios.mjs +93 -0
- package/npm/setup.mjs +69 -0
- package/npm/wizard.mjs +117 -0
- package/npm/zmr.mjs +23 -0
- package/package.json +118 -0
- package/schemas/README.md +26 -0
- package/schemas/action-result.schema.json +27 -0
- package/schemas/capabilities-output.schema.json +98 -0
- package/schemas/devices-output.schema.json +25 -0
- package/schemas/doctor-output.schema.json +51 -0
- package/schemas/explain-output.schema.json +51 -0
- package/schemas/import-output.schema.json +23 -0
- package/schemas/init-output.schema.json +71 -0
- package/schemas/json-rpc.schema.json +55 -0
- package/schemas/release-manifest.schema.json +43 -0
- package/schemas/release-readiness-output.schema.json +127 -0
- package/schemas/run-output.schema.json +43 -0
- package/schemas/scenario.schema.json +128 -0
- package/schemas/schemas-output.schema.json +26 -0
- package/schemas/semantic-snapshot.schema.json +116 -0
- package/schemas/snapshot.schema.json +60 -0
- package/schemas/trace-event.schema.json +14 -0
- package/schemas/trace-manifest.schema.json +59 -0
- package/schemas/validate-output.schema.json +42 -0
- package/schemas/version-output.schema.json +23 -0
- package/schemas/zmr-config.schema.json +75 -0
- package/scripts/android-emulator.sh +126 -0
- package/scripts/assert-ios-physical-ready.sh +213 -0
- package/scripts/benchmark-command.sh +307 -0
- package/scripts/benchmark.sh +359 -0
- package/scripts/benchmark_gate.py +117 -0
- package/scripts/benchmark_result_row.py +88 -0
- package/scripts/compare-benchmarks.py +288 -0
- package/scripts/create-android-demo-app.sh +342 -0
- package/scripts/create-ios-demo-app.sh +261 -0
- package/scripts/demo-android-real.sh +232 -0
- package/scripts/demo-ios-real.sh +270 -0
- package/scripts/demo.sh +464 -0
- package/scripts/device-matrix.sh +338 -0
- package/scripts/ensure-ios-shim-target.rb +237 -0
- package/scripts/install-android-shim.sh +281 -0
- package/scripts/install-ios-shim.sh +589 -0
- package/scripts/pilot-gate.sh +560 -0
- package/scripts/release-readiness.py +838 -0
- package/scripts/release-readiness.sh +91 -0
- package/scripts/run-android-pilot.sh +561 -0
- package/scripts/run-ios-pilot.sh +509 -0
- package/shims/android/README.md +21 -0
- package/shims/android/ZMRShimInstrumentedTest.java +152 -0
- package/shims/android/protocol.md +18 -0
- package/shims/ios/README.md +50 -0
- package/shims/ios/ZMRShim.swift +110 -0
- package/shims/ios/ZMRShimUITestCase.swift +518 -0
- package/shims/ios/protocol.md +74 -0
- package/skills/zmr-mobile-testing/SKILL.md +127 -0
- package/src/android.zig +344 -0
- package/src/android_device_info.zig +99 -0
- package/src/android_emulator.zig +154 -0
- package/src/android_screen_recording.zig +112 -0
- package/src/android_shell.zig +112 -0
- package/src/bundle.zig +124 -0
- package/src/bundle_redaction.zig +272 -0
- package/src/bundle_tar.zig +123 -0
- package/src/cli_devices.zig +97 -0
- package/src/cli_doctor.zig +114 -0
- package/src/cli_import.zig +70 -0
- package/src/cli_info.zig +39 -0
- package/src/cli_init.zig +72 -0
- package/src/cli_output.zig +467 -0
- package/src/cli_run.zig +259 -0
- package/src/cli_serve.zig +287 -0
- package/src/cli_trace.zig +111 -0
- package/src/cli_validate.zig +41 -0
- package/src/command.zig +211 -0
- package/src/config.zig +305 -0
- package/src/config_diagnostics.zig +212 -0
- package/src/config_paths.zig +49 -0
- package/src/device_registry.zig +37 -0
- package/src/doctor.zig +412 -0
- package/src/doctor_hints.zig +52 -0
- package/src/errors.zig +55 -0
- package/src/fake_device.zig +163 -0
- package/src/health.zig +28 -0
- package/src/importer.zig +343 -0
- package/src/importer_json.zig +100 -0
- package/src/importer_model.zig +103 -0
- package/src/ios.zig +399 -0
- package/src/ios_devices.zig +219 -0
- package/src/ios_lifecycle.zig +72 -0
- package/src/ios_shim.zig +242 -0
- package/src/ios_snapshot.zig +20 -0
- package/src/json_fields.zig +80 -0
- package/src/json_rpc.zig +150 -0
- package/src/json_rpc_methods.zig +318 -0
- package/src/json_rpc_observation.zig +31 -0
- package/src/json_rpc_params.zig +52 -0
- package/src/json_rpc_protocol.zig +110 -0
- package/src/json_rpc_trace.zig +73 -0
- package/src/main.zig +131 -0
- package/src/mcp.zig +234 -0
- package/src/mcp_protocol.zig +64 -0
- package/src/mcp_trace.zig +83 -0
- package/src/report.zig +346 -0
- package/src/report_html.zig +63 -0
- package/src/report_values.zig +27 -0
- package/src/run_options.zig +152 -0
- package/src/runner.zig +280 -0
- package/src/runner_actions.zig +109 -0
- package/src/runner_config.zig +6 -0
- package/src/runner_diagnostics.zig +268 -0
- package/src/runner_events.zig +170 -0
- package/src/runner_native.zig +88 -0
- package/src/runner_waits.zig +300 -0
- package/src/scaffold.zig +472 -0
- package/src/scenario.zig +346 -0
- package/src/scenario_fields.zig +50 -0
- package/src/schema_registry.zig +53 -0
- package/src/selector.zig +84 -0
- package/src/semantic.zig +171 -0
- package/src/trace.zig +315 -0
- package/src/trace_json.zig +340 -0
- package/src/trace_summary.zig +218 -0
- package/src/trace_summary_diagnostic.zig +202 -0
- package/src/types.zig +120 -0
- package/src/uiautomator.zig +164 -0
- package/src/validation.zig +187 -0
- package/src/version.zig +22 -0
- package/viewer/app.js +373 -0
- package/viewer/index.html +126 -0
- package/viewer/parser.js +233 -0
- package/viewer/styles.css +585 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Runs an arbitrary command repeatedly and records one benchmark row per run.
set -euo pipefail

# Follow symlinks one hop at a time until SOURCE names the real script file.
# A relative link target is resolved against the directory holding the link.
SOURCE="${BASH_SOURCE[0]}"
while [[ -L "$SOURCE" ]]; do
  SOURCE_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
  SOURCE="$(readlink "$SOURCE")"
  case "$SOURCE" in
    /*) ;;
    *) SOURCE="$SOURCE_DIR/$SOURCE" ;;
  esac
done

# ROOT is the parent of the directory that contains this script.
ROOT="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
CALLER_CWD="$(pwd -P)"

# Some sandboxed environments do not allow writing to the default temp directory
# (/var/folders, /tmp). Use a repo-local TMPDIR so adb/xcrun/mktemp/heredocs work.
if ! [[ -n "${TMPDIR:-}" && -w "${TMPDIR:-}" ]]; then
  TMPDIR="$ROOT/traces/tmp"
  mkdir -p "$TMPDIR"
  export TMPDIR
fi

# Defaults: values already present in the environment win over the fallbacks.
: "${TOOL:=baseline}"
: "${RUNS:=5}"
TRACE_ROOT="${TRACE_ROOT:-$CALLER_CWD/traces/bench-command-$(date +%Y%m%d-%H%M%S)}"
RESULTS=""
CWD=""
REPLACE=0
: "${PLATFORM:=}" "${DEVICE:=}" "${APP_ID:=}" "${SCENARIO:=}" "${APP_BUILD:=}"
: "${MIN_PASS_RATE:=}" "${MAX_FAILURES:=}" "${MAX_MEAN_MS:=}" "${MAX_P95_MS:=}"
|
|
39
|
+
|
|
40
|
+
# Print the command-line help on stdout.
usage() {
  # The quoted delimiter keeps the text literal (no parameter expansion).
  cat <<'HELP'
Usage:
scripts/benchmark-command.sh --tool <label> [options] -- <command> [args...]

Runs any local command repeatedly and appends normalized benchmark rows that can
be compared with ZMR rows through zmr-compare-benchmarks.

Options:
--tool <label> Baseline tool label, for example runner-a or runner-b.
--runs <n> Number of command runs. Default: 5.
--trace-root <dir> Directory for stdout/stderr logs. Default: traces/bench-command-<timestamp> in the caller directory.
--results <path> Results JSONL path. Defaults to <trace-root>/results.jsonl.
Explicit results paths are appended by default.
--replace Truncate --results before writing.
--cwd <dir> Run the command from this working directory.
--platform <name> Platform context, for example android or ios.
--device <id> Device context shared with candidate rows.
--app-id <id> App id/bundle id context shared with candidate rows.
--scenario <path> Scenario or flow identifier used by this command.
--app-build <id> App build fingerprint, artifact path, or CI build id.
--min-pass-rate <pct> Optional gate minimum.
--max-failures <n> Optional gate maximum.
--max-mean-ms <ms> Optional mean duration maximum.
--max-p95-ms <ms> Optional p95 duration maximum.
-h, --help Show this help.

Example:
zmr-benchmark-command \
--tool runner-a \
--runs 20 \
--trace-root traces/runner-a-login \
--results traces/comparison/results.jsonl \
-- runner-a test .runner-a/login.yaml
HELP
}
|
|
76
|
+
|
|
77
|
+
# Report a fatal usage error on stderr and terminate with exit status 2.
# All arguments are joined into a single message.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 2
}
|
|
81
|
+
|
|
82
|
+
# Echo the value that follows an option flag.
#   $1  the flag being parsed (used in the error message)
#   $2  the candidate value (may be absent)
# Dies when the value is missing, empty, or looks like another long option.
require_value() {
  local opt="$1" arg="${2-}"
  case "$arg" in
    "" | --*) die "$opt requires a value" ;;
  esac
  printf '%s\n' "$arg"
}
|
|
90
|
+
|
|
91
|
+
# Print the arguments as one space-separated line, each shell-quoted with
# printf %q so the line can be pasted back into a shell.
quote_cmd() {
  local rendered="" token
  for token in "$@"; do
    printf -v token '%q' "$token"
    rendered+="${rendered:+ }$token"
  done
  printf '%s\n' "$rendered"
}
|
|
99
|
+
|
|
100
|
+
# Parse options up to the "--" separator. Whatever follows the separator is the
# command to benchmark and is left in "$@".
RESULTS_EXPLICIT=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --tool) TOOL="$(require_value "$1" "${2-}")"; shift 2 ;;
    --runs) RUNS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --trace-root) TRACE_ROOT="$(require_value "$1" "${2-}")"; shift 2 ;;
    --results)
      RESULTS="$(require_value "$1" "${2-}")"
      RESULTS_EXPLICIT=1
      shift 2
      ;;
    --replace) REPLACE=1; shift ;;
    --cwd) CWD="$(require_value "$1" "${2-}")"; shift 2 ;;
    --platform) PLATFORM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --device) DEVICE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-id) APP_ID="$(require_value "$1" "${2-}")"; shift 2 ;;
    --scenario) SCENARIO="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-build) APP_BUILD="$(require_value "$1" "${2-}")"; shift 2 ;;
    --min-pass-rate) MIN_PASS_RATE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-failures) MAX_FAILURES="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-mean-ms) MAX_MEAN_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-p95-ms) MAX_P95_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --) shift; break ;;
    -h|--help) usage; exit 0 ;;
    *) die "unknown argument before --: $1" ;;
  esac
done

[[ -n "$TOOL" ]] || die "--tool cannot be empty"
# Force base 10: bash arithmetic treats a leading 0 as octal, so a value such
# as 08 would otherwise raise "value too great for base" and be rejected.
if [[ ! "$RUNS" =~ ^[0-9]+$ ]] || (( 10#$RUNS < 1 )); then
  die "--runs must be a positive integer"
fi
RUNS=$((10#$RUNS))
[[ $# -gt 0 ]] || die "command is required after --"
if [[ -n "$CWD" && ! -d "$CWD" ]]; then
  die "--cwd directory not found: $CWD"
fi
|
|
184
|
+
|
|
185
|
+
# Accept an empty value or an unsigned decimal such as 95 or 99.5; die otherwise.
#   $1  option name used in the error message
#   $2  value to check
validate_optional_number() {
  local name="$1" value="$2"
  local decimal='^[0-9]+([.][0-9]+)?$'
  [[ -z "$value" ]] && return 0
  [[ "$value" =~ $decimal ]] || die "$name must be a non-negative number"
}
|
|
192
|
+
|
|
193
|
+
# Accept an empty value or a string made only of digits; die otherwise.
#   $1  option name used in the error message
#   $2  value to check
validate_optional_integer() {
  local name="$1" value="$2"
  local digits_only='^[0-9]+$'
  [[ -z "$value" ]] && return 0
  [[ "$value" =~ $digits_only ]] || die "$name must be a non-negative integer"
}
|
|
200
|
+
|
|
201
|
+
validate_optional_number "--min-pass-rate" "$MIN_PASS_RATE"
validate_optional_integer "--max-failures" "$MAX_FAILURES"
validate_optional_integer "--max-mean-ms" "$MAX_MEAN_MS"
validate_optional_integer "--max-p95-ms" "$MAX_P95_MS"

mkdir -p "$TRACE_ROOT"
: "${RESULTS:=$TRACE_ROOT/results.jsonl}"
mkdir -p "$(dirname "$RESULTS")"
# A default results file starts empty on every invocation. An explicit
# --results path is appended to unless --replace was given.
if [[ "$REPLACE" -ne 1 && "$RESULTS_EXPLICIT" -ne 0 ]]; then
  touch "$RESULTS"
else
  : > "$RESULTS"
fi

COMMAND=("$@")

# Optional context flags forwarded to the row writer; empty values are omitted.
metadata_args=()
[[ -z "$PLATFORM" ]] || metadata_args+=(--platform "$PLATFORM")
[[ -z "$DEVICE" ]] || metadata_args+=(--device "$DEVICE")
[[ -z "$APP_ID" ]] || metadata_args+=(--app-id "$APP_ID")
[[ -z "$SCENARIO" ]] || metadata_args+=(--scenario "$SCENARIO")
[[ -z "$APP_BUILD" ]] || metadata_args+=(--app-build "$APP_BUILD")

rendered_command="$(quote_cmd "${COMMAND[@]}")"
printf '%s\n' "Benchmark command output: $TRACE_ROOT"
printf '%s\n' "Results: $RESULTS"
printf '%s\n' "Tool: $TOOL"
printf '%s\n' "+ $rendered_command"

for run in $(seq 1 "$RUNS"); do
  run_dir="$TRACE_ROOT/$TOOL-$run"
  mkdir -p "$run_dir"
  printf '%s\n' "$rendered_command" > "$run_dir/command.txt"

  # A failing command must not abort the benchmark; its status is recorded.
  command_status=0
  start_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  if [[ -z "$CWD" ]]; then
    "${COMMAND[@]}" > "$run_dir/stdout.log" 2> "$run_dir/stderr.log" || command_status=$?
  else
    (cd "$CWD" && "${COMMAND[@]}") > "$run_dir/stdout.log" 2> "$run_dir/stderr.log" || command_status=$?
  fi
  end_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  duration_ms=$((end_ms - start_ms))

  # ${arr[@]+...} expands to nothing for an empty array, which avoids an
  # unbound-variable error under `set -u` on older bash releases.
  "$ROOT/scripts/benchmark_result_row.py" \
    --tool "$TOOL" \
    --run "$run" \
    --command-status "$command_status" \
    --duration-ms "$duration_ms" \
    --trace-dir "$run_dir" \
    ${metadata_args[@]+"${metadata_args[@]}"} >> "$RESULTS"
done
|
|
271
|
+
|
|
272
|
+
# Print a one-line summary (runs, failures, mean, p95) for this tool's rows.
# Rows are filtered on the "tool" field because an explicit --results file is
# appended to and can hold rows written under other labels.
python3 - "$RESULTS" "$TOOL" <<'PY'
import json
import math
import statistics
import sys

path, tool = sys.argv[1], sys.argv[2]

# Parse every JSONL line exactly once and close the file deterministically.
rows = []
with open(path, encoding="utf-8") as handle:
    for line in handle:
        if not line.strip():
            continue
        row = json.loads(line)
        if row.get("tool") == tool:
            rows.append(row)

durations = sorted(int(row.get("durationMs", 0)) for row in rows)
failures = sum(1 for row in rows if row.get("status") != "ok")
mean = round(statistics.mean(durations)) if durations else 0
# Nearest-rank p95: the value at 1-based rank ceil(0.95 * n).
p95 = durations[max(0, math.ceil(len(durations) * 0.95) - 1)] if durations else 0
print(f"{tool}: runs={len(rows)} failures={failures} meanMs={mean} p95Ms={p95}")
PY
|
|
290
|
+
|
|
291
|
+
# Run the gate script only when at least one threshold was supplied.
gate_args=()
[[ -z "$MIN_PASS_RATE" ]] || gate_args+=(--min-pass-rate "$MIN_PASS_RATE")
[[ -z "$MAX_FAILURES" ]] || gate_args+=(--max-failures "$MAX_FAILURES")
[[ -z "$MAX_MEAN_MS" ]] || gate_args+=(--max-mean-ms "$MAX_MEAN_MS")
[[ -z "$MAX_P95_MS" ]] || gate_args+=(--max-p95-ms "$MAX_P95_MS")

if (( ${#gate_args[@]} > 0 )); then
  "$ROOT/scripts/benchmark_gate.py" --results "$RESULTS" "${gate_args[@]}"
fi
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Runs a ZMR scenario repeatedly and records one benchmark row per run.
set -euo pipefail

# Follow symlinks one hop at a time until SOURCE names the real script file.
# A relative link target is resolved against the directory holding the link.
SOURCE="${BASH_SOURCE[0]}"
while [[ -L "$SOURCE" ]]; do
  SOURCE_DIR="$(cd -P "$(dirname "$SOURCE")" && pwd)"
  SOURCE="$(readlink "$SOURCE")"
  case "$SOURCE" in
    /*) ;;
    *) SOURCE="$SOURCE_DIR/$SOURCE" ;;
  esac
done

# ROOT is the parent of the directory that contains this script.
ROOT="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
CALLER_CWD="$(pwd -P)"

# Some sandboxed environments do not allow writing to the default temp directory
# (/var/folders, /tmp). Use a repo-local TMPDIR so adb/xcrun/mktemp/heredocs work.
if ! [[ -n "${TMPDIR:-}" && -w "${TMPDIR:-}" ]]; then
  TMPDIR="$ROOT/traces/tmp"
  mkdir -p "$TMPDIR"
  export TMPDIR
fi

# An explicit ZMR_BIN wins; otherwise use zmr from PATH, else the local build.
if [[ -z "${ZMR_BIN:-}" ]]; then
  ZMR_BIN="$(command -v zmr 2>/dev/null || printf '%s' "$ROOT/zig-out/bin/zmr")"
fi

# Defaults: values already present in the environment win over the fallbacks.
: "${RUNS:=5}"
: "${DEVICE:=}"
TRACE_ROOT="${TRACE_ROOT:-$CALLER_CWD/traces/bench-$(date +%Y%m%d-%H%M%S)}"
RESULTS=""
RESULTS_EXPLICIT=0
REPLACE=0
ZMR_SCENARIO=""
: "${PLATFORM:=}" "${APP_ID:=}" "${ADB:=}" "${ANDROID_SHIM:=}"
: "${XCRUN:=}" "${IOS_SHIM:=}" "${IOS_DEVICE_TYPE:=}" "${APP_BUILD:=}"
: "${MIN_PASS_RATE:=}" "${MAX_FAILURES:=}" "${MAX_MEAN_MS:=}" "${MAX_P95_MS:=}"
|
|
44
|
+
|
|
45
|
+
# Print the command-line help on stdout.
usage() {
  # The quoted delimiter keeps the text literal (no parameter expansion).
  cat <<'USAGE'
Usage:
scripts/benchmark.sh --zmr <scenario.json> --device <serial> [--runs 10] [--trace-root <dir>] [--results <path>] [gate options]

Gate options:
--min-pass-rate <pct> Minimum pass rate percentage, for example 100.
--max-failures <n> Maximum allowed failed runs.
--max-mean-ms <ms> Maximum allowed mean run duration.
--max-p95-ms <ms> Maximum allowed p95 run duration.

Output options:
--results <path> Results JSONL path. Defaults to <trace-root>/results.jsonl.
Explicit results paths are appended by default.
--replace Truncate --results before writing.

Forwarded ZMR options:
--platform <android|ios>
--app-id <id>
--adb <path>
--android-shim <path>
--xcrun <path>
--ios-shim <path>
--ios-device-type <simulator|physical>
--app-build <id> App build fingerprint, artifact path, or CI build id for comparison context.

Environment:
ZMR_BIN Path to zmr binary. Defaults to zmr found on PATH, otherwise <repo>/zig-out/bin/zmr.
RUNS Default run count when --runs is omitted.
DEVICE Default Android serial when --device is omitted.
TRACE_ROOT Default benchmark output root. Otherwise traces/bench-<timestamp> in the caller directory.
PLATFORM, APP_ID, ADB, ANDROID_SHIM, XCRUN, IOS_SHIM, IOS_DEVICE_TYPE, APP_BUILD
Default forwarded ZMR options when matching flags are omitted.
MIN_PASS_RATE, MAX_FAILURES, MAX_MEAN_MS, MAX_P95_MS
Default gate thresholds when matching flags are omitted.
USAGE
}
|
|
82
|
+
|
|
83
|
+
# Write "error: <message>" to stderr and exit the script with status 2.
die() {
  >&2 printf 'error: %s\n' "$*"
  exit 2
}
|
|
87
|
+
|
|
88
|
+
# Print the value supplied for a flag, or die when none was supplied.
#   $1  flag name, used in the error message
#   $2  the argument that follows the flag (may be absent)
# An empty argument or one starting with "--" counts as missing.
require_value() {
  local flag="$1"
  local candidate="${2-}"
  [[ -n "$candidate" && "$candidate" != --* ]] || die "$flag requires a value"
  printf '%s\n' "$candidate"
}
|
|
96
|
+
|
|
97
|
+
# Parse command-line flags; each one overrides its environment default.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --zmr) ZMR_SCENARIO="$(require_value "$1" "${2-}")"; shift 2 ;;
    --device) DEVICE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --runs) RUNS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --trace-root) TRACE_ROOT="$(require_value "$1" "${2-}")"; shift 2 ;;
    --results)
      RESULTS="$(require_value "$1" "${2-}")"
      RESULTS_EXPLICIT=1
      shift 2
      ;;
    --replace) REPLACE=1; shift ;;
    --platform) PLATFORM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-id) APP_ID="$(require_value "$1" "${2-}")"; shift 2 ;;
    --adb) ADB="$(require_value "$1" "${2-}")"; shift 2 ;;
    --android-shim) ANDROID_SHIM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --xcrun) XCRUN="$(require_value "$1" "${2-}")"; shift 2 ;;
    --ios-shim) IOS_SHIM="$(require_value "$1" "${2-}")"; shift 2 ;;
    --ios-device-type) IOS_DEVICE_TYPE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --app-build) APP_BUILD="$(require_value "$1" "${2-}")"; shift 2 ;;
    --min-pass-rate) MIN_PASS_RATE="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-failures) MAX_FAILURES="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-mean-ms) MAX_MEAN_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    --max-p95-ms) MAX_P95_MS="$(require_value "$1" "${2-}")"; shift 2 ;;
    -h|--help) usage; exit 0 ;;
    *) die "unknown argument: $1" ;;
  esac
done

if [[ -z "$ZMR_SCENARIO" ]]; then
  echo "error: --zmr is required" >&2
  usage >&2
  exit 2
fi

if [[ -z "$DEVICE" ]]; then
  echo "error: --device or DEVICE is required" >&2
  usage >&2
  exit 2
fi

# Force base 10: bash arithmetic treats a leading 0 as octal, so a value such
# as 08 would otherwise raise an arithmetic error instead of being compared.
if [[ ! "$RUNS" =~ ^[0-9]+$ ]] || (( 10#$RUNS < 1 )); then
  die "--runs must be a positive integer"
fi
RUNS=$((10#$RUNS))

if [[ ! -x "$ZMR_BIN" ]]; then
  die "zmr binary is not executable: $ZMR_BIN"
fi
|
|
201
|
+
|
|
202
|
+
# Accept an empty value or an unsigned decimal such as 95 or 99.5.
#   $1  option name used in the error message
#   $2  value to check
# Failures go through die so the message carries the same "error:" prefix
# and exit status 2 as the other argument errors in this script.
validate_optional_number() {
  local name="$1"
  local value="$2"
  if [[ -n "$value" && ! "$value" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    die "$name must be a non-negative number"
  fi
}
|
|
210
|
+
|
|
211
|
+
# Accept an empty value or a string made only of digits.
#   $1  option name used in the error message
#   $2  value to check
# Failures go through die so the message carries the same "error:" prefix
# and exit status 2 as the other argument errors in this script.
validate_optional_integer() {
  local name="$1"
  local value="$2"
  if [[ -n "$value" && ! "$value" =~ ^[0-9]+$ ]]; then
    die "$name must be a non-negative integer"
  fi
}
|
|
219
|
+
|
|
220
|
+
validate_optional_number "--min-pass-rate" "$MIN_PASS_RATE"
validate_optional_integer "--max-failures" "$MAX_FAILURES"
validate_optional_integer "--max-mean-ms" "$MAX_MEAN_MS"
validate_optional_integer "--max-p95-ms" "$MAX_P95_MS"
# Reported through die so this error looks like every other argument error.
case "$IOS_DEVICE_TYPE" in
  "" | simulator | physical) ;;
  *) die "--ios-device-type must be simulator or physical" ;;
esac

mkdir -p "$TRACE_ROOT"
if [[ -z "$RESULTS" ]]; then
  RESULTS="$TRACE_ROOT/results.jsonl"
fi
mkdir -p "$(dirname "$RESULTS")"
# A default results file starts empty on every invocation. An explicit
# --results path is appended to unless --replace was given.
if [[ "$REPLACE" -eq 1 || "$RESULTS_EXPLICIT" -eq 0 ]]; then
  : > "$RESULTS"
else
  touch "$RESULTS"
fi
|
|
239
|
+
|
|
240
|
+
# Execute one benchmark run and append its result row to $RESULTS.
#   $1  tool label used in the row and in the trace directory name
#   $2  run number
# Returns the exit status of the zmr invocation.
# Dies (exit 2) when the trace directory cannot be created or the row cannot
# be written.
run_one() {
  local tool="$1"
  local run="$2"
  local command_status=0
  local start_ms end_ms duration_ms trace_dir
  local -a zmr_args=()
  local -a metadata_args=()

  trace_dir="$TRACE_ROOT/$tool-$run"
  # The caller runs `run_one ... || true`, and bash ignores errexit inside a
  # function invoked that way, so bookkeeping failures are checked explicitly.
  mkdir -p "$trace_dir" || die "cannot create trace directory: $trace_dir"

  # Options forwarded to zmr; empty values are omitted.
  [[ -z "$PLATFORM" ]] || zmr_args+=(--platform "$PLATFORM")
  [[ -z "$APP_ID" ]] || zmr_args+=(--app-id "$APP_ID")
  [[ -z "$ADB" ]] || zmr_args+=(--adb "$ADB")
  [[ -z "$ANDROID_SHIM" ]] || zmr_args+=(--android-shim "$ANDROID_SHIM")
  [[ -z "$XCRUN" ]] || zmr_args+=(--xcrun "$XCRUN")
  [[ -z "$IOS_SHIM" ]] || zmr_args+=(--ios-shim "$IOS_SHIM")
  [[ -z "$IOS_DEVICE_TYPE" ]] || zmr_args+=(--ios-device-type "$IOS_DEVICE_TYPE")

  # Context flags forwarded to the row writer; empty values are omitted.
  [[ -z "$PLATFORM" ]] || metadata_args+=(--platform "$PLATFORM")
  [[ -z "$DEVICE" ]] || metadata_args+=(--device "$DEVICE")
  [[ -z "$APP_ID" ]] || metadata_args+=(--app-id "$APP_ID")
  [[ -z "$ZMR_SCENARIO" ]] || metadata_args+=(--scenario "$ZMR_SCENARIO")
  [[ -z "$APP_BUILD" ]] || metadata_args+=(--app-build "$APP_BUILD")

  start_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  # ${arr[@]+...} expands to nothing for an empty array, which avoids an
  # unbound-variable error under `set -u` on older bash releases.
  "$ZMR_BIN" run "$ZMR_SCENARIO" --device "$DEVICE" \
    ${zmr_args[@]+"${zmr_args[@]}"} \
    --trace-dir "$trace_dir" || command_status=$?
  end_ms="$(python3 -c 'import time; print(int(time.time() * 1000))')"
  duration_ms=$((end_ms - start_ms))

  "$ROOT/scripts/benchmark_result_row.py" \
    --tool "$tool" \
    --run "$run" \
    --command-status "$command_status" \
    --duration-ms "$duration_ms" \
    --trace-dir "$trace_dir" \
    ${metadata_args[@]+"${metadata_args[@]}"} >> "$RESULTS" \
    || die "failed to record result row for $tool run $run"

  return "$command_status"
}
|
|
315
|
+
|
|
316
|
+
# run_one returns the zmr exit status; a failed run must not stop the loop.
for run in $(seq 1 "$RUNS"); do
  run_one zmr "$run" || true
done

# Print one summary line per tool label found in the results file.
python3 - "$RESULTS" <<'PY'
import json
import math
import statistics
import sys
from collections import defaultdict

path = sys.argv[1]

# Group rows by tool while reading; the file is closed deterministically.
by_tool = defaultdict(list)
with open(path, encoding="utf-8") as handle:
    for line in handle:
        if line.strip():
            row = json.loads(line)
            by_tool[row["tool"]].append(row)

for tool, items in sorted(by_tool.items()):
    durations = sorted(item["durationMs"] for item in items)
    failures = sum(1 for item in items if item["status"] != "ok")
    mean = round(statistics.mean(durations)) if durations else 0
    # Nearest-rank p95: the value at 1-based rank ceil(0.95 * n).
    p95 = durations[max(0, math.ceil(len(durations) * 0.95) - 1)] if durations else 0
    print(f"{tool}: runs={len(items)} failures={failures} meanMs={mean} p95Ms={p95}")

print(f"results={path}")
PY
|
|
342
|
+
|
|
343
|
+
# Collect the thresholds that were supplied, then run the gate script only
# when there is at least one.
gate_args=()
[[ -z "$MIN_PASS_RATE" ]] || gate_args+=(--min-pass-rate "$MIN_PASS_RATE")
[[ -z "$MAX_FAILURES" ]] || gate_args+=(--max-failures "$MAX_FAILURES")
[[ -z "$MAX_MEAN_MS" ]] || gate_args+=(--max-mean-ms "$MAX_MEAN_MS")
[[ -z "$MAX_P95_MS" ]] || gate_args+=(--max-p95-ms "$MAX_P95_MS")

if (( ${#gate_args[@]} > 0 )); then
  "$ROOT/scripts/benchmark_gate.py" --results "$RESULTS" "${gate_args[@]}"
fi
|