mobile-debug-mcp 0.21.5 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/AGENTS.md +74 -0
  2. package/README.md +24 -5
  3. package/dist/interact/classify.js +35 -0
  4. package/dist/interact/index.js +220 -13
  5. package/dist/network/index.js +232 -0
  6. package/dist/observe/ios.js +10 -3
  7. package/dist/server-core.js +822 -0
  8. package/dist/server.js +6 -693
  9. package/dist/utils/resolve-device.js +15 -3
  10. package/docs/CHANGELOG.md +10 -1
  11. package/docs/tools/interact.md +69 -30
  12. package/package.json +3 -3
  13. package/skills/README.md +35 -0
  14. package/skills/test-authoring/SKILL.md +57 -0
  15. package/skills/test-authoring/references/repo-test-layout.md +47 -0
  16. package/skills/test-authoring/references/test-authoring-workflow.md +73 -0
  17. package/skills/test-authoring/references/test-quality-checklist.md +39 -0
  18. package/src/interact/classify.ts +64 -0
  19. package/src/interact/index.ts +250 -13
  20. package/src/network/index.ts +268 -0
  21. package/src/observe/ios.ts +12 -3
  22. package/src/server-core.ts +879 -0
  23. package/src/server.ts +8 -754
  24. package/src/types.ts +10 -1
  25. package/src/utils/resolve-device.ts +19 -3
  26. package/test/device/automated/observe/capture_screenshot.android.smoke.ts +30 -0
  27. package/test/device/automated/observe/capture_screenshot.ios.smoke.ts +30 -0
  28. package/test/{observe/device → device/automated/observe}/get_logs.android.smoke.ts +1 -1
  29. package/test/{observe/device → device/automated/observe}/get_logs.ios.smoke.ts +1 -1
  30. package/test/device/automated/observe/get_ui_tree.android.smoke.ts +31 -0
  31. package/test/device/automated/observe/get_ui_tree.ios.smoke.ts +31 -0
  32. package/test/device/index.ts +52 -0
  33. package/test/{interact/device/smoke-test.ts → device/manual/interact/app_lifecycle.manual.ts} +5 -5
  34. package/test/{manage/device/run-build-install-ios.ts → device/manual/manage/build_install_ios.manual.ts} +1 -1
  35. package/test/{manage/device → device/manual/manage}/install.integration.ts +6 -6
  36. package/test/{manage/device/run-install-android.ts → device/manual/manage/install_android.manual.ts} +1 -1
  37. package/test/{manage/device/run-install-ios.ts → device/manual/manage/install_ios.manual.ts} +1 -1
  38. package/test/device/manual/observe/capture_screenshot.manual.ts +29 -0
  39. package/test/{helpers/run-get-logs.ts → device/manual/observe/get_logs.manual.ts} +1 -1
  40. package/test/device/manual/observe/get_ui_tree.manual.ts +29 -0
  41. package/test/{observe/device/logstream-real.ts → device/manual/observe/logstream.manual.ts} +1 -1
  42. package/test/{observe/device/run-screen-fingerprint.ts → device/manual/observe/screen_fingerprint.manual.ts} +1 -1
  43. package/test/{observe/device/run-scroll-test-android.ts → device/manual/observe/scroll_to_element_android.manual.ts} +1 -1
  44. package/test/{observe/device/test-ui-tree.ts → device/manual/observe/ui_tree.manual.ts} +6 -6
  45. package/test/unit/index.ts +47 -27
  46. package/test/unit/interact/classify_action_outcome.test.ts +110 -0
  47. package/test/unit/interact/handler_shapes.test.ts +55 -0
  48. package/test/unit/interact/tap_element.test.ts +170 -0
  49. package/test/unit/interact/wait_for_screen_change.test.ts +34 -0
  50. package/test/{interact/unit → unit/interact}/wait_for_ui_contract.test.ts +11 -10
  51. package/test/unit/interact/wait_for_ui_selector_matching.test.ts +76 -0
  52. package/test/unit/manage/handler_shapes.test.ts +43 -0
  53. package/test/unit/network/get_network_activity.test.ts +181 -0
  54. package/test/{observe/unit → unit/observe}/capture_debug_snapshot.test.ts +5 -1
  55. package/test/{observe/unit → unit/observe}/find_element.test.ts +12 -6
  56. package/test/unit/observe/get_screen_fingerprint.test.ts +71 -0
  57. package/test/unit/observe/ios-getlogs.test.ts +53 -0
  58. package/test/unit/observe/scroll_to_element.test.ts +127 -0
  59. package/test/unit/server/contract.test.ts +45 -0
  60. package/test/unit/server/response_shapes.test.ts +93 -0
  61. package/test/unit/system/adb_version.test.ts +35 -0
  62. package/test/unit/system/get_system_status.test.ts +20 -0
  63. package/test/unit/system/system_status.test.ts +141 -0
  64. package/test/{utils → unit/utils}/detect_java.test.ts +1 -1
  65. package/test/unit/utils/exec.test.ts +51 -0
  66. package/test/unit/utils/resolve_device.test.ts +63 -0
  67. package/tsconfig.json +2 -2
  68. package/test/interact/device/run-real-test.ts +0 -3
  69. package/test/interact/unit/wait_for_screen_change.test.ts +0 -32
  70. package/test/interact/unit/wait_for_ui.test.ts +0 -76
  71. package/test/interact/unit/wait_for_ui_new.test.ts +0 -57
  72. package/test/observe/device/wait_for_element_real.ts +0 -3
  73. package/test/observe/unit/get_screen_fingerprint.test.ts +0 -69
  74. package/test/observe/unit/ios-getlogs.test.ts +0 -67
  75. package/test/observe/unit/scroll_to_element.test.ts +0 -129
  76. package/test/observe/unit/wait_for_element_mock.ts +0 -2
  77. package/test/observe/unit/wait_for_ui_edge_cases.test.ts +0 -41
  78. package/test/observe/unit/wait_for_ui_stability.test.ts +0 -30
  79. package/test/system/adb_version.test.ts +0 -25
  80. package/test/system/get_system_status.test.ts +0 -52
  81. package/test/system/system_status.test.ts +0 -109
  82. /package/test/{manage/unit → unit/manage}/build.test.ts +0 -0
  83. /package/test/{manage/unit → unit/manage}/build_and_install.test.ts +0 -0
  84. /package/test/{manage/unit → unit/manage}/detection.test.ts +0 -0
  85. /package/test/{manage/unit → unit/manage}/diagnostics.test.ts +0 -0
  86. /package/test/{manage/unit → unit/manage}/install.test.ts +0 -0
  87. /package/test/{manage/unit → unit/manage}/mcp_disable_autodetect.test.ts +0 -0
  88. /package/test/{observe/unit → unit/observe}/get_logs.test.ts +0 -0
  89. /package/test/{observe/unit → unit/observe}/logparse.test.ts +0 -0
  90. /package/test/{observe/unit → unit/observe}/logstream.test.ts +0 -0
@@ -10,15 +10,27 @@ function parseNumericVersion(v) {
10
10
  const minor = parseInt(m[2] || "0", 10) || 0;
11
11
  return major + minor / 100;
12
12
  }
13
+ let androidDeviceLister = listAndroidDevices; // indirection so tests can swap the Android lister; listDevices calls through this binding
14
+ let iosDeviceLister = listIOSDevices; // same indirection for the iOS lister
15
+ export function _setDeviceListersForTests(overrides) { // test hook: replaces one or both device listers used by listDevices
16
+ if (overrides.listAndroidDevices) // a missing or falsy entry leaves the current Android lister in place
17
+ androidDeviceLister = overrides.listAndroidDevices;
18
+ if (overrides.listIOSDevices) // a missing or falsy entry leaves the current iOS lister in place
19
+ iosDeviceLister = overrides.listIOSDevices;
20
+ }
21
+ export function _resetDeviceListersForTests() { // test hook: points both bindings back at the real listers
22
+ androidDeviceLister = listAndroidDevices;
23
+ iosDeviceLister = listIOSDevices;
24
+ }
13
25
  export async function listDevices(platform, appId) { // no platform: Android list followed by iOS list; android: Android only; anything else: iOS only
14
26
  if (!platform || platform === "android") {
15
- const android = await listAndroidDevices(appId);
27
+ const android = await androidDeviceLister(appId);
16
28
  if (platform === "android")
17
29
  return android;
18
- const ios = await listIOSDevices(appId);
30
+ const ios = await iosDeviceLister(appId);
19
31
  return [...android, ...ios];
20
32
  }
21
- return listIOSDevices(appId);
33
+ return iosDeviceLister(appId); // any truthy platform value other than android falls through to the iOS lister
22
34
  }
23
35
  export async function resolveTargetDevice(opts) {
24
36
  const { platform, appId, prefer, deviceId } = opts;
package/docs/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  All notable changes to the **Mobile Debug MCP** project will be documented in this file.
4
4
 
5
+ ## [0.23.0]
6
+ - Added network monitoring
7
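+ - Added `classifyActionOutcome`, a deterministic classifier that labels an action's result as `success`, `no_op`, `backend_failure`, `ui_failure`, or `unknown`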
8
+
9
+ ## [0.22.0]
10
+ - Added a portable `test-authoring` skill package and documented the repository's vendor-neutral skill format
11
+ - Added `AGENTS.md` as a top-level cold-start guide for autonomous agents entering the public repository
12
+ - Added `tap_element` for deterministic action execution on previously resolved `wait_for_ui` elements
13
+
5
14
  ## [0.21.5]
6
15
  - Fixed incorrect timeout
7
16
 
@@ -52,7 +61,7 @@ All notable changes to the **Mobile Debug MCP** project will be documented in th
52
61
 
53
62
  ## [0.16.0]
54
63
  - Added `wait_for_screen_change` interact tool: polls the platform-specific `get_screen_fingerprint` until it differs from a provided `previousFingerprint`, with configurable `timeoutMs` and `pollIntervalMs` and an optional stability confirmation poll to avoid reacting to transient UI flickers. Implemented at the interact layer and delegates fingerprinting to the observe implementations (Android/iOS).
55
- - Added unit tests covering immediate change, transient null fingerprints, stability confirmation and timeout behavior: `test/interact/unit/wait_for_screen_change.test.ts`.
64
+ - Added unit tests covering immediate change, transient null fingerprints, stability confirmation and timeout behavior: `test/unit/interact/wait_for_screen_change.test.ts`.
56
65
 
57
66
  ## [0.15.0]
58
67
  - Reorganised repository for cohesion: merged tool handlers into feature entrypoints (src/observe, src/interact, src/manage) and moved platform helpers and CLI tooling into src/utils/{android,ios,cli}.
@@ -50,7 +50,7 @@ Notes:
50
50
  - Visibility check uses element.bounds intersecting the device resolution when available; falls back to the element.visible flag if bounds/resolution are missing.
51
51
  - The tool fingerprints the visible UI between scrolls; if the fingerprint doesn't change after a swipe the tool stops early assuming end-of-list.
52
52
  - Android swipe uses `adb shell input swipe` with screen percentage coordinates. iOS swipe uses `idb ui swipe` command; note `idb` swipe does not accept a duration argument.
53
- - Unit tests are located at `test/unit/observe/scroll_to_element.test.ts` and device runners at `test/device/observe/`.
53
+ - Unit tests are located at `test/unit/observe/scroll_to_element.test.ts`, automated device smoke checks under `test/device/automated/...`, and manual device runners under `test/device/manual/...`.
54
54
 
55
55
  ---
56
56
 
@@ -129,57 +129,96 @@ Notes:
129
129
  - `tapCoordinates` are the recommended center point to use for `tap` calls.
130
130
  - `confidence` mirrors the internal scoring (0..1) and is suitable for telemetry or logging to decide whether to proceed with an automated action.
131
131
  - The tool favours actionable (clickable/focusable) targets; when a matching node is not directly actionable, it finds the smallest containing clickable ancestor.
132
- - Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/observe/unit/find_element.test.ts`.
132
+ - Unit tests for edge cases (parent-clickable child-text, resource-id matches, fuzzy matching) are under `test/unit/observe/find_element.test.ts`.
133
133
 
134
134
  ---
135
135
 
136
136
  ## wait_for_ui
137
137
 
138
138
  Purpose:
139
- - Wait for a condition to occur on the device: UI element appearance, a log line, a screen fingerprint change, or an idle/stable screen state.
140
-
141
- Supported types and behavior:
142
- - ui: Delegates to `find_element` to perform a semantic search of the UI tree. Returns the matched element descriptor (including tapCoordinates) when found.
143
- - log: Reads the active log stream (via `start_log_stream`/`readLogStreamHandler`) and falls back to a snapshot of recent logs (`getLogsHandler`). Matches when the query substring appears in a new log line after a captured baseline.
144
- - screen: Compares screen fingerprints (visual checks) against an initial baseline and returns when fingerprint changes. If `query` is provided it will attempt a `find_element` on the new screen to validate the expected content.
145
- - idle: Waits until the screen fingerprint remains stable for a short stability window (default 1000ms).
139
+ - Deterministically wait for a UI selector match and return the matched element metadata.
146
140
 
147
141
  Input (ToolsInteract.waitForUIHandler):
148
142
  ```
149
- { "type": "ui|log|screen|idle", "query": "optional string", "timeoutMs": 5000, "pollIntervalMs": 200, "platform": "android|ios", "deviceId": "optional device id" }
143
+ {
144
+ "selector": { "text": "optional", "resource_id": "optional", "accessibility_id": "optional", "contains": false },
145
+ "condition": "exists|not_exists|visible|clickable",
146
+ "timeout_ms": 60000,
147
+ "poll_interval_ms": 300,
148
+ "match": { "index": 0 },
149
+ "retry": { "max_attempts": 1, "backoff_ms": 0 },
150
+ "platform": "android|ios",
151
+ "deviceId": "optional device id"
152
+ }
150
153
  ```
151
154
 
152
155
  Success response highlights:
153
- - success: true
154
- - type: requested type
155
- - matched: true
156
- - details: human-friendly explanation
157
- - timestamp: epoch ms
158
- - element: (for ui/screen when matched) actionable element metadata with tapCoordinates
159
- - log: (for log) matched log message and raw entry
160
- - newFingerprint: (for screen) new fingerprint value
156
+ - status: `success`
157
+ - matched: number of matches found in the current poll
158
+ - element: matched element metadata including `elementId`
159
+ - metrics: latency, poll count, attempts
161
160
 
162
161
  Failure/timeout response:
163
- - success: false
164
- - error or reason: explanation
165
- - type: requested type
166
- - timeoutMs: value used
162
+ - status: `timeout`
163
+ - error: structured error with `code` and `message`
164
+ - metrics: latency, poll count, attempts
167
165
 
168
166
  Notes & tips:
169
- - Defaults (timeoutMs=5000, pollIntervalMs=200) balance responsiveness with device query overhead; adjust in tests or scripts as needed.
170
- - For UI-sensitive flows prefer type='ui' rather than relying solely on visual fingerprint changes, as some UI updates don't alter the fingerprint.
167
+ - `wait_for_ui` is responsible for **resolution only**.
168
+ - Successful responses now include an `elementId` that can be passed to `tap_element`.
169
+ - This enables the deterministic loop: **observe -> act -> verify**.
171
170
 
172
171
  Tests:
173
- - Unit: `test/interact/unit/wait_for_ui.test.ts`
174
- - Device runner: `test/interact/device/wait_for_ui_device.ts` (requires devices/emulators and adb/xcrun in PATH)
172
+ - Unit: `test/unit/interact/wait_for_ui_contract.test.ts` and `test/unit/interact/wait_for_ui_selector_matching.test.ts`
173
+ - Automated device checks now live under `test/device/automated/...`; manual/debug runners live under `test/device/manual/...` (requires devices/emulators and adb/xcrun in PATH)
175
174
 
176
175
  Example:
177
176
  ```
178
- // Wait up to 5s for a button labeled "Generate Session" on Android
179
- ToolsInteract.waitForUIHandler({ type: 'ui', query: 'Generate Session', timeoutMs: 5000, platform: 'android' })
177
+ ToolsInteract.waitForUIHandler({
178
+ selector: { text: 'Generate Session' },
179
+ condition: 'clickable',
180
+ timeout_ms: 5000,
181
+ platform: 'android'
182
+ })
180
183
  ```
181
184
 
182
185
  Troubleshooting:
183
- - If wait_for_ui(log) never matches, ensure log streaming is started for the target package and baseline logs captured correctly.
184
- - If wait_for_ui(screen) times out despite visible UI change, try type='ui' to validate content-level changes.
186
+ - If `wait_for_ui` times out, confirm the selector is precise and that the current UI tree exposes the expected text, resource ID, or accessibility ID.
187
+
188
+ ## tap_element
189
+
190
+ Purpose:
191
+ - Execute a tap against a UI element that has already been resolved by `wait_for_ui`.
192
+
193
+ Input:
194
+ ```
195
+ { "elementId": "el_..." }
196
+ ```
185
197
 
198
+ Behavior:
199
+ - validates that the element still exists in the current UI context
200
+ - validates that the element is visible
201
+ - validates that the element is enabled
202
+ - performs the tap using the resolved element bounds
203
+
204
+ Success response:
205
+ ```
206
+ { "success": true, "elementId": "el_123", "action": "tap" }
207
+ ```
208
+
209
+ Failure response:
210
+ ```
211
+ {
212
+ "success": false,
213
+ "elementId": "el_123",
214
+ "action": "tap",
215
+ "error": { "code": "element_not_found|element_not_visible|element_not_enabled", "message": "..." }
216
+ }
217
+ ```
218
+
219
+ Notes:
220
+ - `tap_element` does **not** accept selectors.
221
+ - `tap_element` does **not** perform lookup, waiting, retries, or ambiguity resolution.
222
+ - Migration pattern for selector-based flows is:
223
+ 1. `wait_for_ui(selector)`
224
+ 2. `tap_element(elementId)`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mobile-debug-mcp",
3
- "version": "0.21.5",
3
+ "version": "0.23.0",
4
4
  "description": "MCP server for mobile app debugging (Android + iOS), with focus on security and reliability",
5
5
  "type": "module",
6
6
  "bin": {
@@ -13,8 +13,8 @@
13
13
  "healthcheck": "tsx ./src/cli/idb/check-idb.ts",
14
14
  "install-idb": "tsx ./src/cli/idb/install-idb.ts",
15
15
  "preflight-ios": "tsx ./src/cli/ios/preflight-ios.ts",
16
- "test:unit": "SKIP_DEVICE_TESTS=1 tsx test/unit/index.ts",
17
- "test:integration": "npm run build && tsx test/device/index.ts",
16
+ "test:unit": "tsx test/unit/index.ts",
17
+ "test:integration": "npm run test:device",
18
18
  "test:device": "npm run build && tsx test/device/index.ts",
19
19
  "test": "npm run test:unit",
20
20
  "lint": "eslint --ext .ts,.js src test --quiet",
@@ -0,0 +1,35 @@
1
+ # Skills
2
+
3
+ This repository stores agent skills as **plain Markdown packages** so they can be consumed by different agent systems, such as Copilot, Codex, Claude, or custom internal agents.
4
+
5
+ ## Portability rules
6
+
7
+ 1. Keep the entrypoint in `skills/<skill-name>/SKILL.md`.
8
+ 2. Use simple, readable sections instead of vendor-specific syntax or hidden metadata.
9
+ 3. Keep `SKILL.md` short and task-oriented.
10
+ 4. Put detailed guidance in `skills/<skill-name>/references/*.md`.
11
+ 5. Describe behavior in terms of **inputs, outputs, workflow, and constraints**, not a specific product's tool API.
12
+ 6. When a skill depends on repository conventions, link to the exact repo paths and commands the agent should inspect or run.
13
+
14
+ ## Recommended SKILL.md structure
15
+
16
+ - Title
17
+ - `name`
18
+ - `version`
19
+ - `summary`
20
+ - Purpose
21
+ - Activation conditions
22
+ - Surface area (actions)
23
+ - Core guidance
24
+ - Inputs & outputs
25
+ - Failure handling
26
+ - Progressive disclosure
27
+ - References
28
+
29
+ ## Repository-specific expectation
30
+
31
+ Skills in this repo should be:
32
+
33
+ - **generic enough** to be followed by different agents
34
+ - **specific enough** to encode this repository's working conventions
35
+ - **lightweight** so agents can load them quickly and fetch references only when needed
@@ -0,0 +1,57 @@
1
+ # Test Authoring skill
2
+
3
+ name: test-authoring
4
+ version: 0.1.0
5
+ summary: Reusable guidance for adding or updating tests in this repository without tying the workflow to a single agent product.
6
+
7
+ # Purpose
8
+ Help an agent create, extend, or reorganize tests while following this repository's current conventions for unit tests, automated device smoke tests, and manual device helpers.
9
+
10
+ # Activation conditions
11
+ Activate when an agent needs to:
12
+ - add a new test for existing code
13
+ - update tests after changing tool behavior or response contracts
14
+ - move tests into the correct unit, automated device, or manual device location
15
+ - improve coverage for handlers, server contracts, or shared utilities
16
+
17
+ # Surface area (actions)
18
+ - choose-test-type
19
+ - place-test-file
20
+ - mirror-existing-pattern
21
+ - add-or-update-fixtures
22
+ - validate-test-command
23
+ - document-test-scope
24
+
25
+ # Core guidance
26
+ 1. Prefer the **smallest reliable test type**: unit test first, automated device smoke test only when real platform integration matters, manual helper only when automation is not practical.
27
+ 2. Follow the repository's current automated test style: self-running `tsx` scripts with explicit assertions and non-zero exit on failure.
28
+ 3. Put automated unit tests under `test/unit/...`; the unit runner automatically executes every `*.test.ts` file there.
29
+ 4. Put automated device smoke tests under `test/device/automated/...`; put human-invoked helpers under `test/device/manual/...`.
30
+ 5. Reuse existing mocking seams and test hooks before creating new ones.
31
+ 6. When testing user-facing tools, protect **response shape**, required fields, and documented defaults.
32
+ 7. Keep tests deterministic: isolate process-global mocks, and prefer fake binaries or injected helpers over shared mutable module state.
33
+ 8. Validate with the repository commands that already exist instead of inventing new runners.
34
+
35
+ # Inputs & outputs
36
+ - choose-test-type(input: { changedPaths[], behaviorType, requiresRealDevice?: boolean }) -> { recommendedType: 'unit'|'device-automated'|'device-manual', rationale, targetPath }
37
+ - place-test-file(input: { featureArea, recommendedType }) -> { filePath, namingPattern, runner }
38
+ - mirror-existing-pattern(input: { targetPath }) -> { referenceFiles[], patternSummary }
39
+ - validate-test-command(input: { scope: 'unit'|'device'|'repo' }) -> { commands[] }
40
+ - document-test-scope(input: { testPath, purpose }) -> { docsToUpdate[] }
41
+
42
+ # Failure handling
43
+ - If the correct test type is ambiguous, prefer unit coverage unless the behavior depends on real device/simulator integration.
44
+ - If a test requires brittle global monkeypatching, isolate it in a subprocess-friendly way or add a narrow injection seam.
45
+ - If device automation is too environment-dependent, keep the helper under `test/device/manual/...` and add a smaller automated smoke test around a stable contract.
46
+
47
+ # Progressive disclosure
48
+ - Keep this file short.
49
+ - Load detailed references only when deciding placement, matching existing style, or validating new tests.
50
+
51
+ # References
52
+ - `references/repo-test-layout.md` — where tests belong and what each tree means
53
+ - `references/test-authoring-workflow.md` — step-by-step workflow for creating or changing tests
54
+ - `references/test-quality-checklist.md` — what a good test in this repo should prove before it is considered done
55
+
56
+ # License
57
+ Same as repository (MIT).
@@ -0,0 +1,47 @@
1
+ # Repository test layout
2
+
3
+ Use this reference when deciding **where** a new test belongs.
4
+
5
+ ## Automated unit tests
6
+
7
+ - Location: `test/unit/...`
8
+ - Naming: `*.test.ts`
9
+ - Execution: `npm run test:unit`
10
+ - Runner behavior: `test/unit/index.ts` discovers every `*.test.ts` file and runs each one in an isolated `tsx` subprocess
11
+
12
+ Best for:
13
+ - handler logic
14
+ - schema and contract defaults
15
+ - response-shape validation
16
+ - shared utility behavior
17
+ - deterministic edge cases
18
+
19
+ ## Automated device smoke tests
20
+
21
+ - Location: `test/device/automated/...`
22
+ - Naming: usually `*.smoke.ts` or `*.integration.ts`
23
+ - Execution: `npm run test:device`
24
+ - Runner behavior: `test/device/index.ts` runs only the automated subtree
25
+
26
+ Best for:
27
+ - real Android/iOS tool wiring
28
+ - smoke-level confidence that a command works end-to-end
29
+ - checks that need real `adb`, `xcrun`, simulator, or device behavior
30
+
31
+ ## Manual device helpers
32
+
33
+ - Location: `test/device/manual/...`
34
+ - Purpose: scripts that humans or agents can invoke directly for ad hoc validation
35
+ - Not part of the default automated test commands
36
+
37
+ Best for:
38
+ - workflows that require app-specific arguments
39
+ - environment-specific debugging
40
+ - high-friction scenarios not suitable for default CI-like execution
41
+
42
+ ## Placement rules
43
+
44
+ 1. If the behavior can be validated with mocks, fake binaries, or narrow test hooks, prefer `test/unit/...`.
45
+ 2. If the behavior depends on real platform tooling, add an automated smoke test under `test/device/automated/...`.
46
+ 3. If the behavior requires manual setup, project-specific app IDs, or ad hoc arguments, add a helper under `test/device/manual/...`.
47
+ 4. Avoid leaving obsolete tests in the wrong tree; move or delete them so tree meaning stays clear.
@@ -0,0 +1,73 @@
1
+ # Test authoring workflow
2
+
3
+ Use this workflow when adding or updating tests in this repository.
4
+
5
+ ## 1. Identify the behavior boundary
6
+
7
+ Classify the change:
8
+
9
+ - pure utility or parser logic
10
+ - handler / response contract logic
11
+ - server registration or schema wiring
12
+ - real platform integration
13
+
14
+ This usually determines the cheapest useful test type.
15
+
16
+ ## 2. Inspect nearby examples
17
+
18
+ Before writing a new test, inspect tests already covering the same area:
19
+
20
+ - `test/unit/server/...` for tool registration and response-shape tests
21
+ - `test/unit/interact/...` for handler tests and UI wait behavior
22
+ - `test/unit/manage/...` for build/install flows and fake toolchain binaries
23
+ - `test/unit/observe/...` for observe response contracts
24
+ - `test/unit/utils/...` for shared helper coverage
25
+ - `test/device/automated/...` for smoke test wrappers
26
+ - `test/device/manual/...` for JSON-producing helper scripts used by smoke tests
27
+
28
+ ## 3. Match the existing style
29
+
30
+ For automated tests in this repo:
31
+
32
+ - use a self-running script
33
+ - use `assert` or explicit checks
34
+ - print a small success message
35
+ - `process.exit(1)` on failure
36
+
37
+ For unit tests that need isolation:
38
+
39
+ - patch only what is required
40
+ - restore original values in `finally`
41
+ - prefer injected seams or fake binaries over broad module mutation
42
+
43
+ ## 4. Prefer existing seams
44
+
45
+ Before adding a new hook, search for:
46
+
47
+ - existing test-only setters/resetters
48
+ - environment-variable based indirection
49
+ - fake executable patterns already used in system/manage tests
50
+
51
+ Only add a new seam when the current code cannot be tested safely otherwise.
52
+
53
+ ## 5. Protect the contract, not just the happy path
54
+
55
+ When testing tools or handlers, cover:
56
+
57
+ - expected success shape
58
+ - required fields
59
+ - key defaults
60
+ - at least one failure or invalid-input path
61
+
62
+ ## 6. Validate with repo commands
63
+
64
+ Use only the existing validation entrypoints:
65
+
66
+ - `npm run build`
67
+ - `npm run lint`
68
+ - `npm run test:unit`
69
+ - `SKIP_DEVICE_TESTS=1 npm run test:device` for automated device wrappers when real devices are not part of the current validation run
70
+
71
+ ## 7. Update docs only when structure or expectations changed
72
+
73
+ If the test layout, naming, or default commands change, update repo docs. If you only add another test following existing rules, docs may not need changes.
@@ -0,0 +1,39 @@
1
+ # Test quality checklist
2
+
3
+ A new test in this repo is high quality when it does most of the following:
4
+
5
+ ## Contract quality
6
+
7
+ - proves the right behavior, not just that code executed
8
+ - asserts on meaningful fields rather than only truthy values
9
+ - protects documented defaults where drift would matter
10
+ - covers at least one failure path when the behavior can fail
11
+
12
+ ## Determinism
13
+
14
+ - does not depend on arbitrary timing unless the behavior is inherently time-based
15
+ - restores mocks, env vars, and monkeypatches in `finally`
16
+ - avoids leaking state into later tests
17
+ - runs reliably in an isolated subprocess
18
+
19
+ ## Placement quality
20
+
21
+ - lives in the correct tree (`test/unit`, `test/device/automated`, or `test/device/manual`)
22
+ - uses file naming consistent with the surrounding tests
23
+ - does not leave obsolete duplicate coverage in a different tree
24
+
25
+ ## Maintainability
26
+
27
+ - reuses existing helpers, fake-binary patterns, or test seams
28
+ - keeps fixtures as small as possible
29
+ - makes the failure obvious from the assertion message or output
30
+ - stays focused on one behavior cluster instead of becoming a broad scenario script
31
+
32
+ ## Completion bar
33
+
34
+ Before considering the work done, the author should be able to say:
35
+
36
+ 1. The test is in the correct place.
37
+ 2. The test matches the current repo style.
38
+ 3. The test would fail if the protected behavior regressed.
39
+ 4. The normal repository validation commands still pass.
@@ -0,0 +1,64 @@
1
+ export type ActionOutcome = 'success' | 'no_op' | 'backend_failure' | 'ui_failure' | 'unknown' // every verdict classifyActionOutcome can return
2
+ export type NetworkRequestStatus = 'success' | 'failure' | 'retryable' // per-request result; classifyActionOutcome treats failure and retryable alike
3
+
4
+ export interface NetworkRequest { // one observed network call, reduced to the two fields the classifier reads
5
+ endpoint: string // quoted in the backend_failure reasoning text
6
+ status: NetworkRequestStatus
7
+ }
8
+
9
+ export interface ClassifyActionOutcomeInput { // signals gathered after an action, consumed by classifyActionOutcome
10
+ uiChanged: boolean // true alone is enough for a success verdict
11
+ expectedElementVisible?: boolean | null // only an explicit true counts as a positive signal; false, null and undefined are treated alike
12
+ /** null = get_network_activity has not been called yet (undefined is handled the same way by classifyActionOutcome) */
13
+ networkRequests?: NetworkRequest[] | null
14
+ hasLogErrors?: boolean | null // only adds a note to the no_op reasoning; never changes the outcome
15
+ }
16
+
17
+ export interface ClassifyActionOutcomeResult { // verdict plus explanation returned by classifyActionOutcome
18
+ outcome: ActionOutcome
19
+ reasoning: string // human-readable sentence naming the rule that produced the outcome
20
+ /** Present when the caller must call get_network_activity before a final classification is possible; classifyActionOutcome sets it only alongside outcome unknown when networkRequests was missing */
21
+ nextAction?: 'call_get_network_activity'
22
+ }
23
+
24
+ /**
25
+ * Pure deterministic classifier: decides how a UI action turned out from the UI, network and log signals in input. Applies rules in fixed order and the first matching rule wins.
26
+ * Same inputs always produce the same output. Returns the outcome with a reasoning string, plus nextAction when network data has not been supplied yet.
27
+ */
28
+ export function classifyActionOutcome(input: ClassifyActionOutcomeInput): ClassifyActionOutcomeResult {
29
+ const { uiChanged, expectedElementVisible, networkRequests, hasLogErrors } = input
30
+
31
+ // Step 1 — UI signal is positive: either flag alone is enough, and the reasoning prefers the visible-element explanation when both hold
32
+ if (uiChanged || expectedElementVisible === true) {
33
+ return { outcome: 'success', reasoning: expectedElementVisible === true ? 'expected element is visible' : 'UI changed after action' }
34
+ }
35
+
36
+ // Step 2 — UI did not change; network signal is required (null or undefined means it was never supplied, so the caller is told to fetch it)
37
+ if (networkRequests === null || networkRequests === undefined) {
38
+ return {
39
+ outcome: 'unknown',
40
+ reasoning: 'UI did not change; get_network_activity must be called before classification can proceed',
41
+ nextAction: 'call_get_network_activity'
42
+ }
43
+ }
44
+
45
+ // Step 3 — any network failure; retryable requests count as failures here, and only the first offending request is reported
46
+ const failedRequest = networkRequests.find((r) => r.status === 'failure' || r.status === 'retryable')
47
+ if (failedRequest) {
48
+ return { outcome: 'backend_failure', reasoning: `network request ${failedRequest.endpoint} returned ${failedRequest.status}` }
49
+ }
50
+
51
+ // Step 4 — no network requests at all; log errors only annotate the reasoning and do not change the outcome
52
+ if (networkRequests.length === 0) {
53
+ const logNote = hasLogErrors ? ' (log errors present)' : ''
54
+ return { outcome: 'no_op', reasoning: `no UI change and no network activity${logNote}` }
55
+ }
56
+
57
+ // Step 5 — network requests exist and all succeeded
58
+ if (networkRequests.every((r) => r.status === 'success')) {
59
+ return { outcome: 'ui_failure', reasoning: 'network requests succeeded but UI did not change' }
60
+ }
61
+
62
+ // Step 6 — fallback; with the declared status union Steps 3 to 5 are exhaustive, so this is reached only for a status value outside NetworkRequestStatus
63
+ return { outcome: 'unknown', reasoning: 'signals are inconclusive' }
64
+ }