@pzy560117/opentest 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/assets/manifest.json +12 -1
  2. package/assets/skills/opentest/references/api-testing.md +77 -0
  3. package/assets/skills/opentest/references/codex-harness-coverage-heuristics.md +17 -1
  4. package/assets/skills/opentest/references/complete-testing-workflow.md +27 -0
  5. package/assets/skills/opentest/references/desktop-gui-testing.md +52 -0
  6. package/assets/skills/opentest/references/matrix-format.md +12 -2
  7. package/assets/skills/opentest/references/opentest-driven-development.md +10 -0
  8. package/assets/skills/opentest/references/test-asset-layout.md +64 -0
  9. package/assets/skills/opentest/references/test-surfaces.md +101 -0
  10. package/assets/skills/opentest/references/web-browser-testing.md +40 -0
  11. package/assets/skills/opentest/templates/acceptance-template.md +3 -1
  12. package/assets/skills/opentest/templates/api-acceptance-template.md +44 -0
  13. package/assets/skills/opentest/templates/desktop-gui-acceptance-template.md +43 -0
  14. package/assets/skills/opentest/templates/matrix-template.md +12 -11
  15. package/assets/skills/opentest/templates/plan-template.md +2 -2
  16. package/assets/skills/opentest/templates/web-acceptance-template.md +27 -0
  17. package/assets/skills/opentest-accept/SKILL.md +15 -6
  18. package/assets/skills/opentest-api/SKILL.md +25 -0
  19. package/assets/skills/opentest-author/SKILL.md +7 -5
  20. package/assets/skills/opentest-desktop-gui/SKILL.md +24 -0
  21. package/assets/skills/opentest-plan/SKILL.md +15 -9
  22. package/assets/skills/opentest-run/SKILL.md +14 -8
  23. package/assets/skills/opentest-web-browser/SKILL.md +26 -0
  24. package/assets/skills-zh/opentest/references/api-testing.md +77 -0
  25. package/assets/skills-zh/opentest/references/codex-harness-coverage-heuristics.md +17 -1
  26. package/assets/skills-zh/opentest/references/complete-testing-workflow.md +27 -0
  27. package/assets/skills-zh/opentest/references/desktop-gui-testing.md +52 -0
  28. package/assets/skills-zh/opentest/references/matrix-format.md +12 -2
  29. package/assets/skills-zh/opentest/references/opentest-driven-development.md +10 -0
  30. package/assets/skills-zh/opentest/references/test-asset-layout.md +64 -0
  31. package/assets/skills-zh/opentest/references/test-surfaces.md +101 -0
  32. package/assets/skills-zh/opentest/references/web-browser-testing.md +40 -0
  33. package/assets/skills-zh/opentest/templates/acceptance-template.md +3 -1
  34. package/assets/skills-zh/opentest/templates/api-acceptance-template.md +44 -0
  35. package/assets/skills-zh/opentest/templates/desktop-gui-acceptance-template.md +43 -0
  36. package/assets/skills-zh/opentest/templates/matrix-template.md +12 -11
  37. package/assets/skills-zh/opentest/templates/plan-template.md +2 -2
  38. package/assets/skills-zh/opentest/templates/web-acceptance-template.md +27 -0
  39. package/assets/skills-zh/opentest-accept/SKILL.md +14 -5
  40. package/assets/skills-zh/opentest-api/SKILL.md +25 -0
  41. package/assets/skills-zh/opentest-author/SKILL.md +7 -5
  42. package/assets/skills-zh/opentest-desktop-gui/SKILL.md +24 -0
  43. package/assets/skills-zh/opentest-plan/SKILL.md +13 -7
  44. package/assets/skills-zh/opentest-run/SKILL.md +13 -7
  45. package/assets/skills-zh/opentest-web-browser/SKILL.md +26 -0
  46. package/package.json +1 -1
  47. package/scripts/smoke-test.js +309 -1
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "0.1.9",
2
+ "version": "0.1.11",
3
3
  "languages": [
4
4
  {
5
5
  "id": "en",
@@ -60,25 +60,36 @@
60
60
  "localized": [
61
61
  "opentest/SKILL.md",
62
62
  "opentest/references/acceptance-evidence.md",
63
+ "opentest/references/api-testing.md",
63
64
  "opentest/references/codex-harness-coverage-heuristics.md",
64
65
  "opentest/references/command-routing.md",
65
66
  "opentest/references/complete-testing-workflow.md",
67
+ "opentest/references/desktop-gui-testing.md",
66
68
  "opentest/references/lifecycle.md",
67
69
  "opentest/references/matrix-format.md",
68
70
  "opentest/references/opentest-driven-development.md",
69
71
  "opentest/references/quality-gate.md",
72
+ "opentest/references/test-asset-layout.md",
73
+ "opentest/references/test-surfaces.md",
74
+ "opentest/references/web-browser-testing.md",
70
75
  "opentest/templates/acceptance-template.md",
76
+ "opentest/templates/api-acceptance-template.md",
71
77
  "opentest/templates/archive-layout.md",
78
+ "opentest/templates/desktop-gui-acceptance-template.md",
72
79
  "opentest/templates/fixtures-template.md",
73
80
  "opentest/templates/matrix-template.md",
74
81
  "opentest/templates/plan-template.md",
75
82
  "opentest/templates/report-template.md",
83
+ "opentest/templates/web-acceptance-template.md",
76
84
  "opentest-accept/SKILL.md",
85
+ "opentest-api/SKILL.md",
77
86
  "opentest-archive/SKILL.md",
78
87
  "opentest-author/SKILL.md",
79
88
  "opentest-heal/SKILL.md",
80
89
  "opentest-plan/SKILL.md",
81
90
  "opentest-run/SKILL.md",
91
+ "opentest-desktop-gui/SKILL.md",
92
+ "opentest-web-browser/SKILL.md",
82
93
  "opentest-verify/SKILL.md"
83
94
  ],
84
95
  "shared": [
@@ -0,0 +1,77 @@
1
+ # API Testing
2
+
3
+ Use this reference for `api` execution-surface rows.
4
+
5
+ ## Default Architecture
6
+
7
+ Use the repository's existing API test framework first. If the project has no clear API test command, default to:
8
+
9
+ ```text
10
+ pytest
11
+ -> httpx or requests client
12
+ -> pytest fixtures for seed/teardown
13
+ -> jsonschema or existing Pydantic/DTO models for contract assertions
14
+ -> optional DB/storage/log read-back
15
+ -> pytest report or JUnit XML
16
+ ```
17
+
18
+ Use Docker Compose, testcontainers, or the repository's local service runner when dependencies need isolation. Mock or stub third-party APIs by default; live external services require an explicit requirement and recorded risk.
19
+
20
+ Place durable API assets under `tests/api/`, following the layout in `opentest/references/test-asset-layout.md`: clients in `tests/api/clients/`, fixtures in `tests/api/fixtures/`, schemas in `tests/api/schemas/`, and a repeatable entry point through `scripts/opentest-run-api.ps1` or the repository's equivalent command.
21
+
22
+ ## Evidence Layers
23
+
24
+ | Layer | Proves | Typical command/tool |
25
+ | --- | --- | --- |
26
+ | contract | status code, headers, response fields, schema, error shape | project contract tests, `pytest`, OpenAPI-based checks |
27
+ | integration | API handler, service, database/storage, queue/log side effects | project integration command, `pytest`, local services |
28
+ | smoke | base URL and critical endpoints are alive | project smoke command, small `pytest`/curl script |
29
+ | security-review | auth, authorization, sensitive field exposure, injection risk | targeted tests plus review notes |
30
+
31
+ ## Required API Cases
32
+
33
+ For API changes, include applicable matrix rows for:
34
+
35
+ - happy path: expected status, payload, headers, and business state
36
+ - validation failure: invalid, empty, boundary, malformed, unsupported fields
37
+ - auth and permission: unauthenticated, expired token, wrong role, object-level authorization
38
+ - not found and stale state: missing resource, deleted resource, stale version
39
+ - conflict and idempotency: duplicate create, repeated submit, retry with idempotency key, concurrent update
40
+ - rate limit or throttling when applicable
41
+ - pagination, filtering, sorting, and empty result when list endpoints are changed
42
+ - data consistency: response, DB/storage, emitted event, queue message, file, or log
43
+ - teardown/cleanup: created resources removed or isolated fixture namespace reset
44
+
45
+ ## Contract Source
46
+
47
+ Prefer contract sources in this order:
48
+
49
+ 1. OpenAPI/protobuf/schema file committed in the repository.
50
+ 2. Existing request/response DTO, Pydantic model, serializer, or typed client.
51
+ 3. Requirement/design document with explicit fields and errors.
52
+ 4. Handwritten schema in the acceptance case.
53
+
54
+ Do not infer contract solely from current implementation behavior when it conflicts with requirements.
55
+
56
+ ## Blocking Rules
57
+
58
+ Record `blocked` when any required prerequisite is missing:
59
+
60
+ - base URL or service start command
61
+ - auth token, role, or test user
62
+ - fixture seed/teardown path
63
+ - dependency service, database, queue, or mock server
64
+ - stable contract source or expected schema
65
+ - deterministic read-after-write surface
66
+
67
+ Do not mark API acceptance as PASS from a 2xx response alone. Write operations require read-after-write evidence from a trustworthy surface such as an API read endpoint, a DB/storage record, a queue/event/log entry, or another project-owned state surface.
68
+
69
+ ## Matrix Requirements
70
+
71
+ `api` rows must include:
72
+
73
+ - `Execution surface`: `api`
74
+ - `Acceptance mode`: `n/a`
75
+ - `Evidence layer`: `contract`, `integration`, `smoke`, or `security-review`
76
+ - `Framework/command`: project API command, `python -m pytest tests/api -v`, curl/httpie script, Postman/Newman when already used, or contract tool
77
+ - `Required evidence`: request/response record, status code, payload/schema assertion, auth/permission assertion when applicable, read-after-write/data consistency, and cleanup/teardown proof
@@ -13,6 +13,9 @@ This reference extracts short rules from the local Codex Harness knowledge base,
13
13
  - `tdd-workflow`
14
14
  - `e2e-runner`
15
15
  - `browser-e2e-testing`
16
+ - `android-midscene-pytest`
17
+ - `opentest-desktop-gui`
18
+ - `opentest-api`
16
19
  - `verification-loop`
17
20
  - `code-reviewer`
18
21
  - `speckit-checklist`
@@ -28,6 +31,19 @@ This reference extracts short rules from the local Codex Harness knowledge base,
28
31
  | Permissions, payments, security, data writes, cross-page loops | high-risk acceptance or E2E evidence |
29
32
  | Copy, configuration, small non-behavioral changes | targeted review or light evidence |
30
33
 
34
+ ## Execution Surface Selection
35
+
36
+ Choose the execution surface separately from the evidence layer:
37
+
38
+ | Surface | Default route |
39
+ | --- | --- |
40
+ | `web-browser` | Chrome DevTools MCP, Playwright CLI, or browser acceptance |
41
+ | `android-app` | `android-midscene-pytest` when available; `python -m pytest tests_py -v` drives Midscene Android through ADB |
42
+ | `desktop-gui` | `opentest-desktop-gui`; project GUI automation first, `@midscene/computer` for visual/native/RDP GUI flows, or scripted manual GUI acceptance when automation is unavailable |
43
+ | `api` | `opentest-api`; project API/integration command first, otherwise `pytest` with `httpx`/`requests`, schema checks, fixtures, read-after-write, and cleanup/teardown |
44
+
45
+ Do not classify code checks such as unit, component, integration, contract, smoke, or security review as execution surfaces. Use them as evidence layers.
46
+
31
47
  ## Frontend Acceptance Dimensions
32
48
 
33
49
  Frontend or real workflow acceptance may choose applicable items from the following dimensions. Full coverage is not required every time:
@@ -74,7 +90,7 @@ The OpenTest plan phase checks these questions by default. If applicable, add th
74
90
  | ACC-001 | User sees success feedback after save | medium | UI acceptance | pending |
75
91
  ```
76
92
 
77
- Add coverage dimension, command, evidence path, and blocker reason columns only when risk or change type requires them.
93
+ Add execution surface, evidence layer, command/tool, evidence path, and blocker reason columns when the matrix drives acceptance execution or risk requires them.
78
94
 
79
95
  ## Quality Gate Heuristics
80
96
 
@@ -8,6 +8,33 @@ Use this reference only when the phase skill asks for detailed coverage rules.
8
8
  plan -> matrix -> fixtures -> tests -> run -> accept -> smoke -> pre-push -> verify -> archive
9
9
  ```
10
10
 
11
+ ## Requirement-First Acceptance
12
+
13
+ `plan` and `author` happen before implementation and must turn requirements into acceptance contracts. Use requirements, design notes, user workflows, business rules, and risk boundaries as sources. Use current code only to discover execution facts such as commands, existing frameworks, routes, fixtures, and reusable helpers.
14
+
15
+ Unit, component, integration, contract, E2E, smoke, and browser acceptance are evidence layers. They describe how to prove a requirement after or during implementation; they do not decide what the requirement is. If code does not exist yet, keep the acceptance case and mark code-dependent evidence as pending or blocked with a reason.
16
+
17
+ ## Execution Surfaces
18
+
19
+ Every matrix row must name both an execution surface and an evidence layer. The execution surface is where the requirement is exercised; the evidence layer is how the result is proven.
20
+
21
+ Before authoring tests, select the fixed asset layout from `opentest/references/test-asset-layout.md`. Option B, the standard framework skeleton, is the default; one-off scripts are allowed only for explicitly non-durable acceptance or blocked investigation.
22
+
23
+ Primary execution surfaces are:
24
+
25
+ - `web-browser`: browser-rendered pages and web apps
26
+ - `android-app`: Android APK/app GUI on emulator or device
27
+ - `desktop-gui`: native desktop GUI, Electron, Tauri, or similar app UI
28
+ - `api`: HTTP API, RPC, backend workflow, contract, or service endpoint
29
+
30
+ Do not use unit, component, integration, contract, smoke, or security review as the execution surface. Those are evidence layers or run gates. If an Android GUI requirement is present, route acceptance through the `android-midscene-pytest` skill when available and require pytest/Midscene/screenshot/logcat evidence. If native desktop GUI behavior is present, route acceptance through `opentest-desktop-gui` and require project GUI automation or `@midscene/computer` evidence plus screenshots, GUI action logs, window/app metadata, and deterministic read-back. If API behavior is present, route acceptance through `opentest-api` and require contract, status code, payload/schema, auth/permission, read-after-write, and cleanup/teardown evidence when applicable.
31
+
32
+ For `web-browser`, choose an acceptance mode from `opentest/references/web-browser-testing.md`. MCP and Playwright CLI are `instant-acceptance` routes; `durable-regression` requires a committed, repeatable test such as `@playwright/test`.
33
+
34
+ For `desktop-gui`, use `opentest/references/desktop-gui-testing.md`. Electron/Tauri DOM-verifiable flows can stay in `web-browser`; native shell, tray, file picker, menu, OS dialog, installer, updater, RDP, and multi-window behavior stay in `desktop-gui`.
35
+
36
+ For `api`, use `opentest/references/api-testing.md`. Project API/integration commands are preferred; without them, use `pytest` with `httpx` or `requests`, schema checks, fixtures, and deterministic read-back.
37
+
11
38
  ## Test Data
12
39
 
13
40
  Create `docs/opentest/fixtures/` for changes that touch data, files, roles, permissions, APIs, or stateful workflows.
@@ -0,0 +1,52 @@
1
+ # Desktop GUI Testing
2
+
3
+ Use this reference for `desktop-gui` execution-surface rows.
4
+
5
+ Durable desktop GUI assets follow `opentest/references/test-asset-layout.md`: scripts under `tests/desktop/scripts/`, Midscene assets under `tests/desktop/midscene/`, metadata captures under `tests/desktop/metadata/`, and repeatable entry through `scripts/opentest-run-desktop.ps1` or the project GUI command.
6
+
7
+ ## Tool Routes
8
+
9
+ | Route | Use when | Required evidence |
10
+ | --- | --- | --- |
11
+ | project GUI automation | The repository already has a repeatable desktop automation command | command, report/log, screenshot or recording, post-action read-back |
12
+ | `@midscene/computer` | Native desktop controls, weak selectors, visual workflows, multi-window flows, or Windows RDP need AI visual assistance | Midscene/computer run log, screenshots, model env status, window/app metadata, deterministic read-back |
13
+ | accessibility/window metadata | Native controls expose stable accessibility tree, title, process, window handle, or menu state | metadata dump, action log, expected state assertion |
14
+ | scripted manual GUI acceptance | No reliable automation exists and the acceptance is one-off | exact steps, screenshots, window/app metadata, observed result, blocker/risk note |
15
+
16
+ ## Midscene Desktop Route
17
+
18
+ `@midscene/computer` is the Midscene desktop automation package. It can control local Windows, macOS, and Linux desktops, and can control remote Windows desktops through RDP when configured.
19
+
20
+ Use it as an OpenTest visual automation layer, not as the whole quality gate:
21
+
22
+ ```text
23
+ desktop-gui matrix row
24
+ -> project launch / environment check
25
+ -> @midscene/computer or project GUI automation
26
+ -> screenshots + GUI action log + window/app metadata
27
+ -> deterministic read/assert changed result
28
+ ```
29
+
30
+ For Electron or Tauri, first decide whether the requirement is DOM-verifiable. DOM-verifiable flows belong to `web-browser`; shell, tray, file picker, native menu, OS dialog, installer, updater, and multi-window behavior belong to `desktop-gui`.
31
+
32
+ ## Blocking Rules
33
+
34
+ Record `blocked` when any required prerequisite is missing:
35
+
36
+ - model credentials for Midscene visual automation
37
+ - desktop access, display, or RDP session
38
+ - app launch command or target process/window identity
39
+ - stable fixture data or reset/teardown path
40
+ - deterministic result surface after writes
41
+
42
+ Do not mark `desktop-gui` acceptance as PASS from an AI visual assertion alone. After create/update/delete/save actions, re-read a trustworthy result surface: reopened window state, file/config value, app storage/API, accessibility metadata, process/window metadata, or visible persisted value after restart.
43
+
44
+ ## Matrix Requirements
45
+
46
+ `desktop-gui` rows must include:
47
+
48
+ - `Execution surface`: `desktop-gui`
49
+ - `Acceptance mode`: `n/a`
50
+ - `Evidence layer`: `gui-acceptance`, `visual-acceptance`, `integration`, or `smoke`
51
+ - `Framework/command`: project GUI command, `@midscene/computer`, accessibility/window metadata script, or scripted manual GUI route
52
+ - `Required evidence`: screenshots or recording, GUI action log, window/app metadata, deterministic read-back, and, when any of these cannot be collected, the missing prerequisite recorded as `blocked`
@@ -2,8 +2,14 @@
2
2
 
3
3
  ## Minimal Columns
4
4
 
5
- | ID | Intent | Trigger/Input | Expected behavior | Risk | Evidence layer | Required evidence | Status |
6
- | --- | --- | --- | --- | --- | --- | --- | --- |
5
+ | ID | Requirement source | Intent | Execution surface | Acceptance mode | Trigger/Input | Expected behavior | Risk | Evidence layer | Required evidence | Status |
6
+ | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
7
+
8
+ `Requirement source` is mandatory. Use a requirement ID, design section, user story, business rule, risk note, issue, or explicit user request. Do not use a function name, component path, or existing test file as the source of acceptance.
9
+
10
+ `Execution surface` is mandatory and must be one of `web-browser`, `android-app`, `desktop-gui`, or `api`.
11
+
12
+ `Acceptance mode` is mandatory for `web-browser` rows: `instant-acceptance`, `durable-regression`, or `visual-ai-assist`. Use `n/a` for `api` and `desktop-gui` rows.
7
13
 
8
14
  ## Optional Columns
9
15
 
@@ -17,6 +23,8 @@ Only add optional columns when risk or change type needs them.
17
23
 
18
24
  ## Evidence Layers
19
25
 
26
+ Evidence layers describe how a requirement will be proven. They do not create or limit requirements.
27
+
20
28
  - `unit`: pure functions, validation rules, state calculation.
21
29
  - `component`: form feedback, button states, local UI states.
22
30
  - `integration`: module collaboration, API client, state management, mock server.
@@ -24,4 +32,6 @@ Only add optional columns when risk or change type needs them.
24
32
  - `e2e`: cross-page flows, login, permissions, critical business loops.
25
33
  - `smoke`: key pages or happy paths do not crash.
26
34
  - `browser-acceptance`: real browser interaction, feedback location, responsive and visual state.
35
+ - `visual-acceptance`: visual GUI behavior on Android app or desktop GUI surfaces.
36
+ - `gui-acceptance`: desktop GUI behavior, window state, dialogs, menus, and native controls.
27
37
  - `security-review`: permissions, sensitive information, authorization bypass, duplicate submit, injection risk.
@@ -16,6 +16,14 @@ Traditional TDD can easily cover only explicit requirements and happy paths. Rea
16
16
 
17
17
  These scenarios should enter the acceptance-to-test matrix before evidence layers are selected.
18
18
 
19
+ ## Requirement-First Contract
20
+
21
+ OpenTest `plan` and `author` phases produce a requirement acceptance contract before implementation. Acceptance cases must come from requirements, design notes, user workflows, business rules, risk boundaries, and expected interaction feedback.
22
+
23
+ Current code may be inspected to discover project facts such as existing test frameworks, commands, routes, fixtures, or reusable helpers. Current code must not decide whether a requirement is accepted, which user-visible behavior is required, or whether a requirement can be dropped.
24
+
25
+ Evidence layers such as `unit`, `component`, `integration`, `contract`, or `e2e` describe how the requirement will be proven after or during implementation. They are not requirement sources. If no code exists yet, the matrix still records the required behavior and marks implementation-dependent evidence as pending.
26
+
19
27
  ## Recommended Order
20
28
 
21
29
  ```text
@@ -42,6 +50,8 @@ Requirement / OpenSuper design
42
50
 
43
51
  ## Output Requirements
44
52
 
53
+ - Every matrix row must cite a requirement source, such as a requirement ID, design section, user story, business rule, risk note, or explicit user request.
54
+ - Acceptance wording must be implementation-independent: describe user-observable behavior and business result, not current function names, component internals, or existing test files.
45
55
  - Every required scenario must have an evidence layer and execution surface.
46
56
  - blocked is not pass; it must include a reason and recovery path.
47
57
  - Missing evidence for high-risk scenarios defaults to fail unless the user explicitly accepts the risk and the reason is written down.
@@ -0,0 +1,64 @@
1
+ # Test Asset Layout
2
+
3
+ OpenTest default test assets use Option B: a standard framework skeleton. Do not decide directories ad hoc during `author`.
4
+
5
+ ## Default Layout
6
+
7
+ ```text
8
+ tests/
9
+ api/
10
+ conftest.py
11
+ clients/
12
+ fixtures/
13
+ schemas/
14
+ test_contract_*.py
15
+ test_permissions_*.py
16
+ test_crud_*.py
17
+ web/
18
+ playwright/
19
+ midscene/
20
+ android/
21
+ tests_py/
22
+ midscene/
23
+ desktop/
24
+ scripts/
25
+ midscene/
26
+ metadata/
27
+
28
+ docs/opentest/
29
+ matrix.md
30
+ fixtures/
31
+ acceptance/
32
+ runs/
33
+ reports/
34
+
35
+ scripts/
36
+ opentest-run-api.ps1
37
+ opentest-run-web.ps1
38
+ opentest-run-android.ps1
39
+ opentest-run-desktop.ps1
40
+ ```
41
+
42
+ When the project already has equivalent directories, use the closest match, but keep the same logical slots: surface tests under `tests/<surface>/`, evidence under `docs/opentest/`, and repeatable entry scripts under `scripts/opentest-run-*.ps1` or the repository's equivalent command.
43
+
44
+ ## Shape Rules
45
+
46
+ - The default is not a large QA platform. It is a stable skeleton for repeatable tests, fixtures, reports, and run entry points.
47
+ - Do not create one-off scripts as the durable path. One-off scripts may be used only for `instant-acceptance` or blocked investigation evidence.
48
+ - Do not create a separate top-level QA project unless the user explicitly chooses a team-scale template.
49
+ - `author` creates or updates assets only inside the chosen layout.
50
+ - `run` invokes fixed entry commands, not newly invented paths.
51
+ - `accept` records evidence under `docs/opentest/acceptance/` and run artifacts under `docs/opentest/runs/` or `docs/opentest/reports/`.
52
+
53
+ ## Surface Mapping
54
+
55
+ | Surface | Durable test location | Default run entry |
56
+ | --- | --- | --- |
57
+ | `api` | `tests/api/` | `scripts/opentest-run-api.ps1` or `python -m pytest tests/api -v` |
58
+ | `web-browser` | `tests/web/playwright/` and `tests/web/midscene/` | `scripts/opentest-run-web.ps1` or project E2E command |
59
+ | `android-app` | `tests/android/tests_py/` and `tests/android/midscene/` | `scripts/opentest-run-android.ps1` or `python -m pytest tests_py -v` in an existing Android harness |
60
+ | `desktop-gui` | `tests/desktop/scripts/`, `tests/desktop/midscene/`, `tests/desktop/metadata/` | `scripts/opentest-run-desktop.ps1` or project GUI command |
61
+
62
+ ## When To Use A Lighter Shape
63
+
64
+ Use a lighter script-only shape only when the matrix row is explicitly one-off, exploratory, or blocked. Mark it as not durable regression and record the reason in `gap/blocker`.
@@ -0,0 +1,101 @@
1
+ # Test Surfaces
2
+
3
+ OpenTest classifies acceptance execution by surface and evidence layer:
4
+
5
+ - `Execution surface` is where the requirement is exercised from the user's or caller's point of view.
6
+ - `Evidence layer` is how the requirement is proven, for example unit, integration, contract, e2e, smoke, or security review.
7
+ - Test assets use the fixed layout in `opentest/references/test-asset-layout.md`; do not invent directories during authoring.
8
+
9
+ Keep the primary execution surface to one of these four values:
10
+
11
+ | Surface | Use when | Default acceptance path | Required artifacts |
12
+ | --- | --- | --- | --- |
13
+ | `web-browser` | Browser-rendered web pages, web apps, admin consoles, SaaS, dashboards | Use `opentest-web-browser`: Playwright MCP first, Playwright CLI fallback, `@playwright/test` for durable regression, Midscene only for visual assist | screenshots, snapshots, post-submit assertions, console/network notes, trace/report when durable |
14
+ | `android-app` | Android APK or Android app GUI on emulator/device | Use the `android-midscene-pytest` skill when available: pytest orchestrates, `@midscene/android` executes visual automation, ADB/emulator controls device | pytest report, Midscene HTML report, screenshots, logcat, device/app metadata |
15
+ | `desktop-gui` | Native desktop GUI or Electron/Tauri/Windows/macOS/Linux app UI | Use `opentest-desktop-gui`: project GUI automation first; `@midscene/computer` for visual desktop automation, weak selectors, native controls, multi-window flows, or RDP; scripted manual GUI acceptance only when automation is unavailable | screenshots or recording, GUI action log, window/app metadata, deterministic read-back, failure capture |
16
+ | `api` | HTTP API, RPC, backend workflow, contract, service endpoint | Use `opentest-api`: project API/integration command first; otherwise `pytest` with `httpx` or `requests`, schema checks, fixtures, and read-after-write evidence | request/response records, status codes, payload/schema assertions, auth/permission results, data consistency, cleanup/teardown, logs |
17
+
18
+ Do not invent a fifth primary surface. Code checks such as `unit`, `component`, `integration`, `contract`, `smoke`, and `security-review` are evidence layers or run gates, not primary surfaces.
19
+
20
+ ## Surface vs Evidence Layer
21
+
22
+ Examples:
23
+
24
+ | Requirement | Execution surface | Evidence layer |
25
+ | --- | --- | --- |
26
+ | User submits a web form and sees the saved item | `web-browser` | `browser-acceptance` + `integration` |
27
+ | Android user creates a task in the app | `android-app` | `e2e` + `visual-acceptance` |
28
+ | Desktop app opens a settings dialog and saves a preference | `desktop-gui` | `gui-acceptance` + `integration` |
29
+ | Client creates an entity through REST API | `api` | `contract` + `integration` |
30
+
31
+ ## Android App Surface
32
+
33
+ For Android GUI work, route through `android-midscene-pytest` when installed:
34
+
35
+ ```text
36
+ python -m pytest tests_py -v
37
+ -> npm/Vitest wrapper
38
+ -> @midscene/android
39
+ -> ADB + Android emulator/device
40
+ -> screenshots + logcat + Midscene HTML report
41
+ ```
42
+
43
+ Route selection:
44
+
45
+ - Stable automation, demos, and repeatable reports: use `pytest -> npm/Vitest -> @midscene/android -> ADB`.
46
+ - One-off natural-language exploration: Midscene YAML runner is optional, but it must not replace the pytest entry.
47
+ - Agent-controlled Android: Midscene MCP may be used only when it has been separately configured with `MIDSCENE_MCP_ANDROID_MODE=true`; do not write global MCP config automatically.
48
+ - Pure Python stack: evaluate `midscene-python` only when the user explicitly asks.
49
+
50
+ Layered run:
51
+
52
+ - User-facing entry is `python -m pytest tests_py -v`.
53
+ - pytest should check ADB, prepare emulator/device, install APK, run ADB smoke, and collect evidence before Midscene.
54
+ - Within the pytest run, invoke `npm run test:android` only when model environment variables are complete.
55
+ - Run `npm run test:android` directly, outside the pytest entry, only to debug the Midscene layer.
56
+
57
+ If Midscene model credentials, ADB, emulator/device, APK path, or package name are missing, record `blocked` with the exact missing prerequisite. Do not mark Android GUI acceptance as pass from a static screenshot alone.
58
+
59
+ Failure evidence should include any available `midscene_run/log/ai-call.log`, `midscene_run/log/agent.log`, `midscene_run/log/android-device.log`, and `midscene_run/report/*.html`.
60
+
61
+ ## Web Browser Surface
62
+
63
+ For `web-browser`, read `opentest/references/web-browser-testing.md` or use `opentest-web-browser`.
64
+
65
+ Set `Acceptance mode`:
66
+
67
+ - `instant-acceptance`: Playwright MCP first, Playwright CLI fallback.
68
+ - `durable-regression`: `@playwright/test` or the repository's existing E2E framework.
69
+ - `visual-ai-assist`: Midscene for weak selectors, canvas, cross-frame UI, or visual matching.
70
+
71
+ Do not treat MCP or Playwright CLI evidence as durable regression by itself.
72
+
73
+ ## Desktop GUI Surface
74
+
75
+ For `desktop-gui`, read `opentest/references/desktop-gui-testing.md` or use `opentest-desktop-gui`.
76
+
77
+ Route selection:
78
+
79
+ - Prefer explicit project GUI automation when the repository already provides a repeatable command.
80
+ - Use `@midscene/computer` for native controls, visual workflows, weak selectors, multi-window flows, or Windows RDP that needs AI visual assistance.
81
+ - For Electron or Tauri, use `web-browser` when the requirement is DOM-verifiable; keep native shell, tray, file picker, menu, OS dialog, installer, updater, and multi-window behavior under `desktop-gui`.
82
+ - Scripted manual GUI acceptance is a fallback for one-off evidence, not durable regression.
83
+
84
+ Do not record `desktop-gui` as PASS from an AI visual assertion alone. Save/create/update/delete flows must include screenshots or a recording, a GUI action log, window/app metadata, and a deterministic read-back that asserts the changed result after reopening the window, restarting the app, or reading a trusted app/file/config state.
85
+
86
+ ## API Surface
87
+
88
+ For `api`, read `opentest/references/api-testing.md` or use `opentest-api`.
89
+
90
+ Route selection:
91
+
92
+ - Prefer explicit project API, integration, contract, or smoke commands.
93
+ - If no project command exists, default to `python -m pytest tests/api -v` with `httpx` or `requests`, `jsonschema` or existing Pydantic/DTO models, and pytest fixtures for seed/teardown.
94
+ - Use OpenAPI, protobuf, schema files, DTOs, serializers, typed clients, or requirement docs as contract sources.
95
+ - Mock or stub third-party APIs unless the requirement explicitly needs live external services.
96
+
97
+ Do not record `api` as PASS from a 2xx response alone. API writes must include request/response records, payload/schema assertions, auth/permission checks when applicable, read-after-write/data consistency, and cleanup or teardown proof.
98
+
99
+ ## Matrix Rule
100
+
101
+ Every matrix row must include both `Execution surface` and `Evidence layer`. `web-browser` rows must also include `Acceptance mode`. If a requirement needs more than one surface or mode, split it into separate rows or state the primary surface and add secondary evidence in `Required evidence`.
@@ -0,0 +1,40 @@
1
+ # Web Browser Testing
2
+
3
+ Use this reference for `web-browser` execution-surface rows.
4
+
5
+ Durable web assets follow `opentest/references/test-asset-layout.md`: Playwright tests under `tests/web/playwright/`, Midscene visual assists under `tests/web/midscene/`, and repeatable entry through `scripts/opentest-run-web.ps1` or the repository's existing E2E command.
6
+
7
+ ## Acceptance Modes
8
+
9
+ | Mode | Use when | Default tool | Required evidence |
10
+ | --- | --- | --- | --- |
11
+ | `instant-acceptance` | Prove the current change in a real browser now | Playwright MCP first, Playwright CLI fallback | snapshots, action steps, post-submit assertion, screenshot, console/network notes |
12
+ | `durable-regression` | The workflow must run repeatedly in CI or future releases | `@playwright/test` or existing E2E framework | committed test file, deterministic locators/assertions, command, report/trace path |
13
+ | `visual-ai-assist` | Selectors cannot reliably prove the UI state | Midscene plus Playwright or project browser driver | Midscene report, screenshot, and deterministic read/assert result |
14
+
15
+ ## Tool Rules
16
+
17
+ - Playwright MCP and Playwright CLI are `instant-acceptance` tools. They are useful for live exploration and proof, but they are not durable regression by themselves.
18
+ - `@playwright/test` or the project's existing E2E framework is the default durable regression path.
19
+ - Midscene is a supplemental AI visual UI automation layer for weak selectors, canvas, cross-frame UI, visual matching, or natural-language exploration.
20
+ - Do not record `visual-ai-assist` as PASS from an AI assertion alone. After any write, re-read the changed value from a trustworthy result surface and assert on it deterministically.
21
+
22
+ ## Required Web Write Chain
23
+
24
+ ```text
25
+ open -> snapshot -> fill/input -> click(submit/confirm) -> snapshot -> read/assert changed result -> screenshot -> PASS/FAIL
26
+ ```
27
+
28
+ PASS must name the changed value and where it was read back: page, list, detail view, API response, storage record, or logs.
29
+
30
+ ## Matrix Fields
31
+
32
+ For `web-browser`, include:
33
+
34
+ - `Execution surface`: `web-browser`
35
+ - `Acceptance mode`: `instant-acceptance`, `durable-regression`, or `visual-ai-assist`
36
+ - `Evidence layer`: `browser-acceptance`, `e2e`, `visual-acceptance`, `integration`, or `smoke`
37
+ - `Framework/command`: MCP, Playwright CLI, `npx playwright test`, project E2E command, or Midscene route
38
+ - `Required evidence`: snapshots, screenshot, post-submit assertion, report/trace, console/network notes, or Midscene report
39
+
40
+ If a feature needs both `instant-acceptance` and `durable-regression`, split it into two rows.
@@ -5,7 +5,9 @@
5
5
  - intent:
6
6
  - context:
7
7
  - actor:
8
- - execution surface:
8
+ - execution surface: web-browser | android-app | desktop-gui | api
9
+ - acceptance mode:
10
+ - evidence layer:
9
11
  - trigger/input:
10
12
  - expected feedback location:
11
13
  - status: pending
@@ -0,0 +1,44 @@
1
+ # API Acceptance
2
+
3
+ ## ACC-API-001
4
+
5
+ - execution surface: api
6
+ - acceptance mode: n/a
7
+ - tool route: project API command | pytest + httpx/requests | curl/httpie | Postman/Newman | contract tool
8
+ - evidence layer: contract | integration | smoke | security-review
9
+ - base URL:
10
+ - auth/role:
11
+ - fixture/seed:
12
+ - teardown:
13
+ - status: pending
14
+
15
+ ### Request
16
+
17
+ - method:
18
+ - path:
19
+ - headers:
20
+ - query:
21
+ - body:
22
+
23
+ ### Expected Response
24
+
25
+ - status code:
26
+ - headers:
27
+ - schema/source:
28
+ - payload assertions:
29
+ - error contract:
30
+
31
+ ### Read-Back Contract
32
+
33
+ - API read endpoint:
34
+ - DB/storage/log/event assertion:
35
+ - idempotency/retry assertion:
36
+ - cleanup assertion:
37
+
38
+ ### Evidence
39
+
40
+ - status:
41
+ - request/response record:
42
+ - report path:
43
+ - artifacts:
44
+ - blockers:
@@ -0,0 +1,43 @@
1
+ # Desktop GUI Acceptance
2
+
3
+ ## ACC-Desktop-001
4
+
5
+ - execution surface: desktop-gui
6
+ - acceptance mode: n/a
7
+ - tool route: project GUI automation | @midscene/computer | accessibility/window metadata | scripted manual GUI acceptance
8
+ - evidence layer: gui-acceptance | visual-acceptance | integration | smoke
9
+ - target app/window:
10
+ - launch command:
11
+ - fixture/reset:
12
+ - status: pending
13
+
14
+ ### Environment
15
+
16
+ - OS/display/RDP:
17
+ - model env status:
18
+ - app version/build:
19
+ - target process/window metadata:
20
+
21
+ ### Steps
22
+
23
+ 1.
24
+
25
+ ### Expected Outcome
26
+
27
+ -
28
+
29
+ ### Read-Back Contract
30
+
31
+ - persisted result surface:
32
+ - reopen/restart check:
33
+ - accessibility/window metadata assertion:
34
+ - file/config/app-state assertion:
35
+
36
+ ### Evidence
37
+
38
+ - status:
39
+ - screenshots/recording:
40
+ - GUI action log:
41
+ - window/app metadata:
42
+ - Midscene/computer report or log:
43
+ - blockers: