@hasna/testers 0.0.36 → 0.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ import { type HealthScanOptions, type HealthScanSummary } from "./health-scan.js";
2
+ import { type SmokeResult } from "./smoke.js";
3
+ export type QuickQaScanner = NonNullable<HealthScanOptions["scanners"]>[number];
4
+ export type QuickQaSkipTarget = QuickQaScanner | "smoke";
5
+ export type QuickQaStatus = "passed" | "warn" | "failed";
6
+ export declare const DEFAULT_QUICK_QA_SCANNERS: QuickQaScanner[];
7
+ export interface QuickQaSelection {
8
+ scanners: QuickQaScanner[];
9
+ includeSmoke: boolean;
10
+ skipped: QuickQaSkipTarget[];
11
+ }
12
+ export interface QuickQaOptions {
13
+ url: string;
14
+ pages?: string[];
15
+ projectId?: string;
16
+ headed?: boolean;
17
+ timeoutMs?: number;
18
+ maxPages?: number;
19
+ scanners?: QuickQaScanner[];
20
+ includeSmoke?: boolean;
21
+ model?: string;
22
+ wcagLevel?: "A" | "AA" | "AAA";
23
+ }
24
+ export interface QuickQaCheckSummary {
25
+ name: "health" | "smoke";
26
+ status: QuickQaStatus | "skipped";
27
+ issues: number;
28
+ actionableIssues: number;
29
+ detail: string;
30
+ }
31
+ export interface QuickQaResult {
32
+ url: string;
33
+ status: QuickQaStatus;
34
+ durationMs: number;
35
+ health: HealthScanSummary;
36
+ smoke: SmokeResult | null;
37
+ checks: QuickQaCheckSummary[];
38
+ issueCounts: {
39
+ total: number;
40
+ actionable: number;
41
+ health: number;
42
+ smoke: number;
43
+ };
44
+ }
45
+ export declare function normalizeQuickQaWcagLevel(value: unknown): "A" | "AA" | "AAA";
46
+ export declare function resolveQuickQaSelection(options?: {
47
+ skip?: string[];
48
+ includeA11y?: boolean;
49
+ includeSmoke?: boolean;
50
+ scanners?: QuickQaScanner[];
51
+ }): QuickQaSelection;
52
+ export declare function runQuickQa(options: QuickQaOptions): Promise<QuickQaResult>;
53
+ export declare function buildQuickQaResult(input: {
54
+ url: string;
55
+ health: HealthScanSummary;
56
+ smoke: SmokeResult | null;
57
+ durationMs: number;
58
+ }): QuickQaResult;
59
+ export declare function getQuickQaExitCode(result: QuickQaResult): number;
60
+ export declare function formatQuickQaReport(result: QuickQaResult): string;
61
+ //# sourceMappingURL=quick-qa.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"quick-qa.d.ts","sourceRoot":"","sources":["../../src/lib/quick-qa.ts"],"names":[],"mappings":"AAAA,OAAO,EAAiB,KAAK,iBAAiB,EAAE,KAAK,iBAAiB,EAAE,MAAM,kBAAkB,CAAC;AACjG,OAAO,EAAY,KAAK,WAAW,EAAE,MAAM,YAAY,CAAC;AAExD,MAAM,MAAM,cAAc,GAAG,WAAW,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AAChF,MAAM,MAAM,iBAAiB,GAAG,cAAc,GAAG,OAAO,CAAC;AACzD,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,MAAM,GAAG,QAAQ,CAAC;AAEzD,eAAO,MAAM,yBAAyB,EAAE,cAAc,EAKrD,CAAC;AAgBF,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,YAAY,EAAE,OAAO,CAAC;IACtB,OAAO,EAAE,iBAAiB,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,cAAc,EAAE,CAAC;IAC5B,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,GAAG,GAAG,IAAI,GAAG,KAAK,CAAC;CAChC;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,QAAQ,GAAG,OAAO,CAAC;IACzB,MAAM,EAAE,aAAa,GAAG,SAAS,CAAC;IAClC,MAAM,EAAE,MAAM,CAAC;IACf,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,aAAa;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,aAAa,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,iBAAiB,CAAC;IAC1B,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC;IAC1B,MAAM,EAAE,mBAAmB,EAAE,CAAC;IAC9B,WAAW,EAAE;QACX,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAED,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,KAAK,CAK5E;AAED,wBAAgB,uBAAuB,CAAC,OAAO,GAAE;IAC/C,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,cAAc,EAAE,CAAC;CACxB,GAAG,gBAAgB,CAwBxB;AAED,wBAAsB,UAAU,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,CA6BhF;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE;IACxC,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,iBAAiB,CAAC;IAC1B,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE,MAAM,CAAC;CACpB,GAAG,aAAa,CA2DhB;AAED,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,aAAa,GAAG,MAA
M,CAEhE;AAED,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,aAAa,GAAG,MAAM,CA8CjE"}
package/dist/mcp/index.js CHANGED
@@ -52,7 +52,7 @@ var package_default;
52
52
  var init_package = __esm(() => {
53
53
  package_default = {
54
54
  name: "@hasna/testers",
55
- version: "0.0.36",
55
+ version: "0.0.37",
56
56
  description: "AI-powered QA testing CLI \u2014 spawns cheap AI agents to test web apps with headless browsers",
57
57
  type: "module",
58
58
  main: "dist/index.js",
@@ -71,6 +71,7 @@ var init_package = __esm(() => {
71
71
  files: [
72
72
  "dist/",
73
73
  "dashboard/dist/",
74
+ "skills/",
74
75
  "LICENSE",
75
76
  "README.md"
76
77
  ],
@@ -46910,7 +46910,7 @@ import { join as join14 } from "path";
46910
46910
  // package.json
46911
46911
  var package_default = {
46912
46912
  name: "@hasna/testers",
46913
- version: "0.0.36",
46913
+ version: "0.0.37",
46914
46914
  description: "AI-powered QA testing CLI \u2014 spawns cheap AI agents to test web apps with headless browsers",
46915
46915
  type: "module",
46916
46916
  main: "dist/index.js",
@@ -46929,6 +46929,7 @@ var package_default = {
46929
46929
  files: [
46930
46930
  "dist/",
46931
46931
  "dashboard/dist/",
46932
+ "skills/",
46932
46933
  "LICENSE",
46933
46934
  "README.md"
46934
46935
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/testers",
3
- "version": "0.0.36",
3
+ "version": "0.0.37",
4
4
  "description": "AI-powered QA testing CLI — spawns cheap AI agents to test web apps with headless browsers",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -19,6 +19,7 @@
19
19
  "files": [
20
20
  "dist/",
21
21
  "dashboard/dist/",
22
+ "skills/",
22
23
  "LICENSE",
23
24
  "README.md"
24
25
  ],
@@ -0,0 +1,97 @@
1
+ ---
2
+ name: skill-debug-prod
3
+ description: "Create a safe testers-powered production debug plan for a prod URL, request ID, session ID, project ID, org/user identifier, or login-as/check-prod request without leaking secrets or crossing tenant boundaries."
4
+ argument-hint: "<prod-url|session-id|project-id|user-email|request-id> [--browser] [--messages] [--jobs] [--blocks] [--logs] [--full]"
5
+ user_invocable: true
6
+ ---
7
+
8
+ # skill-debug-prod
9
+
10
+ Use this skill to investigate production issues while preserving customer
11
+ privacy, tenant boundaries, and auditability. The execution surface is
12
+ `testers prod-debug`; this skill is the safety policy and follow-through loop.
13
+
14
+ ## Safety Rules
15
+
16
+ 1. Never print secrets, cookies, bearer tokens, password reset links, magic
17
+ links, OAuth codes, private keys, raw headers, or full auth state.
18
+ 2. Never ask for or use a customer's password.
19
+ 3. Never query by a user-controlled identifier alone. Resolve the target org,
20
+ user, project, session, or request and verify scope before reading data.
21
+ 4. Never export bulk production data. Read only the minimum records needed.
22
+ 5. Never perform a production write unless the user explicitly approves that
23
+ exact write and the audit trail records the reason.
24
+ 6. Browser reproduction must use an audited support URL/session/grant. If none
25
+ exists, do read-only log/API checks and report the missing support tool.
26
+
27
+ ## Start With Testers
28
+
29
+ Create the safe plan before touching app-specific tools:
30
+
31
+ ```bash
32
+ testers prod-debug "<target>" --reason "<why you are debugging>" --json
33
+ testers prod-debug "<prod-url>" --profile "<configured-app-profile>" --reason "<why>" --json
34
+ testers prod-debug "<prod-url>" --support-url "<audited-support-url>" --support-grant "<grant-id>" --reason "<why>"
35
+ ```
36
+
37
+ The command redacts sensitive URL parameters, parses likely org/project/session
38
+ identifiers, proposes safe log/API/browser checks, and blocks user-scoped
39
+ browser reproduction unless audited support access is present.
40
+
41
+ If the CLI cannot produce the needed plan, add the missing capability to
42
+ `open-testers`, test it, publish it, reinstall it, and then rerun the production
43
+ debug plan.
44
+
45
+ ## Evidence To Capture
46
+
47
+ Accept any of:
48
+ - Prod URL
49
+ - Request ID
50
+ - Session ID
51
+ - Project ID/reference
52
+ - Org slug or user email
53
+ - Browser/login/OAuth/connector symptom
54
+
55
+ Capture sanitized evidence only:
56
+ - Target org/project/session/user identifiers after scoping checks
57
+ - Request IDs, job IDs, block IDs, timestamps, routes, status codes
58
+ - Error names/codes and short redacted snippets
59
+ - Support access grant/audit ID, scope, and TTL when used
60
+
61
+ ## Debugging Order
62
+
63
+ 1. Run `testers prod-debug` and follow its safe checks.
64
+ 2. Use app-specific audited wrappers only after the target and plan are clear.
65
+ 3. For login or browser repro, mint/use a support session with a short TTL.
66
+ 4. For logs, filter by request/session/project/org and redact before posting.
67
+ 5. For database reads, keep queries read-only and org-scoped.
68
+ 6. For connector/OAuth bugs, record provider, sanitized callback URL, request
69
+ ID, error code, and any redirect URI mismatch. Never reveal OAuth codes or tokens.
70
+
71
+ ## Output
72
+
73
+ Return a concise sanitized report:
74
+
75
+ ```text
76
+ Target
77
+ - org/project/session/user: ...
78
+ - support access: read-only/browser-debug, TTL, audit id if available
79
+
80
+ Findings
81
+ - ...
82
+
83
+ Evidence
84
+ - request IDs, job IDs, block IDs, timestamps, statuses
85
+
86
+ Likely cause
87
+ - ...
88
+
89
+ Fix/next action
90
+ - code/config path to patch; approval needed if a prod write is required
91
+ ```
92
+
93
+ ## Done
94
+
95
+ Done means the production target was scoped, `testers prod-debug` was run, the
96
+ safe checks were followed, evidence was recorded in the active task, and any
97
+ fix or missing tool has a verified follow-up.
@@ -0,0 +1,81 @@
1
+ ---
2
+ name: skill-quick-qa
3
+ description: "Run a quick testers-powered QA pass, fix every bug found when the user asked for fixes, and verify the final app behavior. Trigger for quick QA, smoke test this, check the app, run browser QA, or find and fix product bugs."
4
+ user_invocable: true
5
+ ---
6
+
7
+ # skill-quick-qa
8
+
9
+ Use `testers` as the primary execution surface. This skill is for a fast but
10
+ real QA pass: server health, console/runtime errors, broken links, performance,
11
+ optional accessibility, and optional autonomous smoke exploration.
12
+
13
+ This is not a report-only skill when the user asks for fixes. Find issues, turn
14
+ them into tracked tasks, fix the root cause, rerun the failing check, then rerun
15
+ the quick QA pass.
16
+
17
+ ## Start
18
+
19
+ 1. Create or update the active `todos` task and add a progress comment.
20
+ 2. Determine the app URL:
21
+ - Use the URL from the user when provided.
22
+ - Otherwise inspect the repo for a dev command and port, start the server
23
+ yourself, and use `http://<machine>:<port>` for remote machine access.
24
+ - Bind local dev servers to `0.0.0.0` when another machine needs to reach
25
+ them.
26
+ 3. Confirm the server is answering before running browser checks:
27
+ ```bash
28
+ curl -fsS "<url>" >/tmp/testers-health.html
29
+ testers doctor
30
+ ```
31
+
32
+ ## Run The Quick Pass
33
+
34
+ Default:
35
+
36
+ ```bash
37
+ testers quick-qa "<url>" --json --output /tmp/testers-quick-qa.json
38
+ ```
39
+
40
+ Use these variants when they fit:
41
+
42
+ ```bash
43
+ testers quick-qa "<url>" --no-smoke --json --output /tmp/testers-quick-qa.json
44
+ testers quick-qa "<url>" --a11y AA --json --output /tmp/testers-quick-qa.json
45
+ testers quick-qa "<url>" --page / --page /login --page /dashboard --json
46
+ testers quick-qa "<url>" --skip perf --skip smoke --json
47
+ ```
48
+
49
+ Use `testers quick-check` only as an alias for `testers quick-qa`.
50
+
51
+ If `testers quick-qa` is not available in the installed CLI, update/publish
52
+ `@hasna/testers` from `open-testers` instead of falling back to unrelated
53
+ browser tools.
54
+
55
+ ## Fix Loop
56
+
57
+ For each failing issue:
58
+
59
+ 1. Record the failing URL, check name, severity, message, screenshot/report ID,
60
+ and command in the task comment.
61
+ 2. Classify the failure:
62
+ - App bug: broken route, UI state, console/network/runtime failure, bad auth.
63
+ - Test setup bug: stale scenario, missing auth, missing seed data.
64
+ - Environment bug: server down, migrations missing, provider key unavailable.
65
+ 3. Fix the smallest root cause and add a regression test at the repo's natural
66
+ test layer.
67
+ 4. Rerun the narrow failing command.
68
+ 5. Rerun `testers quick-qa`.
69
+
70
+ For deeper flows that quick QA cannot cover, switch to `skill-testers-qa` or
71
+ `skill-testers-workflow` and use saved scenarios/workflows rather than ad hoc
72
+ manual clicking.
73
+
74
+ ## Done
75
+
76
+ Only report completion when:
77
+ - `testers quick-qa` has run against the target app.
78
+ - Bugs found in a fix request are fixed and reverified.
79
+ - The final command, output file/report, and remaining tracked issues are posted
80
+ to the active task.
81
+ - Any remaining failures have explicit follow-up tasks with evidence.
@@ -0,0 +1,89 @@
1
+ ---
2
+ name: skill-testers-qa
3
+ description: "Use @hasna/testers for a serious AI-native QA pass on a web app or repo. Trigger for requests like test this app, QA this feature, run testers, check the preview, validate auth/pages, run local or sandbox browser tests, or find and fix product bugs."
4
+ user_invocable: true
5
+ ---
6
+
7
+ # skill-testers-qa
8
+
9
+ Use `testers` as the execution surface for app QA. This is broader than unit
10
+ testing: it checks real pages, browser behavior, generated scenarios, repo-native
11
+ tests, screenshots, console/network failures, personas, accessibility, and
12
+ regressions. If bugs are found and the user asked for fixes, fix them and rerun.
13
+
14
+ ## Start
15
+
16
+ 1. Create or update a `todos` task and post a short start message:
17
+ ```bash
18
+ todos add "QA <app or feature>" --project "$(pwd)" --priority high --tags qa,testers
19
+ conversations send --space "<project-or-testers>" "Starting QA: <scope>"
20
+ ```
21
+ 2. Identify the target:
22
+ - If the user gave a URL, use it.
23
+ - If the app is local, discover the dev command and port from `package.json`,
24
+ `.env`, server docs, or existing process state. Start/restart it yourself.
25
+ - On multi-machine work, bind servers to `0.0.0.0` and use
26
+ `http://<machine>:<port>`.
27
+ 3. Run setup checks:
28
+ ```bash
29
+ testers doctor
30
+ testers project list --json || true
31
+ testers list --json || true
32
+ testers repo discover . --json || true
33
+ ```
34
+ Do not print API keys or secrets. If no provider key is available, either use
35
+ deterministic/repo-native tests or fix the key setup through the approved
36
+ secrets workflow.
37
+
38
+ ## Choose The Run
39
+
40
+ - Fast default pass: `testers quick-qa <url> --json --output /tmp/testers-quick-qa.json`
41
+ - Fast default without AI smoke: `testers quick-qa <url> --no-smoke --json`
42
+ - Fast default with accessibility: `testers quick-qa <url> --a11y AA --json`
43
+ - Existing scenarios: `testers run <url> --json --output /tmp/testers-run.json`
44
+ - No scenarios yet: `testers run <url> --auto-generate --json --output /tmp/testers-run.json`
45
+ - Focused feature: `testers generate <url> --focus "<area>" --save`, then run by
46
+ tag or scenario.
47
+ - Fast CI smoke: `testers run <url> --smoke --minimal --json`
48
+ - Accessibility: `testers run <url> --a11y AA --json`
49
+ - Selector churn: add `--self-heal` when the goal is to repair flaky selectors.
50
+ - Changed files only: `testers run-affected <url>` or `testers run <url> --diff`.
51
+ - Repo-native Playwright: `testers repo prepare .` then `testers repo run .`.
52
+ - Larger or risky workflow: create/run a sandbox workflow with
53
+ `skill-testers-workflow`.
54
+
55
+ Prefer provider-specific model IDs when useful:
56
+ - Cerebras: `--model qwen-*` or `--model llama-*`
57
+ - Z.AI GLM: `--model glm-5.1`
58
+ - OpenAI: `--model gpt-*`
59
+ - Google: `--model gemini-*`
60
+ - Anthropic/default: Claude model IDs or presets
61
+
62
+ ## Investigate Failures
63
+
64
+ After a run:
65
+
66
+ ```bash
67
+ testers runs --json
68
+ testers results <run-id> --json
69
+ testers screenshots <run-or-result-id> --json
70
+ testers report <run-id>
71
+ ```
72
+
73
+ Classify each failure before editing:
74
+ - App bug: user-visible error, broken route, console/network failure, bad UI state.
75
+ - Test bug: stale selector, wrong assumption, missing auth/persona/setup.
76
+ - Environment bug: server down, database not migrated, missing provider key.
77
+
78
+ If it is an app bug, reproduce with the smallest scenario or browser step,
79
+ write a regression test where the repo has an appropriate test layer, fix the
80
+ root cause, rerun the failing scenario, then rerun the relevant suite.
81
+
82
+ ## Done
83
+
84
+ The task is done only when:
85
+ - The target URL/app was actually exercised.
86
+ - Results, screenshots, or report IDs are recorded in the task/comment.
87
+ - Bugs found during a fix request are fixed and reverified.
88
+ - The final run is green or remaining failures are scoped, reproduced, and
89
+ intentionally tracked as follow-up tasks.
@@ -0,0 +1,126 @@
1
+ ---
2
+ name: skill-testers-workflow
3
+ description: "Create, run, and maintain reusable @hasna/testers workflows for deterministic scripts, agentic goal loops, personas, local execution, and sandbox execution. Trigger when asked to map workflows, test a user journey, run a script, use sandboxes, or make repeatable QA flows."
4
+ user_invocable: true
5
+ ---
6
+
7
+ # skill-testers-workflow
8
+
9
+ Use this when a QA request is more than a one-off page check: auth flows,
10
+ project creation, chat prompts, connector setup, billing, admin actions,
11
+ multi-persona behavior, non-deterministic AI interactions, or any flow that
12
+ should be saved and rerun.
13
+
14
+ ## Model The Workflow First
15
+
16
+ 1. Name the user-visible journey, not the implementation detail.
17
+ 2. Split deterministic checks from agentic/non-deterministic steps.
18
+ 3. Decide the execution target:
19
+ - `local`: fast, cheap, good for simple flows and local dev servers.
20
+ - `sandbox`: bigger, slower, better for isolated repo setup, long-running
21
+ workflows, destructive tests, or tests that need a clean machine.
22
+ 4. Decide whether this should be:
23
+ - Scenarios: stored steps run by `testers run`.
24
+ - A workflow: reusable saved bundle with tags/personas/goal/sandbox config.
25
+ - A hybrid script: TypeScript file run by `testers run-script`.
26
+ - A goal loop: `testers workflow agent`, which can create open-todos next
27
+ actions from observed failures.
28
+
29
+ ## Create Scenarios
30
+
31
+ For manual scenario steps:
32
+
33
+ ```bash
34
+ testers add "User can create a project" \
35
+ --description "Creates a project from the dashboard and verifies it appears" \
36
+ --steps "Open the dashboard" \
37
+ --steps "Click New project" \
38
+ --steps "Enter a unique project name" \
39
+ --steps "Save the project" \
40
+ --steps "Verify the project appears in the list" \
41
+ --tag projects --tag smoke --priority high
42
+ ```
43
+
44
+ For AI-generated coverage:
45
+
46
+ ```bash
47
+ testers generate "<url>" --focus "<journey or area>" --save --json
48
+ testers list --tag "<tag>" --json
49
+ ```
50
+
51
+ For recorded sessions:
52
+
53
+ ```bash
54
+ testers record "<url>"
55
+ testers convert "<recording-or-har-file>" --model "<model>" --json
56
+ ```
57
+
58
+ ## Save A Workflow
59
+
60
+ Local workflow:
61
+
62
+ ```bash
63
+ testers workflow create "<name>" \
64
+ --description "<what the journey proves>" \
65
+ --tag "<tag>" \
66
+ --goal "<agentic testing goal if needed>" \
67
+ --success "<observable success criterion>" \
68
+ --target local \
69
+ --json
70
+ ```
71
+
72
+ Sandbox workflow:
73
+
74
+ ```bash
75
+ testers workflow create "<name>" \
76
+ --description "<what the journey proves>" \
77
+ --tag "<tag>" \
78
+ --goal "<agentic testing goal if needed>" \
79
+ --success "<observable success criterion>" \
80
+ --target sandbox \
81
+ --sandbox-provider e2b \
82
+ --sandbox-package @hasna/testers \
83
+ --sandbox-setup-command "<repo setup command>" \
84
+ --sandbox-cleanup delete \
85
+ --json
86
+ ```
87
+
88
+ Inspect the workflow and dry-run it before launching a real run:
89
+
90
+ ```bash
91
+ testers workflow show <id> --json
92
+ testers workflow run <id> --url "<url>" --dry-run --json
93
+ testers workflow run <id> --url "<url>" --model "<model>" --json
94
+ testers workflow agent <id> --url "<url>" --model "<model>" --json
95
+ ```
96
+
97
+ ## Hybrid Scripts
98
+
99
+ Use `testers run-script` when part of the flow is deterministic Playwright-like
100
+ automation and part needs AI judgment. Keep scripts in the app repo near other
101
+ tests, not in global config.
102
+
103
+ ```bash
104
+ testers run-script tests/qa/<workflow>.ts --url "<url>" --json
105
+ ```
106
+
107
+ Hybrid scripts should export `HybridScenario[]` and keep selectors stable
108
+ through roles, labels, or `data-testid`.
109
+
110
+ ## Maintenance Rules
111
+
112
+ - Store reusable workflows/scenarios in `testers`; do not leave them only in
113
+ chat history.
114
+ - Prefer tags that map to product areas: `auth`, `projects`, `billing`,
115
+ `connectors`, `admin`, `chat`, `smoke`, `regression`.
116
+ - Use personas for role-sensitive behavior instead of hardcoding user state.
117
+ - Never store secrets in workflow descriptions, steps, scripts, or generated
118
+ JSON. Use env vars or the approved secrets workflow.
119
+ - If a workflow fails because the app is wrong, fix the app and rerun. If it
120
+ fails because the workflow is stale, update the workflow and record why.
121
+
122
+ ## Done
123
+
124
+ Done means the workflow is saved or the script exists, a dry-run plan was
125
+ checked, at least one real run was executed, and the result/report is attached
126
+ or summarized in the active `todos` task.