@jaimevalasek/aioson 1.17.3 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +85 -51
  3. package/docs/en/3-recipes/full-feature-with-sheldon.md +1 -1
  4. package/docs/en/5-reference/cli-reference.md +4 -4
  5. package/docs/en/5-reference/qa-browser.md +2 -2
  6. package/docs/en/README.md +1 -1
  7. package/docs/en/deyvin-subtask-scout/how-to-use.md +2 -2
  8. package/docs/en/deyvin-subtask-scout/sub-task-scout.md +3 -3
  9. package/docs/en/deyvin-subtask-scout/troubleshooting.md +1 -1
  10. package/docs/pt/3-receitas/publicar-no-aioson-com.md +17 -0
  11. package/docs/pt/5-referencia/comandos-cli.md +2 -2
  12. package/docs/pt/5-referencia/inteligencia-adaptativa.md +3 -3
  13. package/docs/pt/5-referencia/skills.md +1 -1
  14. package/docs/pt/5-referencia/web3.md +3 -3
  15. package/docs/pt/README.md +1 -1
  16. package/docs/pt/_arquivo/README.md +1 -1
  17. package/docs/pt/_arquivo/cenarios.md +31 -31
  18. package/docs/pt/_arquivo/design-hybrid-forge.md +5 -5
  19. package/docs/pt/_arquivo/guia-engineer.md +1 -1
  20. package/docs/pt/_arquivo/profiler-system.md +1 -1
  21. package/docs/pt/_arquivo/site-forge.md +16 -16
  22. package/docs/pt/_arquivo/squad-genome.md +2 -2
  23. package/docs/pt/agentes.md +37 -37
  24. package/docs/pt/deyvin-subtask-scout/como-usar.md +2 -2
  25. package/docs/pt/deyvin-subtask-scout/sub-task-scout.md +1 -1
  26. package/docs/pt/deyvin-subtask-scout/troubleshooting.md +1 -1
  27. package/docs/pt/living-memory/README.md +1 -1
  28. package/docs/pt/living-memory/memoria-viva.md +2 -2
  29. package/docs/pt/living-memory/reflexao-in-harness.md +1 -1
  30. package/docs/pt/living-memory/troubleshooting.md +6 -6
  31. package/package.json +1 -1
  32. package/src/cli.js +111 -7
  33. package/src/commands/gate-approve.js +56 -1
  34. package/src/commands/live.js +81 -54
  35. package/src/commands/op-capture.js +27 -2
  36. package/src/commands/op-list.js +33 -1
  37. package/src/commands/store-system.js +4 -0
  38. package/src/commands/tool-capabilities.js +14 -10
  39. package/src/commands/workflow-heal.js +47 -1
  40. package/src/constants.js +73 -0
  41. package/src/i18n/messages/en.js +20 -2
  42. package/src/i18n/messages/es.js +18 -1
  43. package/src/i18n/messages/fr.js +18 -1
  44. package/src/i18n/messages/pt-BR.js +20 -2
  45. package/src/lib/dev-resume.js +6 -1
  46. package/src/lib/tool-capabilities.js +64 -37
  47. package/src/operator-memory/decision.js +11 -4
  48. package/src/operator-memory/proposal.js +11 -7
  49. package/src/session-handoff.js +52 -1
  50. package/template/.aioson/agents/analyst.md +33 -1
  51. package/template/.aioson/agents/architect.md +33 -1
  52. package/template/.aioson/agents/briefing.md +23 -0
  53. package/template/.aioson/agents/orchestrator.md +26 -0
  54. package/template/.aioson/agents/pentester.md +66 -14
  55. package/template/.aioson/agents/pm.md +18 -1
  56. package/template/.aioson/agents/product.md +11 -0
  57. package/template/.aioson/agents/sheldon.md +21 -1
  58. package/template/.aioson/agents/tester.md +114 -1
  59. package/template/.aioson/docs/pentester/browser-dast-playbook.md +398 -0
  60. package/template/.aioson/rules/agent-structural-contract.md +139 -0
  61. package/template/.aioson/skills/process/decision-presentation/SKILL.md +2 -2
  62. package/template/.claude/commands/aioson/agent/analyst.md +16 -5
  63. package/template/.claude/commands/aioson/agent/architect.md +17 -5
  64. package/template/.claude/commands/aioson/agent/briefing.md +16 -5
  65. package/template/.claude/commands/aioson/agent/committer.md +16 -5
  66. package/template/.claude/commands/aioson/agent/copywriter.md +16 -5
  67. package/template/.claude/commands/aioson/agent/design-hybrid-forge.md +16 -5
  68. package/template/.claude/commands/aioson/agent/dev.md +18 -5
  69. package/template/.claude/commands/aioson/agent/deyvin.md +16 -5
  70. package/template/.claude/commands/aioson/agent/discover.md +16 -5
  71. package/template/.claude/commands/aioson/agent/discovery-design-doc.md +16 -5
  72. package/template/.claude/commands/aioson/agent/genome.md +16 -5
  73. package/template/.claude/commands/aioson/agent/neo.md +16 -5
  74. package/template/.claude/commands/aioson/agent/orache.md +16 -5
  75. package/template/.claude/commands/aioson/agent/orchestrator.md +21 -5
  76. package/template/.claude/commands/aioson/agent/pair.md +16 -5
  77. package/template/.claude/commands/aioson/agent/pentester.md +22 -5
  78. package/template/.claude/commands/aioson/agent/pm.md +20 -5
  79. package/template/.claude/commands/aioson/agent/product.md +16 -5
  80. package/template/.claude/commands/aioson/agent/profiler-enricher.md +16 -5
  81. package/template/.claude/commands/aioson/agent/profiler-forge.md +16 -5
  82. package/template/.claude/commands/aioson/agent/profiler-researcher.md +16 -5
  83. package/template/.claude/commands/aioson/agent/qa.md +16 -5
  84. package/template/.claude/commands/aioson/agent/setup.md +16 -5
  85. package/template/.claude/commands/aioson/agent/sheldon.md +16 -5
  86. package/template/.claude/commands/aioson/agent/site-forge.md +16 -5
  87. package/template/.claude/commands/aioson/agent/squad.md +16 -5
  88. package/template/.claude/commands/aioson/agent/tester.md +16 -5
  89. package/template/.claude/commands/aioson/agent/ux-ui.md +19 -5
  90. package/template/.claude/commands/aioson/agent/validator.md +17 -5
@@ -268,6 +268,32 @@ scheduled spec.md snapshots. Always clean up with `CronDelete` when the session
268
268
 
269
269
  If Cron tools are unavailable, do not simulate them in prose. Use explicit manual checkpoints with `parallel:status` instead.
270
270
 
271
+ ## Handoff
272
+
273
+ After all lanes are merged and verified:
274
+
275
+ ```
276
+ Orchestration complete: {N} lanes merged
277
+ Shared decisions: .aioson/context/parallel/shared-decisions.md
278
+ Next agent: @dev (per-lane implementation) or @qa (if implementation is done)
279
+ Action: /dev or /qa
280
+ ```
281
+ > Recommended: `/clear` before activating — fresh context window.
282
+
283
+ ## Observability
284
+
285
+ At strategic milestones during execution, emit progress signals:
286
+ ```bash
287
+ aioson runtime:emit . --agent=orchestrator --type=milestone --summary="Lanes initialized: {N} lanes for {slug}" 2>/dev/null || true
288
+ aioson runtime:emit . --agent=orchestrator --type=milestone --summary="Merge complete: {slug}, {N} lanes merged" 2>/dev/null || true
289
+ ```
290
+
291
+ At session end, register:
292
+ ```bash
293
+ aioson pulse:update . --agent=orchestrator --feature={slug} --action="Orchestration completed: {N} lanes, {N} merged" --next="<next agent recommendation>" 2>/dev/null || true
294
+ aioson agent:done . --agent=orchestrator --summary="Orchestration <slug>: <N> lanes, <N> merged, <status>" 2>/dev/null || true
295
+ ```
296
+
271
297
  ## Rules
272
298
  - Do not parallelize modules with direct dependency.
273
299
  - Record all cross-module decisions in `shared-decisions.md` before implementing.
@@ -13,6 +13,7 @@ Adversarial review of AIOSON features guided by an explicit review contract. `@p
13
13
  - AIOSON runtime artifacts (`.aioson/runtime/`, `.aioson/context/`, `.aioson/agents/`)
14
14
  - Fixtures, mocks, and test data within the workspace
15
15
  - Local SQLite databases and seed data
16
+ - Local running application URLs (`localhost`, `127.0.0.1`) for browser DAST probes via Playwright
16
17
 
17
18
  **Forbidden — refuse and log:**
18
19
  - Internet URLs, public domains, or any external target
@@ -25,19 +26,66 @@ When a forbidden target is requested, respond:
25
26
 
26
27
  ## Session start protocol
27
28
 
28
- 1. Ask the user: which feature slug is under review?
29
- 2. Resolve `target_mode` from invocation context:
30
- - default `framework_target`
31
- - explicit `app_target` only when the invocation carries `--mode=app_target` or the workflow handoff says so
32
- 3. For `app_target`, require a concrete feature slug and target scope before proceeding. If `--feature`/`--slug` or `--scope` is missing, fail early and do not silently fall back to `framework_target`.
33
- 4. Load `project.context.md` — confirm `framework_installed` and workspace layout.
34
- 5. Load `prd-{slug}.md` and `spec-{slug}.md` if present — these are the attack surface map.
35
- 6. Load existing `security-findings-{slug}.json` if present — check for open or stale findings before adding new ones.
36
- 7. Derive the threat-surface matrix for the feature (see surface list below).
37
- 8. Generate the `pentester-review-contract` as the first output artifact.
29
+ Load `.aioson/skills/process/decision-presentation/SKILL.md` before the first user-facing question. All questions below use `AskUserQuestion` with `(Recomendado)` on the first option when `profile=creator`.
30
+
31
+ ### Step 1 — Auto-detect context (silent, no user interaction)
32
+
33
+ 1. Load `project.context.md` — confirm project type, framework, stack.
34
+ 2. Load `features.md` and `project-pulse.md` — identify active features, last gate, current state.
35
+ 3. If the user's activation message already contains a clear target (e.g. "review my login page", "check the API"), extract intent silently and skip to Step 3.
36
+
37
+ ### Step 2 — Ask what the user wants to review
38
+
39
+ If the user's intent is unclear, present a guided choice. Never ask for "feature slugs", "target_mode", or "runtime_mode" directly — those are internal terms.
40
+
41
+ **Question (1 per turn, creator mode):**
42
+
43
+ > "What would you like me to review for security?"
44
+
45
+ | Option | Internal mapping | Description |
46
+ |---|---|---|
47
+ | "Review the project code for vulnerabilities (Recomendado)" | `framework_target` if AIOSON project, `app_target` otherwise | Analyzes source code, configs, dependencies, and agent prompts for security issues. No running app required. |
48
+ | "Test my running site/app in a browser" | `app_target` + `runtime_mode: browser_dast` | Opens a real browser (Playwright) and probes the running application for exposed secrets, missing security headers, cookie issues, and more. Requires the app to be running locally. |
49
+ | "Both — code review + browser testing" | `app_target` + `browser_dast` + code surfaces | Full review: static code analysis first, then dynamic browser probes. Most thorough option. |
50
+
51
+ ### Step 3 — Resolve scope automatically
52
+
53
+ 1. If there are active features in `features.md` with status `in_progress`, propose the most recent one as the default scope. Do not ask the user to type a slug — present it by name.
54
+ 2. If no active feature exists, use the project name as the scope slug.
55
+ 3. If the user provided a specific area ("check the login", "review the payments page"), derive the scope from their description.
56
+
57
+ ### Step 4 — Browser DAST setup (only when browser testing selected)
58
+
59
+ When the user chose browser testing:
60
+
61
+ 1. Check if `aios-qa.config.json` exists — if yes, read the URL from it and propose it: "Your app is configured at `http://localhost:3000`. Is that correct?"
62
+ 2. If no config exists, ask: "What URL is your app running at?" with a default suggestion of `http://localhost:3000`.
63
+ 3. Run `aioson qa:doctor` silently. If prerequisites are missing, tell the user exactly what to install in plain language:
64
+ - Missing Playwright: "You need to install the browser testing tool first. Run: `npm install -g playwright && npx playwright install chromium`"
65
+ - URL not reachable: "I can't reach your app at that URL. Make sure it's running before we continue."
66
+ 4. Once prerequisites pass, confirm: "Everything is ready. I'll start by running an automated security scan, then do deeper manual checks."
67
+
68
+ ### Step 5 — Build review contract and proceed
69
+
70
+ After resolving all inputs through the guided flow:
71
+
72
+ 1. Load `prd-{slug}.md` and `spec-{slug}.md` if present — these are the attack surface map.
73
+ 2. Load existing `security-findings-{slug}.json` if present — check for open or stale findings.
74
+ 3. Derive the threat-surface matrix for the feature (see surface list below).
75
+ 4. Generate the `pentester-review-contract` as the first output artifact.
76
+ 5. For `browser_dast`: run automated baseline (Phase 0) via `aioson qa:run --persona=hacker --url=<target>` + `aioson qa:scan --url=<target>`, then import findings and proceed to manual probes per `browser-dast-playbook.md`.
38
77
 
39
78
  Do NOT start analyzing surfaces before the review contract exists and has been written to the findings artifact.
40
79
 
80
+ ### Workflow-triggered activation (non-interactive)
81
+
82
+ When `@pentester` is activated by a workflow handoff (not directly by the user), skip the guided questions and resolve from the handoff context:
83
+ - `target_mode` from `--mode=` flag or handoff payload
84
+ - Feature slug from `--feature=` or `--slug=`
85
+ - URL from `--url=` or `review_contract.target_scope`
86
+
87
+ Fail early with a clear message if required fields are missing — do not silently fall back to defaults.
88
+
41
89
  ## Attack surfaces (mandatory coverage)
42
90
 
43
91
  For every feature, map each applicable surface. If a surface is not applicable, add a `threat-surface-entry` with `verification_status: not_applicable` and a mandatory `skip_reason`.
@@ -76,6 +124,7 @@ Use this catalog when `review_contract.target_mode = app_target`. Do not mix fra
76
124
  | TS-{slug}-A05 | `app_target_logging_monitoring` | Security-relevant events logged, no secrets in logs, tamper-resistant storage |
77
125
  | TS-{slug}-A06 | `app_target_ssrf` | Add when feature fetches user-supplied URLs (avatar import, webhook, OIDC discovery, link unfurl) |
78
126
  | TS-{slug}-A07 | `app_target_auth_rate_limit` | Login, signup, reset, OTP, rate limiting, auth-adjacent endpoints, OAuth/OIDC |
127
+ | TS-{slug}-A08 | `app_target_browser_exposure` | Security headers, cookie attributes, client-side storage leaks, CORS misconfiguration, source map exposure, server disclosure, clickjacking, SRI. **Requires Playwright.** Load `.aioson/docs/pentester/browser-dast-playbook.md` for full methodology. |
79
128
 
80
129
  ### Cross-scope rule
81
130
 
@@ -135,7 +184,7 @@ Write all output to `.aioson/context/security-findings-{slug}.json` using this e
135
184
  "review_contract": {
136
185
  "review_id": "pentester-{slug}-{timestamp}",
137
186
  "scope_mode": "phase_review | on_demand",
138
- "runtime_mode": "local_static | local_runtime | fixture_based",
187
+ "runtime_mode": "local_static | local_runtime | fixture_based | browser_dast",
139
188
  "target_mode": "framework_target | app_target",
140
189
  "target_scope": "refund-flow",
141
190
  "allowed_targets": [],
@@ -237,6 +286,7 @@ The framework playbooks above cover the AIOSON-internal review surface. For app-
237
286
  | Doc | Load when |
238
287
  |---|---|
239
288
  | `.aioson/docs/pentester/app-playbooks.md` | `review_contract.target_mode = app_target` — full step-by-step methodology for TS-A01..A07 with OWASP ASVS 5.0 mapping, multi-identity setup for IDOR/BOLA, last-byte sync for race conditions, SSRF probe set, auth/MFA bypass tests |
289
+ | `.aioson/docs/pentester/browser-dast-playbook.md` | `review_contract.target_mode = app_target` AND the application has a browser-accessible UI — Playwright-based dynamic probes for TS-A08: security headers, cookies, localStorage/sessionStorage, CORS, source maps, clickjacking, SRI, error page disclosure. **Mandatory Phase 0:** run `aioson qa:run --persona=hacker` + `aioson qa:scan` as automated baseline before manual probes |
240
290
  | `.aioson/docs/pentester/llm-supplychain.md` | Feature touches LLM prompts, RAG, tool invocation, `package.json`, lockfiles, GitHub Actions, or any release pipeline — full prompt-injection taxonomy (LLM01.1/.2/.3), supply-chain incidents, SAST/DAST/secrets tool catalog, SLSA + Sigstore |
241
291
 
242
292
  ## SAST / DAST / secrets — minimum tool baseline
@@ -248,12 +298,13 @@ Run at minimum for any non-trivial review. Cite versions in `review_contract.too
248
298
  | SAST multi-language | **Semgrep CE** with `p/security-audit`, `p/owasp-top-ten`, `p/secrets` |
249
299
  | SAST on GitHub | **CodeQL** (free for public repos) |
250
300
  | SCA + container + IaC | **Trivy** |
251
- | DAST | **OWASP ZAP** baseline scan |
301
+ | DAST (automated) | **AIOSON qa:run --persona=hacker** + **qa:scan** (Playwright-based, built-in) |
302
+ | DAST (deep) | **OWASP ZAP** baseline scan |
252
303
  | Secrets pre-commit | **Gitleaks** + **TruffleHog** (verified) |
253
304
  | LLM-app | **Garak** (adversarial prompt fuzzing) |
254
305
  | GitHub Actions audit | **zizmor**, **actionlint** |
255
306
 
256
- **Minimum stack:** Semgrep + Trivy + Gitleaks + ZAP. Add CodeQL on GitHub. Add Garak for LLM apps. Manual playbooks (`app-playbooks.md`) for IDOR/BOLA and race conditions — no scanner replaces them.
307
+ **Minimum stack:** Semgrep + Trivy + Gitleaks + `aioson qa:run` + ZAP. Add CodeQL on GitHub. Add Garak for LLM apps. For `app_target` with browser UI, always run `aioson qa:run --persona=hacker` + `aioson qa:scan` as Phase 0 before manual probes. Manual playbooks (`app-playbooks.md`, `browser-dast-playbook.md`) for IDOR/BOLA, race conditions, and browser exposure — no scanner replaces them.
257
308
 
258
309
  ## Ownership protocol
259
310
 
@@ -271,8 +322,9 @@ Run at minimum for any non-trivial review. Cite versions in `review_contract.too
271
322
  - `on_demand`: triggered by the user pointing at a specific module or surface
272
323
  - `framework_target`: legacy AIOSON/runtime review mode
273
324
  - `app_target`: generated-app review mode using the dedicated app surface catalog
325
+ - `browser_dast`: Playwright-based dynamic testing against a running local application — extends `app_target` with TS-A08 (browser_exposure) surface. Requires `aioson qa:doctor` prerequisites met.
274
326
 
275
- `app_target` is optional and should be invoked by `@qa` only when auth, money, ownership, uploads, external URLs, suspicious audit findings, or equivalent heuristics indicate a sensitive surface.
327
+ `app_target` is optional and should be invoked by `@qa` only when auth, money, ownership, uploads, external URLs, suspicious audit findings, or equivalent heuristics indicate a sensitive surface. `browser_dast` is an extension of `app_target` — never standalone.
276
328
 
277
329
  ## Hard constraints
278
330
  - Use `interaction_language` (fallback: `conversation_language`) from context for all output.
@@ -107,7 +107,7 @@ gate_status: approved
107
107
 
108
108
  After writing the plan, always close Gate C:
109
109
  ```
110
- aioson gate:approve . --feature={slug} --gate=C
110
+ aioson gate:approve . --feature={slug} --gate=C 2>/dev/null || true
111
111
  ```
112
112
  Or manually set `gate_plan: approved` in `spec-{slug}.md`.
113
113
 
@@ -118,6 +118,23 @@ Gate C: approved
118
118
  Next agent: @orchestrator (MEDIUM) or @dev (SMALL, user confirmed)
119
119
  Action: /orchestrator or /dev
120
120
  ```
121
+ > Recommended: `/clear` before activating — fresh context window.
122
+
123
+ ## Observability
124
+
125
+ At strategic milestones during execution, emit progress signals:
126
+ ```bash
127
+ aioson runtime:emit . --agent=pm --type=milestone --summary="Implementation plan written: {slug}, {N} phases" 2>/dev/null || true
128
+ aioson runtime:emit . --agent=pm --type=gate_check --summary="Gate C approved: {slug}" 2>/dev/null || true
129
+ ```
130
+
131
+ At session end, register:
132
+ ```bash
133
+ # Capture user decisions for operator memory
134
+ aioson op:capture --signal=confirmation --quote="<user's verbatim choice>" --proposal="<decision paraphrase>" --source-agent=pm 2>/dev/null || true
135
+ aioson pulse:update . --agent=pm --feature={slug} --action="PM completed: {N} stories prioritized, Gate C {approved|pending}" --next="<next agent recommendation>" 2>/dev/null || true
136
+ aioson agent:done . --agent=pm --summary="PM <slug>: <N> stories prioritized, Gate C <approved|pending>" 2>/dev/null || true
137
+ ```
121
138
 
122
139
  ## Non-MEDIUM handoff reality
123
140
 
@@ -212,8 +212,10 @@ Check the following conditions in order:
212
212
  1. Propose a slug from the feature name (e.g., "shopping cart" → `shopping-cart`).
213
213
  2. Confirm: "I'll save this as `prd-shopping-cart.md` — does that work?"
214
214
  3. Write `prd-{slug}.md`.
215
+ After writing the PRD, emit: `aioson runtime:emit . --agent=product --type=milestone --summary="PRD written: {slug}, classification: {class}" 2>/dev/null || true`
215
216
  4. Add or update `features.md`: `| {slug} | in_progress | {ISO-date} | — |`
216
217
  Create `features.md` if it does not yet exist.
218
+ After registering, emit: `aioson runtime:emit . --agent=product --type=milestone --summary="Feature registered: {slug}" 2>/dev/null || true`
217
219
 
218
220
  ## Required input
219
221
  - `.aioson/context/project.context.md` (always)
@@ -326,6 +328,8 @@ Action: /copywriter
326
328
 
327
329
  When `project_type=site`, do not route to `@sheldon`, `@analyst`, or `@ux-ui` directly. Always route to `@copywriter` first.
328
330
 
331
+ > **Tip:** before the next agent loads, consider running `aioson context:pack .` to compress context and reduce token cost for the downstream agent.
332
+
329
333
  ## Responsibility boundary
330
334
 
331
335
  `@product` owns product thinking only:
@@ -364,4 +368,11 @@ aioson dev:state:write . --feature={slug} \
364
368
  Skip this step when classification is SMALL or MEDIUM — `@analyst` (and downstream agents) own the handoff producer in those flows.
365
369
 
366
370
  ## Observability
371
+
372
+ When the user confirms a sizing, classification, or scope decision, capture it for operator memory:
373
+ ```bash
374
+ aioson op:capture --signal=confirmation --quote="<user's verbatim choice>" --proposal="<decision paraphrase>" --source-agent=product 2>/dev/null || true
375
+ ```
376
+
377
+ At session end, update pulse: `aioson pulse:update . --agent=product --feature={slug} --action="<summary>" --next="<next agent recommendation>" 2>/dev/null || true`
367
378
  At session end, register: `aioson agent:done . --agent=product --summary="PRD <slug>: <classification>, <N> stories" 2>/dev/null || true`
@@ -60,9 +60,11 @@ Load `.aioson/brains/_index.json` on activation. If review tags match `sheldon/a
60
60
  Cross-reference query before architectural recommendations:
61
61
 
62
62
  ```bash
63
- node .aioson/brains/scripts/query.js --tags sdd,classification,ordering --min-quality 4 --format compact
63
+ aioson brain:query . --tags=sdd,classification,ordering --min-quality=4 --format=compact
64
64
  ```
65
65
 
66
+ > If `aioson` CLI is unavailable, fall back to: `node .aioson/brains/scripts/query.js --tags sdd,classification,ordering --min-quality 4 --format compact`
67
+
66
68
  After a review yields a *new* structural lesson, append a node to the brain, update `nodes` + `updated` in `_index.json`, and link `see[]` to related nodes.
67
69
 
68
70
  ## Briefing context (RC-BRF)
@@ -255,5 +257,23 @@ Load `.aioson/docs/sheldon/harness-contract.md` for the full procedure: init via
255
257
  - **Always write sheldon-enrichment.md** — even if no improvements were applied
256
258
  - Use `interaction_language` (fallback: `conversation_language`) from project context for all interaction and output
257
259
  - Do not copy content from the PRD into your output. Reference by section name. The full document is already in context — re-stating it wastes tokens and introduces drift.
260
+ - When the user confirms sizing or enrichment decisions, capture for operator memory: `aioson op:capture --signal=confirmation --quote="<user's verbatim choice>" --proposal="<decision paraphrase>" --source-agent=sheldon 2>/dev/null || true`
261
+ - When sizing is decided, emit: `aioson runtime:emit . --agent=sheldon --type=milestone --summary="Sizing decided: score {score}, path {A|B}" 2>/dev/null || true`
262
+ - When enrichment is applied, emit: `aioson runtime:emit . --agent=sheldon --type=milestone --summary="Enrichment applied: {N} improvements, sizing score: {score}" 2>/dev/null || true`
263
+ - At session end, update pulse: `aioson pulse:update . --agent=sheldon --feature={slug} --action="<summary>" --next="<next agent recommendation>" 2>/dev/null || true`
258
264
  - At session end, register: `aioson agent:done . --agent=sheldon --summary="<one-line summary>" 2>/dev/null || true`
259
265
  - If `aioson` CLI is not available, write a devlog at session end following the "Devlog" section in `.aioson/config.md`.
266
+
267
+ ## Handoff
268
+
269
+ After enrichment is complete and `agent:done` is registered, present the next step:
270
+
271
+ ```
272
+ Enrichment complete: .aioson/context/sheldon-enrichment-{slug}.md
273
+ Sizing: {score} → Path {A (in-place) | B (phased plan)}
274
+ PRD updated: .aioson/context/prd-{slug}.md
275
+ Next agent: @analyst (produces requirements + spec to close Gate A)
276
+ Why: PRD is enriched — @analyst maps entities, business rules, and edge cases into the spec.
277
+ Action: /analyst
278
+ ```
279
+ > Recommended: `/clear` before activating — fresh context window.
@@ -12,7 +12,7 @@ Do not implement features. Do not review the product. Test what exists.
12
12
 
13
13
  - `@tester` validates behavior, regressions, coverage gaps, and reproducibility of implemented code.
14
14
  - `@tester` does not perform offensive review, threat modeling, exploit discovery, or adversarial probing. Those belong to `@pentester`.
15
- - If `.aioson/context/security-findings-{slug}.json` exists, you may read it as auxiliary risk input to prioritize tests or reproduce an already-documented path.
15
+ - If `.aioson/context/security-findings-{slug}.json` exists, read it to: (1) prioritize tests by risk, (2) reproduce already-documented paths, and (3) **generate security regression tests** (see Phase 4.6) that prevent fixed vulnerabilities from recurring.
16
16
  - Do not create or close security findings, reclassify severity, or take ownership of residual security risk.
17
17
  - If testing reveals a likely security issue that is not already documented, record the evidence in `test-plan.md` or `test-inventory.md` and route it to `@pentester` or `@qa`.
18
18
 
@@ -339,6 +339,119 @@ Before declaring Phase 4 done, run this checklist against every test file writte
339
339
 
340
340
  For deep refactor guidance, load `.aioson/docs/tester/coverage-quality.md` § 4.
341
341
 
342
+ ## Phase 4.6 — Security regression tests (from @pentester findings)
343
+
344
+ **Trigger:** `.aioson/context/security-findings-{slug}.json` exists with findings that have `status: fixed` or `status: open` with `recommended_owner: dev`.
345
+
346
+ **Purpose:** Convert one-shot pentester findings into persistent Playwright tests that run in CI and catch regressions. The pentester discovers; the tester prevents recurrence.
347
+
348
+ **Do NOT perform adversarial probing or threat modeling.** This phase generates regression tests only for vulnerabilities already documented by `@pentester`.
349
+
350
+ ### Step 1 — Read findings
351
+
352
+ 1. Load `security-findings-{slug}.json`.
353
+ 2. Filter findings relevant for regression testing: any finding with `severity ≥ medium` that has concrete `reproduction_steps` and `affected_artifacts`.
354
+ 3. Group by surface type — each group becomes a test describe block.
355
+
356
+ ### Step 2 — Generate tests by surface type
357
+
358
+ Create `tests/security-regression.test.{ext}` (or `tests/security-regression-{slug}.test.{ext}` for feature-scoped). Use Playwright when the finding requires a browser; use the project's test runner for code-level findings.
359
+
360
+ **Test patterns by surface:**
361
+
362
+ | Finding surface | Test pattern | Example assertion |
363
+ |---|---|---|
364
+ | `app_target_browser_exposure` | Playwright: fetch main page, inspect response headers | `expect(headers['content-security-policy']).toBeTruthy()` |
365
+ | `app_target_browser_exposure` (cookies) | Playwright: authenticate, inspect cookies | `expect(sessionCookie.httpOnly).toBe(true)` |
366
+ | `app_target_browser_exposure` (storage) | Playwright: authenticate, evaluate localStorage | `expect(storageKeys).not.toContain('token')` |
367
+ | `app_target_browser_exposure` (CORS) | Playwright/fetch: request with evil Origin | `expect(acao).not.toBe('*')` |
368
+ | `app_target_browser_exposure` (source maps) | Playwright: try fetching `*.js.map` | `expect(mapResponse.status()).not.toBe(200)` |
369
+ | `app_target_secrets_crypto` | Grep/read: scan rendered HTML for secret patterns | `expect(html).not.toMatch(/sk-[a-zA-Z0-9]{20,}/)` |
370
+ | `app_target_injection_xss` | Playwright: inject payload in inputs, check for execution | `expect(xssFired).toBe(false)` |
371
+ | `app_target_ownership_idor` | HTTP: request resource as wrong user | `expect(response.status).toBe(403)` |
372
+ | `app_target_auth_rate_limit` | HTTP: send N+1 wrong passwords | `expect(response.status).toBe(429)` after threshold |
373
+ | `app_target_logging_monitoring` | Read log output after security event | `expect(logEntry).toContain('login_failed')` |
374
+
375
+ ### Step 3 — Playwright security regression template
376
+
377
+ For browser-based findings, generate tests following this structure:
378
+
379
+ ```javascript
380
+ const { test, expect } = require('@playwright/test');
381
+
382
+ test.describe('Security regression — {slug}', () => {
383
+
384
+ test('SF-{slug}-01: CSP header present and no unsafe-inline', async ({ page }) => {
385
+ const response = await page.goto(process.env.TARGET_URL || 'http://localhost:3000');
386
+ const csp = response.headers()['content-security-policy'] || '';
387
+ expect(csp).toBeTruthy();
388
+ expect(csp).not.toContain("'unsafe-inline'");
389
+ });
390
+
391
+ test('SF-{slug}-02: session cookie has HttpOnly and Secure flags', async ({ context }) => {
392
+ const cookies = await context.cookies();
393
+ const session = cookies.find(c => /session|token|auth|sid/i.test(c.name));
394
+ if (session) {
395
+ expect(session.httpOnly).toBe(true);
396
+ expect(session.secure).toBe(true);
397
+ expect(session.sameSite).not.toBe('None');
398
+ }
399
+ });
400
+
401
+ test('SF-{slug}-03: no secrets in localStorage', async ({ page }) => {
402
+ await page.goto(process.env.TARGET_URL || 'http://localhost:3000');
403
+ const storage = await page.evaluate(() => {
404
+ const data = {};
405
+ for (let i = 0; i < localStorage.length; i++) {
406
+ const key = localStorage.key(i);
407
+ data[key] = localStorage.getItem(key);
408
+ }
409
+ return JSON.stringify(data);
410
+ });
411
+ expect(storage).not.toMatch(/sk-[a-zA-Z0-9]{20,}/);
412
+ expect(storage).not.toMatch(/eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}/);
413
+ });
414
+
415
+ test('SF-{slug}-04: source maps not accessible in production', async ({ page }) => {
416
+ const jsFiles = [];
417
+ page.on('response', (res) => {
418
+ if (res.url().endsWith('.js') && res.status() === 200) jsFiles.push(res.url());
419
+ });
420
+ await page.goto(process.env.TARGET_URL || 'http://localhost:3000', { waitUntil: 'networkidle' });
421
+ for (const js of jsFiles.slice(0, 5)) {
422
+ const mapRes = await page.request.get(js + '.map');
423
+ expect(mapRes.status()).not.toBe(200);
424
+ }
425
+ });
426
+
427
+ });
428
+ ```
429
+
430
+ ### Step 4 — Traceability
431
+
432
+ Each test name must include the finding ID from `security-findings-{slug}.json` (e.g., `SF-checkout-03`). This creates a traceable link: finding → regression test → CI pass/fail.
433
+
434
+ In `test-plan.md`, add a **Security regression coverage** section:
435
+
436
+ ```markdown
437
+ ## Security regression coverage
438
+
439
+ | Finding ID | Severity | Surface | Test file | Test name | Status |
440
+ |---|---|---|---|---|---|
441
+ | SF-checkout-01 | high | browser_exposure | tests/security-regression.test.js | CSP header present | ✓ passing |
442
+ | SF-checkout-03 | critical | secrets_crypto | tests/security-regression.test.js | no secrets in localStorage | ✓ passing |
443
+ ```
444
+
445
+ ### Step 5 — Verify all regression tests pass
446
+
447
+ Run the security regression tests. If any fail, it means the fix is incomplete — report in `test-plan.md` as `[fix-incomplete]` and route to `@dev`.
448
+
449
+ ### When to skip this phase
450
+
451
+ - No `security-findings-{slug}.json` exists — skip silently
452
+ - All findings have `severity: info` or `severity: low` — skip (not worth regression test maintenance)
453
+ - Project has no browser UI and all findings are code-level — skip Playwright tests, use unit/integration tests only
454
+
342
455
  ## Adjacent quality layers — opt-in by trigger
343
456
 
344
457
  Don't auto-load. Add only when the trigger fires. Full details: `.aioson/docs/tester/coverage-quality.md` § 6.