@exaudeus/workrail 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/application/services/validation-engine.js +4 -9
  2. package/dist/application/services/workflow-compiler.js +4 -6
  3. package/dist/console/assets/index-BZYIjrzJ.js +28 -0
  4. package/dist/console/assets/index-OLCKbDdm.css +1 -0
  5. package/dist/console/index.html +2 -2
  6. package/dist/engine/engine-factory.js +2 -2
  7. package/dist/engine/types.d.ts +1 -1
  8. package/dist/manifest.json +63 -63
  9. package/dist/mcp/handlers/shared/request-workflow-reader.d.ts +5 -0
  10. package/dist/mcp/handlers/shared/request-workflow-reader.js +47 -2
  11. package/dist/mcp/handlers/v2-advance-core/assessment-consequences.d.ts +1 -1
  12. package/dist/mcp/handlers/v2-advance-core/assessment-consequences.js +4 -5
  13. package/dist/mcp/handlers/v2-advance-core/index.js +1 -1
  14. package/dist/mcp/handlers/v2-advance-core/outcome-blocked.js +1 -1
  15. package/dist/mcp/handlers/v2-execution/start.d.ts +1 -0
  16. package/dist/mcp/handlers/v2-execution/start.js +20 -1
  17. package/dist/mcp/handlers/v2-workflow.d.ts +23 -0
  18. package/dist/mcp/handlers/v2-workflow.js +177 -10
  19. package/dist/mcp/output-schemas.d.ts +202 -8
  20. package/dist/mcp/output-schemas.js +38 -11
  21. package/dist/mcp/server.js +48 -1
  22. package/dist/mcp/tool-descriptions.js +17 -9
  23. package/dist/mcp/v2/tools.d.ts +6 -0
  24. package/dist/mcp/v2/tools.js +2 -0
  25. package/dist/mcp/workflow-protocol-contracts.js +5 -1
  26. package/dist/types/workflow-definition.d.ts +2 -2
  27. package/dist/v2/infra/local/workspace-anchor/index.js +4 -1
  28. package/dist/v2/usecases/console-routes.js +49 -1
  29. package/dist/v2/usecases/console-service.d.ts +1 -0
  30. package/dist/v2/usecases/console-service.js +4 -1
  31. package/dist/v2/usecases/console-types.d.ts +12 -0
  32. package/dist/v2/usecases/worktree-service.js +55 -7
  33. package/package.json +3 -2
  34. package/spec/authoring-spec.json +91 -3
  35. package/spec/workflow-tags.json +132 -0
  36. package/spec/workflow.schema.json +411 -97
  37. package/workflows/adaptive-ticket-creation.json +40 -22
  38. package/workflows/architecture-scalability-audit.json +65 -31
  39. package/workflows/bug-investigation.agentic.v2.json +36 -14
  40. package/workflows/coding-task-workflow-agentic.json +50 -38
  41. package/workflows/coding-task-workflow-agentic.lean.v2.json +124 -37
  42. package/workflows/coding-task-workflow-agentic.v2.json +90 -30
  43. package/workflows/cross-platform-code-conversion.v2.json +168 -48
  44. package/workflows/document-creation-workflow.json +47 -17
  45. package/workflows/documentation-update-workflow.json +8 -8
  46. package/workflows/intelligent-test-case-generation.json +2 -2
  47. package/workflows/learner-centered-course-workflow.json +267 -267
  48. package/workflows/mr-review-workflow.agentic.v2.json +81 -14
  49. package/workflows/personal-learning-materials-creation-branched.json +175 -175
  50. package/workflows/presentation-creation.json +159 -159
  51. package/workflows/production-readiness-audit.json +54 -15
  52. package/workflows/relocation-workflow-us.json +44 -35
  53. package/workflows/routines/tension-driven-design.json +1 -1
  54. package/workflows/scoped-documentation-workflow.json +25 -25
  55. package/workflows/test-artifact-loop-control.json +1 -2
  56. package/workflows/ui-ux-design-workflow.json +327 -0
  57. package/workflows/workflow-diagnose-environment.json +1 -1
  58. package/workflows/workflow-for-workflows.json +507 -484
  59. package/workflows/workflow-for-workflows.v2.json +90 -18
  60. package/workflows/wr.discovery.json +112 -30
  61. package/dist/console/assets/index-DW78t31j.css +0 -1
  62. package/dist/console/assets/index-EsSXrC_a.js +0 -28
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "relocation-workflow-us",
3
- "name": "US Relocation Decision Workflow (Evidence-Driven AreaSpec Ranked Dossier)",
3
+ "name": "US Relocation Decision Workflow (Evidence-Driven \u2022 AreaSpec \u2022 Ranked Dossier)",
4
4
  "version": "1.0.0",
5
- "description": "An evidence-driven relocation workflow for the United States. Discovers preferences, generates a broad candidate pool with anti-anchoring discipline, screens with strict caps, deep-dives a shortlist, and produces a ranked dossier with per-location profiles and an explainable weighted ranking.",
5
+ "description": "Use this to evaluate US cities or regions for a potential relocation. Discovers your preferences, generates candidate areas, screens them, and produces a ranked dossier with evidence.",
6
6
  "recommendedPreferences": {
7
7
  "recommendedAutonomy": "guided",
8
8
  "recommendedRiskPolicy": "conservative"
@@ -20,14 +20,14 @@
20
20
  ],
21
21
  "metaGuidance": [
22
22
  "DEFAULT BEHAVIOR: self-execute with tools. Ask the user only for true preferences, real confirmations, and any external context you cannot find yourself.",
23
- "V2 DURABILITY: use output.notesMarkdown and explicit context variables as the durable record. RELOCATION_DOSSIER.md and profile docs are human-facing artifacts they are NOT required workflow memory and are never read back for routing.",
23
+ "V2 DURABILITY: use output.notesMarkdown and explicit context variables as the durable record. RELOCATION_DOSSIER.md and profile docs are human-facing artifacts \u2014 they are NOT required workflow memory and are never read back for routing.",
24
24
  "ANTI-ANCHORING: generate a broad pool first; screen second; deep-dive only the shortlist. Do not deep-dive a single favorite area early.",
25
25
  "AREA BOUNDARIES: every candidate must have an AreaSpec before entering the pool. Use areaId = <candidateType>-<slug(displayName)>-<sortedStateCodes> (e.g. metro-raleigh-durham-nc). Never switch a candidate's boundary mid-run without logging it.",
26
26
  "CLAIMS LEDGER: every key claim about a location must include source (URL or citation), retrievedAt (date), and confidenceGrade (High/Medium/Low). If a claim cannot be sourced, grade it Low.",
27
27
  "MISSING DATA: when a data point is unavailable, record it as Unknown and apply the chosen missingDataPolicy consistently. Never silently assume a value for an Unknown.",
28
28
  "ARTIFACTS: try to write files (RELOCATION_DOSSIER.md, relocation-profiles/<slug>.md). If file writing is unavailable, paste full canonical content in chat and treat that as the record.",
29
29
  "MODULES: activate only sections relevant to activeModules. Do not include placeholder sections for inactive modules.",
30
- "SCREENING CAPS: first-pass screening must stay fast dealbreakers plus top weighted criteria only, strict claim and time caps. Deep research belongs in Phase 6 deep dives.",
30
+ "SCREENING CAPS: first-pass screening must stay fast \u2014 dealbreakers plus top weighted criteria only, strict claim and time caps. Deep research belongs in Phase 6 deep dives.",
31
31
  "NON-OBVIOUS CANDIDATES: non-obvious = not in userTopOfMind AND not in top-100 US metros. If you cannot map a candidate to the top-100 list deterministically, mark obviousness Unknown and do not count it toward non-obvious requirements."
32
32
  ],
33
33
  "steps": [
@@ -37,18 +37,18 @@
37
37
  "promptBlocks": {
38
38
  "goal": "Establish the scope and structure for this relocation search, then discover calibrated preferences and a stable weight model before any research begins.",
39
39
  "constraints": [
40
- "Do not start researching candidates yet preferences and boundary rules must be locked first.",
40
+ "Do not start researching candidates yet \u2014 preferences and boundary rules must be locked first.",
41
41
  "Activate only modules the user actually needs; do not load everything by default.",
42
42
  "If the user is unsure about weights, offer the MaxDiff helper before finalizing."
43
43
  ],
44
44
  "procedure": [
45
- "Step 1 Confirm scope and initialize artifacts. Confirm this is a US-only v1 relocation search. Ask for userTopOfMind (0-10 areas the user already has in mind; empty is fine). Initialize RELOCATION_DOSSIER.md with sections: User Context & Modules, Boundary & Definitions, Preferences (Draft), Constraints & Dealbreakers, Missing Data Policy, Sources Strategy, Candidate Pool, Screened Candidates, Screening Claims Ledger, Baseline Flags (Not Scored), Red Flag Gate Decisions (append-only), Shortlist, Profiles Index, Comparison & Ranking, Decision Log (append-only). Create the relocation-profiles/ directory.",
46
- "Step 2 Capture user context. Ask about and record: timelineToMove (0-3 months / 3-12 months / 12+ months), householdProfile (single / couple / family with kids / multi-generational), housingPlan (rent/buy/either and budget range), workConstraints (remote/hybrid/onsite; time zones allowed), geoExclusions (states or regions to exclude).",
47
- "Step 3 Select modules. Present the module list and activate all that apply: kids/schools, commute, transit, climate-risk, healthcare-access, career-job-market, outdoors, nightlife-arts, safety, taxes, diversity-community, disability-accessibility, amenities-errands, air-quality, noise, internet-infra. Record as activeModules.",
48
- "Step 4 Lock boundary rules. Set candidateType (default: metro; options: city, county, custom). Record the AreaSpec model: areaId = <candidateType>-<slug(displayName)>-<sortedStateCodes>. For metro candidates, record the definition source and treat the full metro area as the boundary (not just the city). For custom areas (v1), use radius mode: center (place + stateCode) + radiusMiles. Update RELOCATION_DOSSIER.md Boundary & Definitions section.",
49
- "Step 5 Elicit preferences. Ask about: hard constraints (must-have geography, climate, budget, job, family, health), anti-goals (explicit non-goals), dealbreakers. Draft 6-10 weighted criteria across active modules; weights must sum to 100. If the user is unsure, start with equal-weight draft and offer MaxDiff.",
50
- "Step 6 Optional MaxDiff weight derivation. Ask: 'Do you want help deriving weights using Most/Least comparisons?' If yes: build deterministic rotation sets (N<=7: 3 sets of 4; N>=8: 4 sets of 5). For each set ask which criterion is MOST important and which is LEAST. Derive weights: raw[c] = mostCount[c] - leastCount[c]; shifted[c] = raw[c] - min(raw) + 1; weight[c] = round(shifted[c] / sum(shifted) * 100); adjust largest weight so sum = exactly 100. Allow one small tweak pass (up to 2 weights adjusted, then re-normalize).",
51
- "Step 7 Calibration deck. Generate 8-12 diverse US location archetypes (dense transit metro, college town, mountain small city, coastal mid-size, sunbelt suburb, rust-belt revival city, DC-adjacent, etc.). For each: 2-3 sentences on lifestyle and tradeoffs, who it fits, who it frustrates. Ask the user to rank top 3 and bottom 3 and name 1-2 surprises. Update weights and constraints if calibration reveals new signal. Record derivedSignals (densityLeaning, climateLeaning, regionLeaning).",
45
+ "Step 1 \u2014 Confirm scope and initialize artifacts. Confirm this is a US-only v1 relocation search. Ask for userTopOfMind (0-10 areas the user already has in mind; empty is fine). Initialize RELOCATION_DOSSIER.md with sections: User Context & Modules, Boundary & Definitions, Preferences (Draft), Constraints & Dealbreakers, Missing Data Policy, Sources Strategy, Candidate Pool, Screened Candidates, Screening Claims Ledger, Baseline Flags (Not Scored), Red Flag Gate Decisions (append-only), Shortlist, Profiles Index, Comparison & Ranking, Decision Log (append-only). Create the relocation-profiles/ directory.",
46
+ "Step 2 \u2014 Capture user context. Ask about and record: timelineToMove (0-3 months / 3-12 months / 12+ months), householdProfile (single / couple / family with kids / multi-generational), housingPlan (rent/buy/either and budget range), workConstraints (remote/hybrid/onsite; time zones allowed), geoExclusions (states or regions to exclude).",
47
+ "Step 3 \u2014 Select modules. Present the module list and activate all that apply: kids/schools, commute, transit, climate-risk, healthcare-access, career-job-market, outdoors, nightlife-arts, safety, taxes, diversity-community, disability-accessibility, amenities-errands, air-quality, noise, internet-infra. Record as activeModules.",
48
+ "Step 4 \u2014 Lock boundary rules. Set candidateType (default: metro; options: city, county, custom). Record the AreaSpec model: areaId = <candidateType>-<slug(displayName)>-<sortedStateCodes>. For metro candidates, record the definition source and treat the full metro area as the boundary (not just the city). For custom areas (v1), use radius mode: center (place + stateCode) + radiusMiles. Update RELOCATION_DOSSIER.md Boundary & Definitions section.",
49
+ "Step 5 \u2014 Elicit preferences. Ask about: hard constraints (must-have geography, climate, budget, job, family, health), anti-goals (explicit non-goals), dealbreakers. Draft 6-10 weighted criteria across active modules; weights must sum to 100. If the user is unsure, start with equal-weight draft and offer MaxDiff.",
50
+ "Step 6 \u2014 Optional MaxDiff weight derivation. Ask: 'Do you want help deriving weights using Most/Least comparisons?' If yes: build deterministic rotation sets (N<=7: 3 sets of 4; N>=8: 4 sets of 5). For each set ask which criterion is MOST important and which is LEAST. Derive weights: raw[c] = mostCount[c] - leastCount[c]; shifted[c] = raw[c] - min(raw) + 1; weight[c] = round(shifted[c] / sum(shifted) * 100); adjust largest weight so sum = exactly 100. Allow one small tweak pass (up to 2 weights adjusted, then re-normalize).",
51
+ "Step 7 \u2014 Calibration deck. Generate 8-12 diverse US location archetypes (dense transit metro, college town, mountain small city, coastal mid-size, sunbelt suburb, rust-belt revival city, DC-adjacent, etc.). For each: 2-3 sentences on lifestyle and tradeoffs, who it fits, who it frustrates. Ask the user to rank top 3 and bottom 3 and name 1-2 surprises. Update weights and constraints if calibration reveals new signal. Record derivedSignals (densityLeaning, climateLeaning, regionLeaning).",
52
52
  "Capture these context variables: activeModules, candidateType, userTopOfMind, timelineToMove, householdProfile, housingPlan, workConstraints, geoExclusions, dealbreakers, weights (array of {criterion, weight}), weightsCount, derivedSignals."
53
53
  ],
54
54
  "verify": [
@@ -67,17 +67,17 @@
67
67
  "promptBlocks": {
68
68
  "goal": "Lock the decision mechanics and gate parameters before any candidate research begins. These policies govern how ambiguity, missing data, and diversity requirements are handled throughout the workflow.",
69
69
  "constraints": [
70
- "Every policy must be explicit no implicit defaults allowed past this gate.",
70
+ "Every policy must be explicit \u2014 no implicit defaults allowed past this gate.",
71
71
  "The user must confirm these settings before Phase 3 begins."
72
72
  ],
73
73
  "procedure": [
74
- "Step 1 Missing data policy. Ask the user to choose one: (a) neutral Unknown scores 0.5; (b) penalize Unknown scores 0.25; (c) followup_required Unknown scores 0.5 AND candidates with Unknown on any criterion with weight >= 15 are ineligible for the top 3. Record as missingDataPolicy.",
75
- "Step 2 Intake completeness check. Confirm you have enough context to set dealbreakers and weights. If not, note missingInputs and resolve before proceeding.",
76
- "Step 3 Anti-anchoring gate parameters. Propose defaults and ask the user to confirm or adjust: minCandidatePool (default 20), minNonObviousCandidates (default 6), minCoverageRegions (default 3), minCoverageClimateBands (default 2).",
77
- "Step 4 Shortlist range. Propose defaults and ask the user to confirm or adjust: shortlistMin (default 8), shortlistMax (default 12).",
78
- "Step 5 Screening caps. Propose defaults and ask the user to confirm or adjust: screeningTopCriteriaCount (default 3 screen dealbreakers + top N weighted criteria only), screeningMaxClaimsPerCandidate (default 3), screeningTimeboxMinutesPerCandidate (default 5), screeningBatchSize (default 10).",
79
- "Step 6 Discovery caps. Propose defaults and ask the user to confirm or adjust: perSourceCandidateCap (default 8 cap per curated-list source to avoid editorial bias).",
80
- "Step 7 Baseline flags caps. Propose defaults and ask the user to confirm or adjust: baselineMaxFlagsPerCandidate (default 2), baselineMaxSourcesPerFlag (default 1), baselineTimeboxMinutesPerCandidate (default 2).",
74
+ "Step 1 \u2014 Missing data policy. Ask the user to choose one: (a) neutral \u2014 Unknown scores 0.5; (b) penalize \u2014 Unknown scores 0.25; (c) followup_required \u2014 Unknown scores 0.5 AND candidates with Unknown on any criterion with weight >= 15 are ineligible for the top 3. Record as missingDataPolicy.",
75
+ "Step 2 \u2014 Intake completeness check. Confirm you have enough context to set dealbreakers and weights. If not, note missingInputs and resolve before proceeding.",
76
+ "Step 3 \u2014 Anti-anchoring gate parameters. Propose defaults and ask the user to confirm or adjust: minCandidatePool (default 20), minNonObviousCandidates (default 6), minCoverageRegions (default 3), minCoverageClimateBands (default 2).",
77
+ "Step 4 \u2014 Shortlist range. Propose defaults and ask the user to confirm or adjust: shortlistMin (default 8), shortlistMax (default 12).",
78
+ "Step 5 \u2014 Screening caps. Propose defaults and ask the user to confirm or adjust: screeningTopCriteriaCount (default 3 \u2014 screen dealbreakers + top N weighted criteria only), screeningMaxClaimsPerCandidate (default 3), screeningTimeboxMinutesPerCandidate (default 5), screeningBatchSize (default 10).",
79
+ "Step 6 \u2014 Discovery caps. Propose defaults and ask the user to confirm or adjust: perSourceCandidateCap (default 8 \u2014 cap per curated-list source to avoid editorial bias).",
80
+ "Step 7 \u2014 Baseline flags caps. Propose defaults and ask the user to confirm or adjust: baselineMaxFlagsPerCandidate (default 2), baselineMaxSourcesPerFlag (default 1), baselineTimeboxMinutesPerCandidate (default 2).",
81
81
  "Update RELOCATION_DOSSIER.md with all policies and caps. Capture all values as context variables: missingDataPolicy, minCandidatePool, minNonObviousCandidates, minCoverageRegions, minCoverageClimateBands, shortlistMin, shortlistMax, screeningTopCriteriaCount, screeningMaxClaimsPerCandidate, screeningTimeboxMinutesPerCandidate, screeningBatchSize, perSourceCandidateCap, baselineMaxFlagsPerCandidate, baselineMaxSourcesPerFlag, baselineTimeboxMinutesPerCandidate."
82
82
  ],
83
83
  "verify": [
@@ -99,10 +99,10 @@
99
99
  "Do not proceed to screening until the anti-anchoring gate passes."
100
100
  ],
101
101
  "procedure": [
102
- "Step 1 Sources strategy. Before generating candidates, document the sources strategy in RELOCATION_DOSSIER.md: Housing (Zillow + alternative), Taxes (state revenue sites), Climate normals (NOAA), Climate risk (FEMA flood maps), Employment (BLS / state labor stats), Transit/commute (local agencies), Air quality (AirNow/EPA), Noise (airport contour maps), Internet (FCC broadband map). Include only sources for active modules. Use this sources strategy as your research guide throughout candidate generation generate the pool from actual data, not from memory alone.",
103
- "Step 2 Generate candidates. Use the weight model and dealbreakers as the filter. For each candidate: assign a stable areaId, record the full AreaSpec (candidateType, displayName, stateCodes, and boundary definition), record why included, tag with candidateFacets (region, climateBand, sizeTier, taxRegime, airportAccess, outdoorsBiome as applicable). Fill coverage gaps deliberately include a mix of obvious and non-obvious candidates. Include at least minCandidatePool candidates total.",
104
- "Step 3 Anti-anchoring gate. Check: candidatePoolCount >= minCandidatePool, qualifyingNonObviousCandidateCount >= minNonObviousCandidates, coverageRegionsCount >= minCoverageRegions, coverageClimateBandsCount >= minCoverageClimateBands. A 'qualifying non-obvious' candidate is non-obvious AND plausibly passes dealbreakers. Record the top-100 list source used for non-obvious classification. If the gate fails, expand the pool by filling coverage gaps (prefer non-obvious candidates). Repeat until the gate passes.",
105
- "Step 4 Build screening batches. Divide candidatePool into batches of screeningBatchSize, preserving order. Each batch: { batchId, startIndex, endIndexExclusive, candidates }. Record screeningBatches and screeningBatchesCount.",
102
+ "Step 1 \u2014 Sources strategy. Before generating candidates, document the sources strategy in RELOCATION_DOSSIER.md: Housing (Zillow + alternative), Taxes (state revenue sites), Climate normals (NOAA), Climate risk (FEMA flood maps), Employment (BLS / state labor stats), Transit/commute (local agencies), Air quality (AirNow/EPA), Noise (airport contour maps), Internet (FCC broadband map). Include only sources for active modules. Use this sources strategy as your research guide throughout candidate generation \u2014 generate the pool from actual data, not from memory alone.",
103
+ "Step 2 \u2014 Generate candidates. Use the weight model and dealbreakers as the filter. For each candidate: assign a stable areaId, record the full AreaSpec (candidateType, displayName, stateCodes, and boundary definition), record why included, tag with candidateFacets (region, climateBand, sizeTier, taxRegime, airportAccess, outdoorsBiome as applicable). Fill coverage gaps deliberately \u2014 include a mix of obvious and non-obvious candidates. Include at least minCandidatePool candidates total.",
104
+ "Step 3 \u2014 Anti-anchoring gate. Check: candidatePoolCount >= minCandidatePool, qualifyingNonObviousCandidateCount >= minNonObviousCandidates, coverageRegionsCount >= minCoverageRegions, coverageClimateBandsCount >= minCoverageClimateBands. A 'qualifying non-obvious' candidate is non-obvious AND plausibly passes dealbreakers. Record the top-100 list source used for non-obvious classification. If the gate fails, expand the pool by filling coverage gaps (prefer non-obvious candidates). Repeat until the gate passes.",
105
+ "Step 4 \u2014 Build screening batches. Divide candidatePool into batches of screeningBatchSize, preserving order. Each batch: { batchId, startIndex, endIndexExclusive, candidates }. Record screeningBatches and screeningBatchesCount.",
106
106
  "Update RELOCATION_DOSSIER.md: Candidate Pool table (name, candidateType, region, why included, early risks/unknowns). Capture context variables: candidatePool, candidatePoolCount, nonObviousCandidateCount, qualifyingNonObviousCandidateCount, coverageRegionsCount, coverageClimateBandsCount, screeningBatches, screeningBatchesCount, discoverySourcesUsed, nonObviousDefinitionUsed."
107
107
  ],
108
108
  "verify": [
@@ -136,7 +136,7 @@
136
136
  {
137
137
  "id": "phase-4-baseline-flags",
138
138
  "title": "Phase 4b: Baseline Due Diligence (Not Scored)",
139
- "prompt": "Run a lightweight baseline pass on all Pass or Maybe candidates from screenResults.\n\nScope check only:\n- Climate risk (high-level: flood zone, wildfire, extreme heat)\n- Safety and crime (high-level: neighborhood-level variance)\n- Schools and healthcare access only if kids/schools or healthcare-access modules are active\n\nCaps (apply strictly):\n- At most baselineMaxFlagsPerCandidate flags per candidate\n- At most baselineMaxSourcesPerFlag sources per flag\n- At most baselineTimeboxMinutesPerCandidate minutes per candidate\n\nFor each Pass/Maybe candidate, produce 0 to baselineMaxFlagsPerCandidate baseline flags. Each flag: category (climate/safety/schools/healthcare/policy/other), severity (yellow/orange/red), one-sentence summary, source, retrievedAt, confidenceGrade. A red flag has severity = red.\n\nDo NOT compute or modify any scores here. Do NOT silently turn flags into dealbreakers or weights. If evidence is unclear, record Unknown.\n\nUpdate RELOCATION_DOSSIER.md: add Baseline Flags (Not Scored) section with per-candidate table.\n\nCapture context variables: baselineFlags ({ [candidateKey]: { flags: array, unknowns: string[] } }), redFlagCandidates (string[]), redFlagCount (number).",
139
+ "prompt": "Run a lightweight baseline pass on all Pass or Maybe candidates from screenResults.\n\nScope \u2014 check only:\n- Climate risk (high-level: flood zone, wildfire, extreme heat)\n- Safety and crime (high-level: neighborhood-level variance)\n- Schools and healthcare access \u2014 only if kids/schools or healthcare-access modules are active\n\nCaps (apply strictly):\n- At most baselineMaxFlagsPerCandidate flags per candidate\n- At most baselineMaxSourcesPerFlag sources per flag\n- At most baselineTimeboxMinutesPerCandidate minutes per candidate\n\nFor each Pass/Maybe candidate, produce 0 to baselineMaxFlagsPerCandidate baseline flags. Each flag: category (climate/safety/schools/healthcare/policy/other), severity (yellow/orange/red), one-sentence summary, source, retrievedAt, confidenceGrade. A red flag has severity = red.\n\nDo NOT compute or modify any scores here. Do NOT silently turn flags into dealbreakers or weights. If evidence is unclear, record Unknown.\n\nUpdate RELOCATION_DOSSIER.md: add Baseline Flags (Not Scored) section with per-candidate table.\n\nCapture context variables: baselineFlags ({ [candidateKey]: { flags: array, unknowns: string[] } }), redFlagCandidates (string[]), redFlagCount (number).",
140
140
  "requireConfirmation": false
141
141
  },
142
142
  {
@@ -145,17 +145,26 @@
145
145
  "promptFragments": [
146
146
  {
147
147
  "id": "no-red-flags",
148
- "when": { "var": "redFlagCount", "equals": 0 },
148
+ "when": {
149
+ "var": "redFlagCount",
150
+ "equals": 0
151
+ },
149
152
  "text": "No red flags were detected. Record redFlagDecision = 'fyi' and redFlagDecisionNotes = 'No red flags detected in baseline due diligence.' Proceed to Phase 5."
150
153
  },
151
154
  {
152
155
  "id": "has-red-flags",
153
- "when": { "var": "redFlagCount", "gt": 0 },
154
- "text": "Red flags were found. Summarize each: candidate name, category, one-line summary, source. Ask the user to choose exactly one action: (a) promote_to_dealbreakers — update the `dealbreakers` context variable AND the RELOCATION_DOSSIER.md Constraints section with the new/updated dealbreakers, then re-check screenResults for affected candidates; (b) add_weighted_criterion — ask the user how to weight it and which existing weights decrease so the `weights` array still sums to 100; (c) fyi — record the decision and move on. Record redFlagDecision and redFlagDecisionNotes. Append to RELOCATION_DOSSIER.md Red Flag Gate Decisions (append-only)."
156
+ "when": {
157
+ "var": "redFlagCount",
158
+ "gt": 0
159
+ },
160
+ "text": "Red flags were found. Summarize each: candidate name, category, one-line summary, source. Ask the user to choose exactly one action: (a) promote_to_dealbreakers \u2014 update the `dealbreakers` context variable AND the RELOCATION_DOSSIER.md Constraints section with the new/updated dealbreakers, then re-check screenResults for affected candidates; (b) add_weighted_criterion \u2014 ask the user how to weight it and which existing weights decrease so the `weights` array still sums to 100; (c) fyi \u2014 record the decision and move on. Record redFlagDecision and redFlagDecisionNotes. Append to RELOCATION_DOSSIER.md Red Flag Gate Decisions (append-only)."
155
161
  }
156
162
  ],
157
163
  "prompt": "Handle baseline red flags before selecting the shortlist.",
158
- "requireConfirmation": { "var": "redFlagCount", "gt": 0 }
164
+ "requireConfirmation": {
165
+ "var": "redFlagCount",
166
+ "gt": 0
167
+ }
159
168
  },
160
169
  {
161
170
  "id": "phase-5-shortlist",
@@ -207,16 +216,16 @@
207
216
  "goal": "Produce the final comparison matrix, explainable ranking, and a practical next-steps plan.",
208
217
  "constraints": [
209
218
  "The score formula must be applied consistently to every candidate.",
210
- "Unknowns must be disclosed in the ranking narrative never presented as if they were evidence-backed.",
219
+ "Unknowns must be disclosed in the ranking narrative \u2014 never presented as if they were evidence-backed.",
211
220
  "Baseline flags (not scored) are shown separately and do not change totalScore.",
212
221
  "One re-weight pass is allowed if the user says the ranking direction is wrong."
213
222
  ],
214
223
  "procedure": [
215
- "Step 1 Comparison matrix. Build a table in RELOCATION_DOSSIER.md: rows = shortlisted candidates, columns = weighted criteria. Mark Unknowns explicitly. Add a separate appendix table for baseline flags (Not Scored): red/orange flags per candidate.",
216
- "Step 2 Score each candidate. For each criterion, assign a normalized subscore: Strong fit = 1.0, Mixed/conditional = 0.5, Weak fit = 0.0. For Unknowns: neutral policy 0.5; penalize policy 0.25; followup_required policy 0.5 and flag candidate ineligible for top 3 if Unknown on any criterion with weight >= 15. Compute totalScore = sum(weight_i * subscore_i) for each candidate.",
217
- "Step 3 Ranking narrative. For each candidate write: 'Ranks #k because it wins on X and Y, loses on Z. Biggest tradeoff: ...' Make sure all Unknown subscores are called out explicitly in the narrative.",
218
- "Step 4 Re-weight gate. Ask the user: 'Does this ranking direction feel correct?' If not, allow one re-weight (user adjusts any number of criteria weights; must still sum to 100; re-run scoring). Record reweightUsed (true/false). Update Decision Log with any weight changes and rationale.",
219
- "Step 5 Next steps. Produce: suggested visit plan for top 2-4 candidates (what to validate in person), open questions per candidate (from Unknowns sections), pivot triggers (what evidence would change the ranking), optional neighborhood-level follow-ups if enough evidence exists. Update RELOCATION_DOSSIER.md with Next Steps and Pivot Triggers.",
224
+ "Step 1 \u2014 Comparison matrix. Build a table in RELOCATION_DOSSIER.md: rows = shortlisted candidates, columns = weighted criteria. Mark Unknowns explicitly. Add a separate appendix table for baseline flags (Not Scored): red/orange flags per candidate.",
225
+ "Step 2 \u2014 Score each candidate. For each criterion, assign a normalized subscore: Strong fit = 1.0, Mixed/conditional = 0.5, Weak fit = 0.0. For Unknowns: neutral policy \u2192 0.5; penalize policy \u2192 0.25; followup_required policy \u2192 0.5 and flag candidate ineligible for top 3 if Unknown on any criterion with weight >= 15. Compute totalScore = sum(weight_i * subscore_i) for each candidate.",
226
+ "Step 3 \u2014 Ranking narrative. For each candidate write: 'Ranks #k because it wins on X and Y, loses on Z. Biggest tradeoff: ...' Make sure all Unknown subscores are called out explicitly in the narrative.",
227
+ "Step 4 \u2014 Re-weight gate. Ask the user: 'Does this ranking direction feel correct?' If not, allow one re-weight (user adjusts any number of criteria weights; must still sum to 100; re-run scoring). Record reweightUsed (true/false). Update Decision Log with any weight changes and rationale.",
228
+ "Step 5 \u2014 Next steps. Produce: suggested visit plan for top 2-4 candidates (what to validate in person), open questions per candidate (from Unknowns sections), pivot triggers (what evidence would change the ranking), optional neighborhood-level follow-ups if enough evidence exists. Update RELOCATION_DOSSIER.md with Next Steps and Pivot Triggers.",
220
229
  "Capture context variables: ranking ([{name, totalScore, rank}]), unknownsImpactSummary, reweightUsed."
221
230
  ],
222
231
  "verify": [
@@ -41,7 +41,7 @@
41
41
  {
42
42
  "id": "step-generate-candidates",
43
43
  "title": "Step 3: Generate Candidates from Tensions",
44
- "prompt": "Generate design candidates that resolve the identified tensions differently.\n\nMANDATORY candidates:\n1. The simplest possible change that satisfies acceptance criteria. If the problem doesn't need an architectural solution, say so.\n2. Follow the existing repo pattern — adapt what the codebase already does for similar problems. Don't invent when you can adapt.\n\nAdditional candidates (1-2 more):\n- Each must resolve the identified tensions DIFFERENTLY, not just vary surface details\n- Each must be grounded in a real constraint or tradeoff, not an abstract perspective label\n- Consider philosophy conflicts: if the stated philosophy disagrees with repo patterns, one candidate could follow the stated philosophy and another could follow the established pattern\n\nFor each candidate, produce:\n- One-sentence summary of the approach\n- Which tensions it resolves and which it accepts\n- Boundary solved at, and why that boundary is the best fit\n- The specific failure mode you'd watch for\n- How it relates to existing repo patterns (follows / adapts / departs)\n- What you gain and what you give up\n- Impact surface beyond the immediate task\n- Scope judgment: too narrow / best-fit / too broad, with concrete evidence\n- Which philosophy principles it honors and which it conflicts with (by name)\n\nRules:\n- candidates must be genuinely different in shape, not just wording\n- if all candidates converge on the same approach, that's signal — note it honestly rather than manufacturing fake diversity\n- broader scope requires concrete evidence\n- cite specific files or patterns when they materially shape a candidate",
44
+ "prompt": "Generate design candidates that resolve the identified tensions differently.\n\nMANDATORY candidates:\n1. The simplest possible change that satisfies acceptance criteria. If the problem doesn't need an architectural solution, say so.\n2. Follow the existing repo pattern — adapt what the codebase already does for similar problems. Don't invent when you can adapt.\n\nAdditional candidates (1-2 more):\n- Each must resolve the identified tensions DIFFERENTLY, not just vary surface details\n- Each must be grounded in a real constraint or tradeoff, not an abstract perspective label\n- Consider philosophy conflicts: if the stated philosophy disagrees with repo patterns, one candidate could follow the stated philosophy and another could follow the established pattern\n\nFor each candidate, produce:\n- One-sentence summary of the approach\n- Which tensions it resolves and which it accepts\n- Boundary solved at, and why that boundary is the best fit\n- The specific failure mode you'd watch for\n- How it relates to existing repo patterns (follows / adapts / departs)\n- What you gain and what you give up\n- Impact surface beyond the immediate task\n- Scope judgment: too narrow / best-fit / too broad, with concrete evidence\n- Which philosophy principles it honors and which it conflicts with (by name)\n\nRules:\n- candidates must be genuinely different in shape, not just wording\n- if all candidates converge on the same approach, that's signal — note it honestly rather than manufacturing fake diversity\n- broader scope requires concrete evidence\n- cite specific files or patterns when they materially shape a candidate\n- specify each candidate at the level of concrete shape, not concept labels: 'tags' is not a candidate specification; 'per-workflow multi-labels drawn from a closed 9-value enum' is. If you find yourself using a concept label (tags, categories, events, hooks), you have not yet specified the candidate — name the data structure, the vocabulary or value set it uses, who maintains it, and how it is queried",
45
45
  "agentRole": "You are generating genuinely diverse design candidates grounded in real tensions.",
46
46
  "requireConfirmation": false
47
47
  },
@@ -2,7 +2,7 @@
2
2
  "id": "scoped-documentation-workflow",
3
3
  "name": "Scoped Documentation Workflow",
4
4
  "version": "2.0.0",
5
- "description": "Create documentation for a SINGLE, BOUNDED subject with strict scope enforcement. Perfect for: one class/component, one integration point, one mechanism, one architecture decision. Prevents documentation sprawl through continuous boundary validation.",
5
+ "description": "Use this to create documentation for a single, bounded subject \u2014 one class, one integration point, one mechanism, or one architecture decision. Enforces strict scope discipline to prevent documentation sprawl.",
6
6
  "clarificationPrompts": [
7
7
  "What specifically do you want to document? (feature, component, library, mechanism, interaction, architecture, process, etc.)",
8
8
  "Who will read this documentation? (team members, external users, new developers, architects, etc.)",
@@ -16,18 +16,18 @@
16
16
  "Agent can read files, analyze code, and write documentation"
17
17
  ],
18
18
  "metaGuidance": [
19
- "SCOPE IS LAW: Define a boundary and defend it. REFERENCE out-of-scope items (one sentence + link). Never EXPLAIN them. One violation leads to more protect the boundary.",
20
- "REFERENCE vs EXPLAIN: Good 'This uses CacheManager (see Cache Docs) to store results.' Bad 'CacheManager works by maintaining an LRU cache that...' Never explain out-of-scope internals.",
21
- "TEMPTATION LOGGING: Every time you almost explain an out-of-scope item, log it: 'Almost explained [X] but stopped out of scope.' Zero logs on a complex subject is a red flag.",
22
- "NOTES-FIRST DURABILITY: use output.notesMarkdown as the primary durable record. ANALYSIS.md, OUTLINE.md, SCOPE_CONTRACT.md are optional human-facing artifacts not required workflow memory.",
23
- "RUBRIC OVER VIBES: Score concrete dimensions with evidence sentences. Derive your next action from the rubric result not from a gut feeling about whether things seem okay.",
19
+ "SCOPE IS LAW: Define a boundary and defend it. REFERENCE out-of-scope items (one sentence + link). Never EXPLAIN them. One violation leads to more \u2014 protect the boundary.",
20
+ "REFERENCE vs EXPLAIN: Good \u2014 'This uses CacheManager (see Cache Docs) to store results.' Bad \u2014 'CacheManager works by maintaining an LRU cache that...' Never explain out-of-scope internals.",
21
+ "TEMPTATION LOGGING: Every time you almost explain an out-of-scope item, log it: 'Almost explained [X] but stopped \u2014 out of scope.' Zero logs on a complex subject is a red flag.",
22
+ "NOTES-FIRST DURABILITY: use output.notesMarkdown as the primary durable record. ANALYSIS.md, OUTLINE.md, SCOPE_CONTRACT.md are optional human-facing artifacts \u2014 not required workflow memory.",
23
+ "RUBRIC OVER VIBES: Score concrete dimensions with evidence sentences. Derive your next action from the rubric result \u2014 not from a gut feeling about whether things seem okay.",
24
24
  "DEFAULT BEHAVIOR: self-execute with tools. Only ask the user for approvals at explicit checkpoints or for external knowledge you genuinely cannot determine yourself."
25
25
  ],
26
26
  "steps": [
27
27
  {
28
28
  "id": "phase-0-reconnaissance-and-scope",
29
29
  "title": "Phase 0: Reconnaissance & Scope Definition",
30
- "prompt": "I want to create focused documentation for: \"[user's request]\"\n\n**How I work:** I handle most decisions autonomously and stop only for critical choices like scope approval. You can adjust anytime: say 'check with me more often' to add phase checkpoints, or 'just finish it' to minimize stops.\n\n**Step 1 Reconnaissance (2-3 minutes):**\n\nExplore the subject quickly to understand the landscape before proposing scope:\n\n- Locate the subject (files, system definitions, process docs)\n- Identify primary interfaces and entry points\n- Map immediate dependencies (one level deep)\n- Check for existing documentation to avoid duplication\n- Note related components and assess complexity (simple/moderate/complex)\n\nReconnaissance findings:\n- **Primary Subject:** [what was requested]\n- **Type:** [code/system/concept/process/interaction]\n- **Located at:** [file paths, system names, or description]\n- **Related Components:** [list with brief descriptions]\n- **Dependencies:** [key dependencies identified]\n- **Existing Docs:** [found/not found]\n- **Complexity:** [Simple/Moderate/Complex]\n\n**Step 2 Scope Proposal:**\n\nBased on reconnaissance, propose a scope contract:\n\n**Subject:** [One clear sentence describing what you're documenting]\n\n**IN SCOPE** (will be explained in detail):\n- [Specific component/feature/mechanism and its core behaviors]\n- [Common use cases and usage patterns]\n- [Integration points and interfaces]\n- [Important edge cases and design decisions]\n\n**OUT OF SCOPE** (will be referenced only, not explained):\n- [Dependency X] internals referenced as prerequisite\n- [Related feature Y] link to separate docs\n- [Historical context] unless specifically needed\n\n**BOUNDARY CONDITIONS** (where in-scope meets out-of-scope):\n- [Interface with System A]: document our side, reference their docs\n- [Integration with Component B]: document the contract, not their internals\n\n**Target Audience:** [who will read this]\n**Success 
Criteria:** Reader can [specific outcome]\n**Estimated Length:** ~[N] words\n\n**Does this scope look right?** Reply with approval or adjustments. If no response, I'll interpret as approval and proceed.\n\nAfter approval: I'll proceed with autonomous analysis, checking back only for critical questions or validation issues.",
30
+ "prompt": "I want to create focused documentation for: \"[user's request]\"\n\n**How I work:** I handle most decisions autonomously and stop only for critical choices like scope approval. You can adjust anytime: say 'check with me more often' to add phase checkpoints, or 'just finish it' to minimize stops.\n\n**Step 1 \u2014 Reconnaissance (2-3 minutes):**\n\nExplore the subject quickly to understand the landscape before proposing scope:\n\n- Locate the subject (files, system definitions, process docs)\n- Identify primary interfaces and entry points\n- Map immediate dependencies (one level deep)\n- Check for existing documentation to avoid duplication\n- Note related components and assess complexity (simple/moderate/complex)\n\nReconnaissance findings:\n- **Primary Subject:** [what was requested]\n- **Type:** [code/system/concept/process/interaction]\n- **Located at:** [file paths, system names, or description]\n- **Related Components:** [list with brief descriptions]\n- **Dependencies:** [key dependencies identified]\n- **Existing Docs:** [found/not found]\n- **Complexity:** [Simple/Moderate/Complex]\n\n**Step 2 \u2014 Scope Proposal:**\n\nBased on reconnaissance, propose a scope contract:\n\n**Subject:** [One clear sentence describing what you're documenting]\n\n**IN SCOPE** (will be explained in detail):\n- [Specific component/feature/mechanism and its core behaviors]\n- [Common use cases and usage patterns]\n- [Integration points and interfaces]\n- [Important edge cases and design decisions]\n\n**OUT OF SCOPE** (will be referenced only, not explained):\n- [Dependency X] internals \u2014 referenced as prerequisite\n- [Related feature Y] \u2014 link to separate docs\n- [Historical context] \u2014 unless specifically needed\n\n**BOUNDARY CONDITIONS** (where in-scope meets out-of-scope):\n- [Interface with System A]: document our side, reference their docs\n- [Integration with Component B]: document the contract, not their internals\n\n**Target Audience:** [who 
will read this]\n**Success Criteria:** Reader can [specific outcome]\n**Estimated Length:** ~[N] words\n\n**Does this scope look right?** Reply with approval or adjustments. If no response, I'll interpret as approval and proceed.\n\nAfter approval: I'll proceed with autonomous analysis, checking back only for critical questions or validation issues.",
31
31
  "requireConfirmation": true,
32
32
  "validationCriteria": [
33
33
  {
@@ -54,14 +54,14 @@
54
54
  "promptBlocks": {
55
55
  "goal": "Analyze the subject thoroughly within the approved scope boundary and score evidence quality before proceeding.",
56
56
  "constraints": [
57
- "Enforce scope during analysis: read and trace only in-scope files and behaviors. When you encounter an out-of-scope item, log it and move on do not analyze it.",
57
+ "Enforce scope during analysis: read and trace only in-scope files and behaviors. When you encounter an out-of-scope item, log it and move on \u2014 do not analyze it.",
58
58
  "Notes-first: record analysis findings in notesMarkdown, not in ANALYSIS.md as a required artifact.",
59
- "Derive the proceed/gather-more decision from the evidence rubric not from a gut feeling."
59
+ "Derive the proceed/gather-more decision from the evidence rubric \u2014 not from a gut feeling."
60
60
  ],
61
61
  "procedure": [
62
62
  "Investigation approach: (1) Map interfaces and public API surface (in-scope only). (2) Trace key execution flows and behaviors. (3) Identify important mechanisms and patterns. (4) Extract 5+ representative examples from code or usage. (5) Document integration points at the interface/contract level only. (6) Note design decisions and tradeoffs.",
63
- "Boundary enforcement (continuous) when you encounter something outside scope: REFERENCE IT ('This integrates with [System X] via [interface]'), LINK TO DOCS ('For [System X] details, see [link]'), DO NOT EXPLAIN IT, DO NOT ANALYZE IT, LOG THE TEMPTATION ('Almost analyzed [X] but stopped out of scope').",
64
- "Evidence rubric score all 4 dimensions before deciding to proceed. Score each dimension 0, 1, or 2 and write one evidence sentence for each.",
63
+ "Boundary enforcement (continuous) \u2014 when you encounter something outside scope: REFERENCE IT ('This integrates with [System X] via [interface]'), LINK TO DOCS ('For [System X] details, see [link]'), DO NOT EXPLAIN IT, DO NOT ANALYZE IT, LOG THE TEMPTATION ('Almost analyzed [X] but stopped \u2014 out of scope').",
64
+ "Evidence rubric \u2014 score all 4 dimensions before deciding to proceed. Score each dimension 0, 1, or 2 and write one evidence sentence for each.",
65
65
  "subjectBoundaryClarity: 0=boundary confirmed and clear, 1=likely correct but one area uncertain, 2=boundary still ambiguous",
66
66
  "behaviorCoverage: 0=all key behaviors identified with examples, 1=most behaviors covered with minor gaps, 2=significant behavior gaps remain",
67
67
  "examplesCollected: 0=5+ concrete examples extracted from subject, 1=2-4 examples found, 2=fewer than 2 verifiable examples",
@@ -70,7 +70,7 @@
70
70
  "If you find a critical scope issue (the subject is actually two distinct subjects, or a required dependency cannot be referenced without explaining it), surface this to the user before proceeding."
71
71
  ],
72
72
  "outputRequired": {
73
- "analysisFindings": "Subject overview, core behaviors/components, integration points, examples (5+), design decisions recorded in notesMarkdown",
73
+ "analysisFindings": "Subject overview, core behaviors/components, integration points, examples (5+), design decisions \u2014 recorded in notesMarkdown",
74
74
  "scopeBoundaryLog": "Every temptation stopped: what was encountered and why it was left out",
75
75
  "evidenceRubricScores": "All 4 dimensions scored with evidence sentences, plus gate decision (proceed or gather more with specifics)"
76
76
  },
@@ -90,18 +90,18 @@
90
90
  "goal": "Design the documentation structure and create a detailed outline that maps directly to the Phase 1 analysis findings.",
91
91
  "constraints": [
92
92
  "Every section must map to an in-scope item from Phase 1 analysis.",
93
- "Out-of-scope items belong only in a 'Related Documentation' section as references, never explanations.",
94
- "Pull all content points from Phase 1 findings do not invent new content that wasn't in the analysis.",
93
+ "Out-of-scope items belong only in a 'Related Documentation' section \u2014 as references, never explanations.",
94
+ "Pull all content points from Phase 1 findings \u2014 do not invent new content that wasn't in the analysis.",
95
95
  "Notes-first: record the outline in notesMarkdown, not only in OUTLINE.md."
96
96
  ],
97
97
  "procedure": [
98
- "Step 1 Choose structure type based on subject: Code subject: Overview How It Works Usage Guide Reference Edge Cases Related Docs. System/concept: Overview Components Interactions Integration Examples Related Docs. Process/workflow: Overview Steps Decision Points Examples Troubleshooting Related Docs. Adjust sections based on what analysis actually found.",
99
- "Step 2 Create detailed outline. For each section, specify: content points sourced from Phase 1 analysis (with evidence references), examples to include (from Phase 1 examples), approximate word count target, whether this section documents in-scope items or references out-of-scope ones.",
100
- "Step 3 Scope compliance check on the outline. Review every section and confirm: Does it map to an in-scope item? Does it require explaining any out-of-scope dependency? Can the content be written entirely from Phase 1 findings?",
98
+ "Step 1 \u2014 Choose structure type based on subject: Code subject: Overview \u2192 How It Works \u2192 Usage Guide \u2192 Reference \u2192 Edge Cases \u2192 Related Docs. System/concept: Overview \u2192 Components \u2192 Interactions \u2192 Integration \u2192 Examples \u2192 Related Docs. Process/workflow: Overview \u2192 Steps \u2192 Decision Points \u2192 Examples \u2192 Troubleshooting \u2192 Related Docs. Adjust sections based on what analysis actually found.",
99
+ "Step 2 \u2014 Create detailed outline. For each section, specify: content points sourced from Phase 1 analysis (with evidence references), examples to include (from Phase 1 examples), approximate word count target, whether this section documents in-scope items or references out-of-scope ones.",
100
+ "Step 3 \u2014 Scope compliance check on the outline. Review every section and confirm: Does it map to an in-scope item? Does it require explaining any out-of-scope dependency? Can the content be written entirely from Phase 1 findings?",
101
101
  "If any section requires explaining an out-of-scope item: convert it to a reference, remove it, or surface a scope re-negotiation to the user. If any section cannot be written from Phase 1 findings: flag what additional analysis is needed and return to Phase 1."
102
102
  ],
103
103
  "outputRequired": {
104
- "documentationStructure": "Section titles, word count targets, and content-source mapping recorded in notesMarkdown",
104
+ "documentationStructure": "Section titles, word count targets, and content-source mapping \u2014 recorded in notesMarkdown",
105
105
  "scopeComplianceConfirmation": "All sections confirmed to map to in-scope items",
106
106
  "totalEstimatedWordCount": "Total word count estimate",
107
107
  "analysisGaps": "Any gaps requiring return to Phase 1 (if none, state none)"
@@ -109,7 +109,7 @@
109
109
  "verify": [
110
110
  "Every section maps to an in-scope item from Phase 1 analysis",
111
111
  "Out-of-scope items only appear in 'Related Documentation' section",
112
- "All examples sourced from Phase 1 none invented",
112
+ "All examples sourced from Phase 1 \u2014 none invented",
113
113
  "Outline recorded in notesMarkdown"
114
114
  ]
115
115
  },
@@ -121,16 +121,16 @@
121
121
  "promptBlocks": {
122
122
  "goal": "Write the documentation section by section, enforcing scope boundaries continuously and logging all boundary decisions.",
123
123
  "constraints": [
124
- "Content must come from Phase 1 analysis and Phase 2 outline do not introduce new claims or examples that weren't in the analysis.",
124
+ "Content must come from Phase 1 analysis and Phase 2 outline \u2014 do not introduce new claims or examples that weren't in the analysis.",
125
125
  "After every paragraph: check scope compliance. Am I explaining something in scope or referencing something out of scope?",
126
126
  "Mark boundaries clearly in the text so readers know what's in vs out.",
127
127
  "Notes-first: the documentation file is the primary artifact; ANALYSIS.md or OUTLINE.md sidecar files are optional."
128
128
  ],
129
129
  "procedure": [
130
- "Writing approach section by section. For each section from the Phase 2 outline: (1) Draft content from outline and Phase 1 findings. (2) Add examples from analysis (complete, not fragments). (3) Check scope compliance inline. (4) Write boundary markers for out-of-scope references. (5) Log any temptation to explain an out-of-scope item.",
131
- "Scope boundary wording use these patterns. Good (reference only): 'This uses the CacheManager (see Cache Documentation) to store results.' Good: 'For authentication details, see Auth Service Documentation.' Bad (explaining out-of-scope): 'The CacheManager works by maintaining an in-memory LRU cache that...'",
132
- "After writing all sections final scope audit. Read through the complete draft and count: lines documenting in-scope items (vast majority), lines referencing out-of-scope items (minimal), lines explaining out-of-scope items (must be zero). Fix any violations before advancing to Phase 4.",
133
- "If you discover a critical inaccuracy (a claim you cannot verify from analysis), flag it for user confirmation in Phase 4 don't silently remove it.",
130
+ "Writing approach \u2014 section by section. For each section from the Phase 2 outline: (1) Draft content from outline and Phase 1 findings. (2) Add examples from analysis (complete, not fragments). (3) Check scope compliance inline. (4) Write boundary markers for out-of-scope references. (5) Log any temptation to explain an out-of-scope item.",
131
+ "Scope boundary wording \u2014 use these patterns. Good (reference only): 'This uses the CacheManager (see Cache Documentation) to store results.' Good: 'For authentication details, see Auth Service Documentation.' Bad (explaining out-of-scope): 'The CacheManager works by maintaining an in-memory LRU cache that...'",
132
+ "After writing all sections \u2014 final scope audit. Read through the complete draft and count: lines documenting in-scope items (vast majority), lines referencing out-of-scope items (minimal), lines explaining out-of-scope items (must be zero). Fix any violations before advancing to Phase 4.",
133
+ "If you discover a critical inaccuracy (a claim you cannot verify from analysis), flag it for user confirmation in Phase 4 \u2014 don't silently remove it.",
134
134
  "Write the documentation to a file. Suggested filename: [SUBJECT]_Documentation.md or similar. Record the path in mainDocumentationFile."
135
135
  ],
136
136
  "outputRequired": {
@@ -151,7 +151,7 @@
151
151
  {
152
152
  "id": "phase-4-validation-and-delivery",
153
153
  "title": "Phase 4: Adversarial Validation & Delivery",
154
- "prompt": "Now be the harshest critic before delivering.\n\n**VALIDATION RUBRIC score all 4 dimensions with evidence:**\n\n**1. Scope Compliance**\n- PASS: zero unexplained out-of-scope items in the documentation\n- PARTIAL: minor violations found and fixed autonomously\n- FAIL: violations found that cannot be fixed without user input\n- Evidence: [sentence + violation count]\n- Action: FAIL triggers user input; PARTIAL acceptable with disclosure\n\n**2. Completeness**\n- PASS: all in-scope items from the scope contract are documented\n- PARTIAL: minor gaps (non-critical items missing)\n- FAIL: significant in-scope items missing\n- Evidence: [sentence + gap count]\n- Action: FAIL triggers autonomous content addition then re-validate; PARTIAL acceptable with disclosure\n\n**3. Accuracy**\n- PASS: all technical claims verifiable from Phase 1 analysis\n- PARTIAL: minor uncertainties clearly marked in documentation\n- FAIL: key claims uncertain external confirmation required\n- Evidence: [sentence + uncertain claim count]\n- Action: FAIL triggers user checkpoint with specific questions; PARTIAL acceptable with disclosure\n\n**4. 
Clarity**\n- PASS: target audience can achieve the success criteria using this documentation\n- PARTIAL: some sections need clearer wording\n- FAIL: significant clarity gaps preventing successful use\n- Evidence: [sentence]\n- Action: FAIL triggers autonomous rewrite then re-validate; PARTIAL acceptable with disclosure\n\n---\n\n**Gate rules:**\n- scopeCompliance and completeness must both reach PASS for delivery\n- accuracy FAIL triggers user checkpoint list specific questions below\n- clarity FAIL triggers autonomous rewrite\n- accuracy PARTIAL and clarity PARTIAL proceed with disclosure\n\n**If all gates pass or are acceptable for delivery:**\n\nCreate SCOPE_MAP.md:\n```\n# Scope Map: [Subject]\n\n## Documented Here (In Scope)\n- [Item]: [brief description] see [Section]\n\n## Referenced Only (Out of Scope)\n- [Item]: [brief description] [link if available]\n\n## Integration Points\n- [Where this connects to other systems / interface contracts]\n\n## Related Documentation\n- [Links with descriptions]\n```\n\nOptionally create MAINTENANCE_NOTES.md with: when to update this documentation, which sections to review for accuracy, scope boundary reminders (what stays referenced-only).\n\n**DELIVERY SUMMARY:**\n\n- Main documentation: [path] (~[N] words)\n- Scope map: SCOPE_MAP.md\n- Validation results: scopeCompliance=[X], completeness=[X], accuracy=[X], clarity=[X]\n- Scope discipline: [N] temptations stopped, 0 violations in final delivery\n- Audience: [target audience]\n- Success criteria: Reader can [outcome]\n\n---\n\n**IF accuracy FAIL user checkpoint required:**\n\nBefore I can deliver, I need your input on accuracy:\n\n[List specific technical claims that need confirmation, with context for each]\n\nOnce you confirm, I'll finalize and deliver.",
154
+ "prompt": "Now be the harshest critic before delivering.\n\n**VALIDATION RUBRIC \u2014 score all 4 dimensions with evidence:**\n\n**1. Scope Compliance**\n- PASS: zero unexplained out-of-scope items in the documentation\n- PARTIAL: minor violations found and fixed autonomously\n- FAIL: violations found that cannot be fixed without user input\n- Evidence: [sentence + violation count]\n- Action: FAIL triggers user input; PARTIAL acceptable with disclosure\n\n**2. Completeness**\n- PASS: all in-scope items from the scope contract are documented\n- PARTIAL: minor gaps (non-critical items missing)\n- FAIL: significant in-scope items missing\n- Evidence: [sentence + gap count]\n- Action: FAIL triggers autonomous content addition then re-validate; PARTIAL acceptable with disclosure\n\n**3. Accuracy**\n- PASS: all technical claims verifiable from Phase 1 analysis\n- PARTIAL: minor uncertainties clearly marked in documentation\n- FAIL: key claims uncertain \u2014 external confirmation required\n- Evidence: [sentence + uncertain claim count]\n- Action: FAIL triggers user checkpoint with specific questions; PARTIAL acceptable with disclosure\n\n**4. 
Clarity**\n- PASS: target audience can achieve the success criteria using this documentation\n- PARTIAL: some sections need clearer wording\n- FAIL: significant clarity gaps preventing successful use\n- Evidence: [sentence]\n- Action: FAIL triggers autonomous rewrite then re-validate; PARTIAL acceptable with disclosure\n\n---\n\n**Gate rules:**\n- scopeCompliance and completeness must both reach PASS for delivery\n- accuracy FAIL triggers user checkpoint \u2014 list specific questions below\n- clarity FAIL triggers autonomous rewrite\n- accuracy PARTIAL and clarity PARTIAL proceed with disclosure\n\n**If all gates pass or are acceptable for delivery:**\n\nCreate SCOPE_MAP.md:\n```\n# Scope Map: [Subject]\n\n## Documented Here (In Scope)\n- [Item]: [brief description] \u2014 see [Section]\n\n## Referenced Only (Out of Scope)\n- [Item]: [brief description] \u2014 [link if available]\n\n## Integration Points\n- [Where this connects to other systems / interface contracts]\n\n## Related Documentation\n- [Links with descriptions]\n```\n\nOptionally create MAINTENANCE_NOTES.md with: when to update this documentation, which sections to review for accuracy, scope boundary reminders (what stays referenced-only).\n\n**DELIVERY SUMMARY:**\n\n- Main documentation: [path] (~[N] words)\n- Scope map: SCOPE_MAP.md\n- Validation results: scopeCompliance=[X], completeness=[X], accuracy=[X], clarity=[X]\n- Scope discipline: [N] temptations stopped, 0 violations in final delivery\n- Audience: [target audience]\n- Success criteria: Reader can [outcome]\n\n---\n\n**IF accuracy FAIL \u2014 user checkpoint required:**\n\nBefore I can deliver, I need your input on accuracy:\n\n[List specific technical claims that need confirmation, with context for each]\n\nOnce you confirm, I'll finalize and deliver.",
155
155
  "requireConfirmation": true,
156
156
  "validationCriteria": [
157
157
  {
@@ -59,8 +59,7 @@
59
59
  "assessmentConsequences": [
60
60
  {
61
61
  "when": {
62
- "dimensionId": "confidence",
63
- "equalsLevel": "low"
62
+ "anyEqualsLevel": "low"
64
63
  },
65
64
  "effect": {
66
65
  "kind": "require_followup",