npm - olympus-ai - Versions diffs - 4.4.13 → 4.4.14 - Mend

olympus-ai 4.4.13 → 4.4.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/dist/hooks/registry.d.ts CHANGED Viewed

@@ -3,6 +3,24 @@
  *
  * Central registry for all hooks. Hooks are organized by event type
  * and sorted by priority (lower runs first).
+ *
+ * Reserved priority ranges (do NOT reuse without updating this table):
+ *
+ *  Priority  Owner / Purpose
+ *  --------  -------------------------------------------------------
+ *   5        SessionStart — early session initialization
+ *  10        SessionStart — standard session initialization
+ *  50        AgentTracking — PreToolUse, tracks Task tool invocations
+ *  70        LearningTool — discovery capture, tool-use learning
+ *  75        PlanLifecycle — plan creation events
+ *  76        PlanLifecycle — plan completion events
+ *  84-86     Test infrastructure (Group 1A) — reserved for U-002/U-003
+ *  90        LearningStop — session-end learning flush
+ *  92        DiscoveryCapture — PostToolUse discovery events
+ * 110        CompletePlan — plan completion and archival hook
+ * 115        CompletePlan — final plan state persistence
+ *
+ * Available gaps: 77-83, 87-89
  */
 import type { HookDefinition, HookEvent } from './types.js';
 /**

package/dist/hooks/registry.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/hooks/registry.ts"],"names":[],"mappings":"AAAA~~;;;;;GAKG~~;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAK5D;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,cAAc,GAAG,IAAI,CAMvD;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,SAAS,GAAG,cAAc,EAAE,CAEnE;AAED;;;;GAIG;AACH,wBAAgB,WAAW,IAAI,cAAc,EAAE,CAE9C;AAED;;;GAGG;AACH,wBAAgB,UAAU,IAAI,IAAI,CAEjC;AAED;;;;GAIG;AACH,wBAAgB,aAAa,IAAI,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAMtD"}
1	+ {"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/hooks/registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAK5D;;;;;GAKG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,cAAc,GAAG,IAAI,CAMvD;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,SAAS,GAAG,cAAc,EAAE,CAEnE;AAED;;;;GAIG;AACH,wBAAgB,WAAW,IAAI,cAAc,EAAE,CAE9C;AAED;;;GAGG;AACH,wBAAgB,UAAU,IAAI,IAAI,CAEjC;AAED;;;;GAIG;AACH,wBAAgB,aAAa,IAAI,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,CAMtD"}

package/dist/hooks/registry.js CHANGED Viewed

@@ -3,6 +3,24 @@
  *
  * Central registry for all hooks. Hooks are organized by event type
  * and sorted by priority (lower runs first).
+ *
+ * Reserved priority ranges (do NOT reuse without updating this table):
+ *
+ *  Priority  Owner / Purpose
+ *  --------  -------------------------------------------------------
+ *   5        SessionStart — early session initialization
+ *  10        SessionStart — standard session initialization
+ *  50        AgentTracking — PreToolUse, tracks Task tool invocations
+ *  70        LearningTool — discovery capture, tool-use learning
+ *  75        PlanLifecycle — plan creation events
+ *  76        PlanLifecycle — plan completion events
+ *  84-86     Test infrastructure (Group 1A) — reserved for U-002/U-003
+ *  90        LearningStop — session-end learning flush
+ *  92        DiscoveryCapture — PostToolUse discovery events
+ * 110        CompletePlan — plan completion and archival hook
+ * 115        CompletePlan — final plan state persistence
+ *
+ * Available gaps: 77-83, 87-89
  */
 /** Map of event type to registered hooks */
 const hooks = new Map();

package/dist/hooks/registry.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/hooks/registry.ts"],"names":[],"mappings":"AAAA~~;;;;;GAKG~~;AAIH,4CAA4C;AAC5C,MAAM,KAAK,GAAqC,IAAI,GAAG,EAAE,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAoB;IAC/C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IAC/C,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtB,8CAA8C;IAC9C,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC;IACrE,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;AACpC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAgB;IAC/C,OAAO,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,WAAW;IACzB,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU;IACxB,KAAK,CAAC,KAAK,EAAE,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa;IAC3B,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqB,CAAC;IAC5C,KAAK,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,IAAI,KAAK,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}
1	+ {"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/hooks/registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,4CAA4C;AAC5C,MAAM,KAAK,GAAqC,IAAI,GAAG,EAAE,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,UAAU,YAAY,CAAC,IAAoB;IAC/C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IAC/C,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtB,8CAA8C;IAC9C,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC;IACrE,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;AACpC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAgB;IAC/C,OAAO,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,WAAW;IACzB,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,UAAU;IACxB,KAAK,CAAC,KAAK,EAAE,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa;IAC3B,MAAM,MAAM,GAAG,IAAI,GAAG,EAAqB,CAAC;IAC5C,KAAK,MAAM,CAAC,KAAK,EAAE,UAAU,CAAC,IAAI,KAAK,EAAE,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC"}

package/dist/installer/index.d.ts CHANGED Viewed

@@ -24,7 +24,7 @@ export declare const HOOKS_DIR: string;
 export declare const SETTINGS_FILE: string;
 export declare const VERSION_FILE: string;
 /** Current version - MUST match package.json */
-export declare const VERSION = "4.4.13";
+export declare const VERSION = "4.4.14";
 /** Installation result */
 export interface InstallResult {
     success: boolean;

package/dist/installer/index.js CHANGED Viewed

@@ -40,7 +40,7 @@ export const HOOKS_DIR = join(CLAUDE_CONFIG_DIR, 'hooks');
 export const SETTINGS_FILE = join(CLAUDE_CONFIG_DIR, 'settings.json');
 export const VERSION_FILE = join(CLAUDE_CONFIG_DIR, '.olympus-version.json');
 /** Current version - MUST match package.json */
-export const VERSION = '4.4.13';
+export const VERSION = '4.4.14';
 /**
  * Read a content file from the resources/ directory.
  *

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "olympus-ai",
-  "version": "4.4.13",
+  "version": "4.4.14",
   "description": "Olympus: Multi-agent orchestration for Claude Code. Summon the gods of code.",
   "type": "module",
   "main": "dist/index.js",

package/resources/claude-md.md CHANGED Viewed

@@ -199,6 +199,50 @@ olympus learn --budget-status
 4. **Verify**: Check your todo list before declaring completion
 5. **Plan First**: For complex tasks, use Prometheus to create a plan
+## Terminal Output Formatting
+Plain paragraphs are invisible in the terminal. Use visual elements to create hierarchy.
+**Phase banners** — fenced with `---`, use `## 🚀` header:
+```text
+---
+## 🚀 Starting Phase Name
+**Key:** `value` — description
+---
+```
+**Action prompts** — ALWAYS use this pattern when the user must do something:
+```text
+---
+## 📌 Action Required
+Do the thing with `command`, then say **"done"**.
+> Secondary instructions go in a blockquote.
+---
+```
+**Status lines** — emoji-first for scannability:
+```text
+✅ Tests pass (3,360 / 3,360)
+⚠️ Missing optional config
+❌ Contract validation failed
+⏳ Generating artifacts...
+```
+**Emoji vocabulary:** 🚀 workflow start, 📌 action required, ✅ success, ⚠️ warning, ❌ error, ⏳ in progress, 🔍 context/analysis, 📋 summary, 💡 recommendation
+**Rules:**
+1. Action prompts use `## 📌 Action Required` — never plain paragraphs
+2. Phase transitions get `---` fenced `##` headers with emoji
+3. File paths, commands, user inputs always in `inline code`
+4. Key action words in **bold**
+5. Secondary instructions in `> blockquotes`
+6. Action prompts go at the END of the response
+7. No emojis in headers written to files (breaks parsers)
 ## Background Task Execution
 For long-running operations, use `run_in_background: true`:

package/resources/rules/common/pathway-behaviors.md ADDED Viewed

@@ -0,0 +1,100 @@
+# Pathway Behaviors
+**Loading note**: The orchestrator loads this file at the start of each Construction unit and
+injects the section matching `checkpoint.pathway_type` into the agent context. Only the
+relevant section is injected — the agent sees its pathway section, not the full file.
+---
+## Bugfix Pathway
+These rules are non-negotiable. Every bugfix unit must satisfy all items in the quality gate
+before the unit is marked complete.
+### Autonomous Diagnosis
+Before writing any code change, the agent must:
+1. Reproduce the defect with a failing test or a documented reproduction recipe
+2. Identify the root cause — the specific code path, assumption, or edge case that causes
+   the failure
+3. Document the root cause in the `## Root Cause` section of the unit's `test-report.md`
+Do not write a fix before completing the diagnosis. A fix that addresses only the symptom
+will likely fail the quality gate.
+### Root Cause Analysis Requirement
+The `test-report.md` for a bugfix unit must contain a `## Root Cause` section with:
+- File path and line number(s) of the defect
+- A one-paragraph explanation of why the failure occurs
+- Confirmation that the fix targets the root cause, not a workaround
+### Missing Test Rule (Non-Negotiable)
+Every bugfix unit MUST produce at least one new test that:
+- Reproduces the original defect (fails before the fix, passes after)
+- Is placed in the appropriate test file for the affected module
+- Is named to clearly identify the defect it guards against
+This rule cannot be overridden via `allowFailures`. The engine blocks unit completion if
+`tests_total === 0` for any bugfix unit, regardless of options passed.
+### Quality Gate Checklist
+All items must be satisfied before the bugfix unit is marked complete:
+- [ ] Root cause identified and documented in `test-report.md`
+- [ ] Fix addresses the root cause (not just the symptom)
+- [ ] All existing tests still pass (zero regressions)
+- [ ] At least one new test reproduces the original defect
+- [ ] New test passes with the fix applied
+---
+## Optimization Pathway
+### Baseline Measurement
+Before making any code changes:
+1. Run the relevant benchmark or performance test
+2. Record the baseline metric (e.g., execution time, memory usage, throughput) in the
+   `## Baseline` section of the unit's `test-report.md`
+3. Note the test command used and any environment conditions that could affect the result
+Do not proceed with changes until the baseline is captured. A comparison without a baseline
+is not a valid optimization result.
+### Performance Comparison
+After applying changes:
+1. Run the same benchmark or performance test under the same conditions
+2. Record the result in the `## Performance Comparison` section of `test-report.md`:
+   - Baseline metric
+   - Post-change metric
+   - Delta (absolute and percentage)
+3. Confirm the improvement meets the acceptance criteria defined in the unit spec
+If performance has not improved, investigate before marking the unit complete.
+### Rollback Plan
+Document in `test-report.md` how to revert the optimization if it causes a regression in
+production:
+- Which files were changed
+- The prior behavior that would need to be restored
+- Any feature flags or config knobs that could disable the optimization without a deploy
+---
+## Standard / Enhancement / Greenfield Pathways
+No additional behavioral constraints apply beyond the standard rules defined in:
+- `resources/rules/construction/code-generation.md`
+- `resources/rules/construction/test-generation.md`
+Follow those rules as written. The orchestrator does not inject any additional context for
+these pathways.

package/resources/rules/common/terminal-formatting.md ADDED Viewed

@@ -0,0 +1,161 @@
+# Terminal Output Formatting Standards
+Claude Code renders markdown via an Ink/React TUI renderer with ANSI escape codes. Plain paragraphs are effectively invisible — they blend into the response stream. These rules ensure critical information is scannable.
+## The Core Problem
+In a terminal, the user sees a continuous stream of white text. Only these elements create visual differentiation:
+| Element | Renders As | Use For |
+|---------|-----------|---------|
+| `# H1` / `## H2` | Colored text (red/orange) | Section breaks, banners |
+| `**bold**` | Bold weight | Key terms, labels |
+| `` `inline code` `` | Distinct background/color | Commands, file paths, values |
+| `> blockquote` | Indented with left border | Callouts, important notes |
+| `---` | Horizontal rule | Visual fences/separators |
+| Emojis | Colored glyphs | Status indicators, anchors |
+| Code blocks | Syntax-highlighted box | Code, structured data |
+**Rule: Anything the user must act on or notice MUST use at least two of these elements. Never deliver critical info as a plain paragraph.**
+---
+## Emoji Vocabulary (Standardized)
+Use consistently so users learn the visual language:
+| Emoji | Meaning | When to Use |
+|-------|---------|-------------|
+| 🚀 | Workflow start / launch | Welcome banners, phase kickoffs |
+| 📌 | Action required | User needs to do something NOW |
+| ✅ | Success / complete | Task done, tests pass, validation pass |
+| ⚠️ | Warning / attention | Non-blocking but important |
+| ❌ | Error / failure | Something broke, blocking issue |
+| 🔍 | Context / analysis | Showing inferred or extracted info |
+| 📋 | Summary / recap | Phase summaries, status reports |
+| 💡 | Recommendation / tip | AI suggestions, best practices |
+| ⏳ | In progress / waiting | Long-running operations |
+| 🔄 | Iteration / retry | Re-running, looping back |
+**Do NOT use emojis in markdown headers that will be written to files** (breaks TOCs and parsers). Emojis are for terminal display output only.
+---
+## Output Patterns
+### Pattern 1: Welcome / Phase Banner
+```markdown
+---
+## 🚀 Welcome to AI-DLC! Starting a new workflow for Group 1B
+**Workflow:** `group-1b-test-quality` — Test Quality & Traceability
+**Builds on:** Group 1A test infrastructure (confirmed ✅)
+---
+```
+Why it works: `---` fences create a visual box. `##` renders in color. Emoji grabs the eye. Bold labels + inline code for scannable key-value pairs.
+---
+### Pattern 2: Action Required (MOST IMPORTANT)
+Use this whenever the user needs to do something. This is the pattern that matters most.
+```markdown
+---
+## 📌 Action Required
+Fill in the `[Answer]:` tags in the file below, then say **"done"** to continue.
+> For multi-select questions, provide comma-separated letters (e.g., `A, B, E`).
+**File:** `aidlc-docs/group-1b-test-quality/inception/intent-questions.md`
+---
+```
+Why it works: The `📌 Action Required` header is an unmissable colored+emoji signal. The command uses `inline code` and `**bold**`. The secondary instruction lives in a `> blockquote` so it's visually subordinate but still distinct. File path is in code formatting.
+---
+### Pattern 3: Status / Progress Update
+```markdown
+✅ All 3,360 tests pass — Group 1A prerequisite confirmed.
+⏳ Generating intent questions file...
+```
+Why it works: Emoji-first lines create a scannable status log. Each line starts with a visual indicator.
+---
+### Pattern 4: Validation Summary
+```markdown
+## 📋 Validation Summary
+| Check | Status | Detail |
+|-------|--------|--------|
+| Tests pass | ✅ | 3,360 / 3,360 |
+| Prerequisites met | ✅ | Group 1A complete |
+| Manifest valid | ⚠️ | Missing `coverage-config.json` |
+```
+Why it works: Tables with emoji status create an at-a-glance dashboard. The `##` header separates it from surrounding text.
+---
+### Pattern 5: Error / Blocking Issue
+```markdown
+---
+## ❌ Blocking Issue
+**What failed:** Contract validation for `UserService.create()`
+**Why:** Missing required field `email` in test fixture at `tests/fixtures/users.ts:42`
+> Fix the fixture and re-run with **"retry"**.
+---
+```
+---
+### Pattern 6: Context Extraction / Inference Review
+```markdown
+## 🔍 Context Extracted from Your Input
+- **Problem:** AI-generated tests have documented failure modes...
+- **Primary users:** Olympus OSS users running AI-DLC workflows...
+- **Scope:** Four capabilities — anti-pattern detection, traceability...
+- **Success criteria:** Each capability integrates into the pipeline...
+> Please correct anything that is wrong, then say **"confirmed"**.
+```
+Why it works: The `🔍` signals "I'm showing you what I understood." Bold labels on each bullet make it scannable. The blockquote at the end is the action prompt.
+---
+## Anti-Patterns (Never Do This)
+| Bad | Good | Why |
+|-----|------|-----|
+| Plain paragraph for action prompts | `## 📌 Action Required` header | Paragraphs are invisible |
+| Burying instructions in the middle of text | Action prompt at the END, fenced with `---` | Users scan bottom-first for "what do I do now" |
+| Multiple action items in one paragraph | One action per bold line or bullet | Cognitive overload |
+| Emoji in file-output headers (`# 🔍 Intent`) | Emoji only in terminal display text | Breaks file parsers/TOCs |
+| Wall of unbroken text explaining context | Bullet list with `**bold labels**` | Scannable vs. readable |
+---
+## Quick Rules (For Skill Authors)
+1. **Every phase transition** gets a `---` fenced `##` header with emoji
+2. **Every action prompt** uses `## 📌 Action Required` pattern
+3. **Every status update** starts with an emoji indicator (✅ ⚠️ ❌ ⏳)
+4. **Every user-facing value** (file paths, commands, inputs) uses `inline code`
+5. **Every "what to do next"** instruction uses `**bold**` for the key action word
+6. **Never deliver critical information as a plain paragraph**
+7. **Action prompts go at the END** of the response, not buried in the middle

package/resources/rules/construction/build-and-test.md CHANGED Viewed

@@ -269,6 +269,64 @@ Create `aidlc-docs/construction/build-and-test/e2e-test-instructions.md`:
 ---
+## Step 6b: Run Smoke Tests
+Run the test suite at the scope selected below (full, targeted for the bugfix pathway, or a
+summary smoke pass) and generate a structured test report that aggregates results across all units.
+**Scope parameter** — determined by `pathway_type`:
+| Scope | Pathway | What runs |
+|-------|---------|-----------|
+| `full` | standard, comprehensive, brownfield-enhancement, brownfield-refactor | Entire test suite |
+| `targeted` | bugfix | Only tests for changed units (scoped by file path filter) |
+| `summary` | minimal, optimization | Smoke pass — quick sanity check only |
+**Delegation**: Delegate test execution to `qa-tester` using the same pattern established
+in the Agent Delegation Strategy section. The orchestrator generates the scope instruction
+first, then delegates; after the agent reports, the orchestrator runs the test command
+independently to verify (see Orchestrator Verification Requirements above).
+**Output artifact**: `aidlc-docs/{workflow-id}/construction/build-and-test/test-report.md`
+Aggregate per-unit test reports from `aidlc-docs/{workflow-id}/construction/{unit}/testing/test-report.md`
+into the build-level report using this schema:
+```markdown
+# Test Report — Build and Test
+## Summary
+| Metric | Value |
+|--------|-------|
+| total_tests | [X] |
+| passed | [X] |
+| failed | [X] |
+| skipped | [X] |
+## Per-Unit Breakdown
+| Unit | Total | Passed | Failed | Skipped | Status |
+|------|-------|--------|--------|---------|--------|
+| [unit-id] | [X] | [X] | [X] | [X] | Pass/Fail |
+## Failure Details
+[For each failing test: unit, test name, file path, error message]
+## Remediation Guidance
+[Populated only when failures exist — links to the failing unit's test-report.md]
+```
+**Critical failure gate**: If `tests_failed > 0` and `allowFailures` is not set:
+- Block workflow progression
+- Surface the failing unit's `test-report.md` path in the Remediation Guidance section
+- Do NOT proceed to Step 7 until all failures are resolved or `allowFailures` is explicitly
+  confirmed by the user
+---
 ## Step 7: Generate Test Summary
 Create `aidlc-docs/construction/build-and-test/build-and-test-summary.md`:

package/resources/rules/construction/code-generation.md CHANGED Viewed

@@ -341,5 +341,8 @@ When generating UI code (web, mobile, desktop), ensure elements are automation-f
 - All steps in unit code generation plan marked [x]
 - All unit stories implemented according to plan
 - All code and tests generated (tests will be executed in Build & Test phase)
+- After code generation completes for a unit, proceed to the **Test Generation** stage
+  (see `resources/rules/construction/test-generation.md`) before moving to the next unit
+  or Build & Test.
 - Deployment artifacts generated
 - Complete unit ready for build and verification

package/resources/rules/construction/test-generation.md ADDED Viewed

@@ -0,0 +1,82 @@
+# Test Generation - Detailed Steps
+## Overview
+This stage generates and runs tests for the current unit after code generation completes.
+- Agent responsible: `qa-tester` (primary) or `olympian` for test writing
+- Output artifact: `aidlc-docs/{workflowId}/construction/{unitId}/testing/test-report.md`
+## Prerequisites
+- Code generation must be complete (`code-summary.md` must exist at `aidlc-docs/{workflowId}/construction/{unitId}/code/code-summary.md`)
+- Unit files in scope are read from `code-summary.md`
+- If `code-summary.md` does not exist, halt and report to orchestrator before proceeding
+## Step 1 — Framework Detection (Hybrid)
+- **1a**: The engine stores the detected framework in `test_framework` on `ConstructionUnitProgress`
+- **1b**: Agent independently verifies: read `package.json`, `vitest.config.*`, `jest.config.*` at project root
+- **1c**: If engine value and agent value disagree, agent value wins; log the discrepancy
+Known frameworks and their test commands:
+| Framework | Test Command |
+|-----------|-------------|
+| `vitest` | `npx vitest run` |
+| `jest` | `npx jest` |
+| `mocha` | `npx mocha` |
+| Unknown | Ask user before proceeding |
+## Step 2 — Determine Test Types (Auditable Criteria)
+Evaluate each criterion explicitly and record which test types apply in `test-report.md`:
+- **Unit tests**: Required for all pure functions, class methods, utilities. File naming: `*.test.ts` or `*.spec.ts` co-located with source.
+- **Integration tests**: Required when the unit touches 2 or more modules, external APIs, databases, or file I/O. Placed in `tests/integration/`.
+- **E2E tests**: Required only when the unit includes a user-facing entry point (HTTP endpoint, CLI command, UI page). Placed in `tests/e2e/`.
+## Step 3 — Generate Tests
+- Scope: only modify or create files listed in `code-summary.md`'s "Files created/modified" sections
+- Do NOT modify files from other units
+- Follow existing test file conventions in the project (import style, describe/it structure, mock patterns)
+- Use `data-testid` attributes for UI component tests
+## Step 4 — Run Tests
+- Execute the framework test command for the unit's files only (scope by file path filter where possible)
+- Capture: total count, passed count, failed count
+- Write results into `test-report.md`
+## Step 5 — Failure Handling
+- On first failure: attempt one automated fix per failing test (fix the test or the implementation; prefer fixing the test unless the implementation has a clear bug)
+- On second failure: attempt a second fix with a different strategy
+- After two failed attempts: escalate — write the failure details to `test-report.md` and set `tests_failed` count; do NOT attempt a third fix
+- Escalation message format: surface to the orchestrator with file path, test name, error message
+## Engine Gating Rules
+- The engine blocks unit completion if `tests_total === 0` (no tests detected)
+- The engine blocks unit completion if `tests_failed > 0`
+- Both blocks can be overridden by setting `allowFailures: true` in `TestGenerationOptions`, except on the bugfix pathway, where the `tests_total === 0` block cannot be overridden
+- Override must be logged in `test-report.md` under the `## Override` section
+## Code Modification Scope
+- The agent may ONLY modify files listed in `code-summary.md` for this unit
+- `code-summary.md` is at: `aidlc-docs/{workflowId}/construction/{unitId}/code/code-summary.md`
+- If `code-summary.md` does not exist, halt and report to orchestrator before proceeding
+## Output Artifact
+- Path: `aidlc-docs/{workflowId}/construction/{unitId}/testing/test-report.md`
+- Must exist before the unit is marked complete
+## Completion Criteria
+- `test-report.md` written with actual counts (not placeholders)
+- `tests_total > 0`
+- `tests_failed === 0` (or override documented)
+- `ConstructionUnitProgress.stages['test-generation'].status === 'completed'`

package/resources/skills/continue/SKILL.md CHANGED Viewed

@@ -138,6 +138,8 @@ If `current_phase === 'construction'`:
 - Check `construction_units` for the active unit
 - Determine which design stage is `in_progress` or `not_started`
 - Resume from that point
+- If a `construction_units` entry has `stages['test-generation'].status === 'in_progress'` or `test_generation_status === 'in_progress'`, resume at test-generation for that unit
+- Note: test-generation runs after code-generation for each unit; check `test_generation_status` in the unit progress
 ### 4d. Other Phases
@@ -163,6 +165,7 @@ Based on the resume point determined in Step 4, read the corresponding rule file
 | nfr-design | `~/.claude/olympus/rules/construction/nfr-design.md` |
 | infrastructure-design | `~/.claude/olympus/rules/construction/infrastructure-design.md` |
 | code-generation | `~/.claude/olympus/rules/construction/code-generation.md` |
+| test-generation | `~/.claude/olympus/rules/construction/test-generation.md` |
 ---
@@ -217,6 +220,7 @@ Wait for user response before proceeding.
 | infrastructure-design | `oracle-medium` |
 | code-generation (backend) | `olympian` or `olympian-high` |
 | code-generation (frontend) | `frontend-engineer` or `frontend-engineer-high` |
+| test-generation | `qa-tester` |
 | build-and-test | `qa-tester` |
 ### If user chose B (Review)