murmur8 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.blueprint/features/feature_feedback-test/FEATURE_SPEC.md +229 -0
- package/.blueprint/features/feature_feedback-test/IMPLEMENTATION_PLAN.md +25 -0
- package/.blueprint/features/feature_feedback-test/handoff-alex.md +20 -0
- package/.blueprint/features/feature_feedback-test/handoff-cass.md +21 -0
- package/.blueprint/features/feature_feedback-test/handoff-nigel.md +20 -0
- package/.blueprint/features/feature_feedback-test/story-config-management.md +103 -0
- package/.blueprint/features/feature_feedback-test/story-parse-pipeline.md +65 -0
- package/.blueprint/features/feature_feedback-test/story-validation-normalisation.md +99 -0
- package/README.md +18 -0
- package/SKILL.md +35 -24
- package/package.json +1 -1
- package/src/commands/history.js +41 -2
- package/src/history.js +31 -0
- package/src/index.js +2 -1
- package/src/murm.js +50 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# Feature Specification — Feedback Module Test Suite
|
|
2
|
+
|
|
3
|
+
## 1. Feature Intent
|
|
4
|
+
**Why this feature exists.**
|
|
5
|
+
|
|
6
|
+
- **Problem being addressed:** The `src/feedback.js` module provides foundational logic for the agent feedback loop — schema validation, quality gate evaluation, key normalisation, config management, and feedback parsing. No test file directly imports and exercises this module's exported API. Existing tests (feature_feedback-loop, feature_compressed-feedback) re-implement helper logic inline rather than testing the real module, leaving the production code untested.
|
|
7
|
+
- **User need:** Developers maintaining or extending `src/feedback.js` need confidence that the exported functions behave correctly and that regressions are caught immediately by the test suite.
|
|
8
|
+
- **System alignment:** Per SYSTEM_SPEC.md:Section 7 (Implementation Rules), "tests are contracts" and the suite must be green before a feature is considered complete. Untested production modules violate this principle and expose the pipeline to silent breakage.
|
|
9
|
+
|
|
10
|
+
> This feature creates a direct unit-test harness for `src/feedback.js`, closing the coverage gap introduced by the feedback-loop and compressed-feedback features.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## 2. Scope
|
|
15
|
+
|
|
16
|
+
### In Scope
|
|
17
|
+
|
|
18
|
+
- Unit tests that `require('../src/feedback')` and call its exported functions directly
|
|
19
|
+
- Coverage of all exported functions:
|
|
20
|
+
- `validateFeedback(feedback)` — schema validation
|
|
21
|
+
- `normalizeFeedbackKeys(feedback)` — `rec` → `recommendation` normalisation
|
|
22
|
+
- `parseFeedbackFromOutput(output)` — regex extraction and JSON parsing
|
|
23
|
+
- `shouldPause(feedback, config)` — quality gate decision logic
|
|
24
|
+
- `getDefaultConfig()` — default config shape and values
|
|
25
|
+
- `readConfig()` — file read with fallback to defaults
|
|
26
|
+
- `writeConfig(config)` — file write
|
|
27
|
+
- `setConfigValue(key, value)` — validated config mutation
|
|
28
|
+
- `resetConfig()` — restores defaults
|
|
29
|
+
- `displayConfig()` — smoke test (no crash, correct output shape)
|
|
30
|
+
- File system isolation using `tmp` directories, matching the pattern established by `feature_feedback-loop.test.js`
|
|
31
|
+
- Edge cases: corrupt config file, missing config file, boundary rating values, both `rec` and `recommendation` keys present
|
|
32
|
+
|
|
33
|
+
### Out of Scope
|
|
34
|
+
|
|
35
|
+
- Testing agent prompt text (covered by feature_compressed-feedback)
|
|
36
|
+
- Integration tests spanning multiple modules (covered by feature_feedback-loop)
|
|
37
|
+
- Testing `displayConfig` output formatting exhaustively (smoke test only)
|
|
38
|
+
- Testing the insights calibration or issue-correlation logic (covered by feature_feedback-loop:Feedback Insights)
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## 3. Actors Involved
|
|
43
|
+
|
|
44
|
+
### Developer / Test Runner
|
|
45
|
+
|
|
46
|
+
- **Can do:** Run `node --test test/feature_feedback-test.test.js` to verify `src/feedback.js` behaviour
|
|
47
|
+
- **Cannot do:** Modify production code via test execution
|
|
48
|
+
|
|
49
|
+
### src/feedback.js (module under test)
|
|
50
|
+
|
|
51
|
+
- **Provides:** All exported functions listed in Section 2
|
|
52
|
+
- **Constrained by:** Existing call sites; test must not require API changes
|
|
53
|
+
|
|
54
|
+
### File System (test isolation)
|
|
55
|
+
|
|
56
|
+
- **Provides:** Temporary directories for config file read/write tests
|
|
57
|
+
- **Pattern:** `fs.mkdtempSync` setup / `fs.rmSync` teardown per describe block
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## 4. Behaviour Overview
|
|
62
|
+
|
|
63
|
+
### Happy Path: All Exported Functions Are Tested
|
|
64
|
+
|
|
65
|
+
1. Test file imports `src/feedback.js` module
|
|
66
|
+
2. Each exported function has one or more test cases covering:
|
|
67
|
+
- Correct inputs → expected outputs
|
|
68
|
+
- Boundary inputs → correct handling
|
|
69
|
+
- Invalid inputs → appropriate rejection or graceful degradation
|
|
70
|
+
3. File system tests use isolated `tmp` directories to avoid cross-test pollution
|
|
71
|
+
4. The original working directory is restored with `process.chdir` after each file-system test group
|
|
72
|
+
5. All tests pass green; CI accepts the file
|
|
73
|
+
|
|
74
|
+
### Alternative: Config File Corruption
|
|
75
|
+
|
|
76
|
+
1. Test writes deliberately malformed JSON to the config file path
|
|
77
|
+
2. `readConfig()` catches the parse error and returns defaults
|
|
78
|
+
3. No exception propagates; test asserts returned value equals `getDefaultConfig()`
|
|
79
|
+
|
|
80
|
+
### Alternative: Boundary Rating Validation
|
|
81
|
+
|
|
82
|
+
1. Tests cover ratings 1, 5 (valid boundaries) and 0, 6 (invalid outside range)
|
|
83
|
+
2. Tests cover `rating: 3.5` (non-integer, invalid) and `rating: 3` (integer, valid)
|
|
84
|
+
3. `validateFeedback` returns `{ valid: false, errors: [...] }` for all invalid cases
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## 5. State & Lifecycle Interactions
|
|
89
|
+
|
|
90
|
+
- **State-creating:** None — the test file does not introduce new runtime state
|
|
91
|
+
- **State-constrained:** Tests manage transient file system state (tmp directories)
|
|
92
|
+
- **Module lifecycle:** `require('../src/feedback')` is resolved once per test file run; config file paths are relative and resolved against `process.cwd()` which tests temporarily redirect
|
|
93
|
+
|
|
94
|
+
**Key constraint:** `src/feedback.js` uses a module-level constant `CONFIG_FILE = '.claude/feedback-config.json'` resolved relative to `process.cwd()`. Tests must `process.chdir(testDir)` before any call that reads/writes config, and restore the original working directory (`process.chdir(originalCwd)`) in teardown.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## 6. Rules & Decision Logic
|
|
99
|
+
|
|
100
|
+
### Rule 1: Direct Module Import Required
|
|
101
|
+
|
|
102
|
+
- **Description:** Tests must import the real `src/feedback.js` rather than re-implementing its logic
|
|
103
|
+
- **Rationale:** Inline reimplementation does not catch bugs in production code
|
|
104
|
+
- **Inputs:** `require('../src/feedback')`
|
|
105
|
+
- **Outputs:** Live module reference
|
|
106
|
+
- **Type:** Structural constraint
|
|
107
|
+
|
|
108
|
+
### Rule 2: Isolated File System Per Describe Block
|
|
109
|
+
|
|
110
|
+
- **Description:** Each describe block that touches the config file must set up and tear down its own `tmp` directory
|
|
111
|
+
- **Inputs:** `fs.mkdtempSync`, `process.chdir`
|
|
112
|
+
- **Outputs:** Isolated state per describe block
|
|
113
|
+
- **Type:** Deterministic
|
|
114
|
+
|
|
115
|
+
### Rule 3: Boundary Coverage for Rating
|
|
116
|
+
|
|
117
|
+
- **Description:** Rating validation must be tested at values 0, 1, 3, 5, 6 and non-integer 3.5
|
|
118
|
+
- **Type:** Deterministic
|
|
119
|
+
|
|
120
|
+
### Rule 4: Dual-Key Normalisation Coverage
|
|
121
|
+
|
|
122
|
+
- **Description:** `normalizeFeedbackKeys` must be tested for: `rec` only, `recommendation` only, both present (`recommendation` keeps its value and `rec` is left in place), neither present
|
|
123
|
+
- **Type:** Deterministic
|
|
124
|
+
|
|
125
|
+
### Rule 5: Parse-and-Validate Pipeline
|
|
126
|
+
|
|
127
|
+
- **Description:** At least one test must chain `parseFeedbackFromOutput` → `normalizeFeedbackKeys` → `validateFeedback` to verify the end-to-end extraction path works against the real module
|
|
128
|
+
- **Type:** Integration within module boundary
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## 7. Dependencies
|
|
133
|
+
|
|
134
|
+
### System Components
|
|
135
|
+
|
|
136
|
+
- **src/feedback.js:** Module under test — no modifications required
|
|
137
|
+
- **node:test, node:assert:** Node.js built-in test runner and assertions (Node 18+)
|
|
138
|
+
- **fs, path, os:** Standard library for file system isolation
|
|
139
|
+
|
|
140
|
+
### File Dependencies
|
|
141
|
+
|
|
142
|
+
- Input: `src/feedback.js` (read-only from test perspective)
|
|
143
|
+
- Output: `test/feature_feedback-test.test.js` (new file)
|
|
144
|
+
|
|
145
|
+
### Existing Test Patterns
|
|
146
|
+
|
|
147
|
+
- Isolation pattern from `test/feature_feedback-loop.test.js` (setupTestDir / teardownTestDir)
|
|
148
|
+
- Module import pattern from `test/feature_theme-adoption.test.js` and `test/feature_config-factory.test.js`
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## 8. Non-Functional Considerations
|
|
153
|
+
|
|
154
|
+
### Performance
|
|
155
|
+
|
|
156
|
+
- All tests are synchronous, and file system operations are confined to tmp dirs; expected runtime < 100ms total
|
|
157
|
+
|
|
158
|
+
### Maintainability
|
|
159
|
+
|
|
160
|
+
- Tests are structured to mirror `src/feedback.js` exported API, making it easy to add tests as the module evolves
|
|
161
|
+
- Describe block names match function groups: `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, `shouldPause`, `Config Management`
|
|
162
|
+
|
|
163
|
+
### Error Tolerance
|
|
164
|
+
|
|
165
|
+
- Tmp directory teardown uses `{ force: true }` to tolerate partial cleanup on test failure
|
|
166
|
+
|
|
167
|
+
### No Side Effects
|
|
168
|
+
|
|
169
|
+
- Tests do not modify any project-level `.claude/` files; all file I/O is confined to `tmp` directories
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## 9. Assumptions & Open Questions
|
|
174
|
+
|
|
175
|
+
### Assumptions
|
|
176
|
+
|
|
177
|
+
- ASSUMPTION: `src/feedback.js` exports are stable; no API changes are required to make it testable
|
|
178
|
+
- ASSUMPTION: `process.chdir` correctly redirects the module's relative path resolution for `CONFIG_FILE`
|
|
179
|
+
- ASSUMPTION: Node.js 18+ is available (required by SYSTEM_SPEC.md:Section 2)
|
|
180
|
+
- ASSUMPTION: `displayConfig` writes to stdout; smoke test asserts it does not throw
|
|
181
|
+
|
|
182
|
+
### Open Questions
|
|
183
|
+
|
|
184
|
+
- Should `displayConfig` be tested with a captured stdout mock, or is a non-throw assertion sufficient? (INFERRED: non-throw is sufficient for this feature)
|
|
185
|
+
- Should `setConfigValue` with unknown keys be tested? (INFERRED: yes, as the function throws a typed error that should be verified)
|
|
186
|
+
- Are there any async code paths in `src/feedback.js`? (INFERRED: no — all operations are synchronous based on current implementation)
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## 10. Impact on System Specification
|
|
191
|
+
|
|
192
|
+
### Reinforces Existing Assumptions
|
|
193
|
+
|
|
194
|
+
- Per SYSTEM_SPEC.md:Section 7, "tests are contracts" and "green suite required" — this feature closes a gap where contracts were implied but not enforced
|
|
195
|
+
- Per SYSTEM_SPEC.md:Section 8 (Traceability), tests that directly import production modules create a firmer traceability chain than tests using reimplemented helpers
|
|
196
|
+
|
|
197
|
+
### No Contradiction
|
|
198
|
+
|
|
199
|
+
This feature introduces no new behaviour, state, or API. It adds test coverage for existing production code. No system spec update is required.
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## 11. Handover to BA (Cass)
|
|
204
|
+
|
|
205
|
+
### Story Themes
|
|
206
|
+
|
|
207
|
+
1. **Direct Module Tests:** Tests that import and exercise `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, and `shouldPause` via the real module
|
|
208
|
+
2. **Config Management Tests:** Tests for `readConfig`, `writeConfig`, `setConfigValue`, `resetConfig`, and `getDefaultConfig` with file system isolation
|
|
209
|
+
3. **End-to-End Parse Pipeline:** A chained test covering `parseFeedbackFromOutput` → `normalizeFeedbackKeys` → `validateFeedback` as an integrated path
|
|
210
|
+
|
|
211
|
+
### Expected Story Boundaries
|
|
212
|
+
|
|
213
|
+
- Story 1: Validation and normalisation functions (no file system needed)
|
|
214
|
+
- Story 2: Config management functions (file system isolation required)
|
|
215
|
+
- Story 3: Parse pipeline (chains the Story 1 functions end-to-end; in-memory, so no file system isolation is needed)
|
|
216
|
+
|
|
217
|
+
### Areas Needing Careful Story Framing
|
|
218
|
+
|
|
219
|
+
- `process.chdir` usage must be clearly framed as test infrastructure, not production behaviour
|
|
220
|
+
- The distinction between this test file and `feature_feedback-loop.test.js` must be explicit: this tests the real module; that uses inline helpers
|
|
221
|
+
- `displayConfig` story should be framed as a smoke test, not a full output assertion
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## 12. Change Log (Feature-Level)
|
|
226
|
+
|
|
227
|
+
| Date | Change | Reason | Raised By |
|
|
228
|
+
|------------|-------------------------------------|-----------------------------------------|-----------|
|
|
229
|
+
| 2026-05-19 | Initial feature specification | Close test coverage gap for src/feedback.js | Alex |
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Implementation Plan — feedback-test
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
This feature adds a test suite for `src/feedback.js`. All 34 tests were written and verified green by Nigel prior to this planning phase — `src/feedback.js` already exports every required function and no production code changes are needed. Implementation is test-only: the test file and its test-spec artifact already exist, and the tests pass.
|
|
6
|
+
|
|
7
|
+
## Files to Create/Modify
|
|
8
|
+
|
|
9
|
+
| Path | Action | Purpose |
|
|
10
|
+
|------|--------|---------|
|
|
11
|
+
| `test/feature_feedback-test.test.js` | Already created (Nigel) | 34 tests covering all exported feedback functions |
|
|
12
|
+
| `test/artifacts/feature_feedback-test/test-spec.md` | Already created (Nigel) | AC-to-test-ID mapping and assumptions |
|
|
13
|
+
| `src/feedback.js` | No change required | All required exports already present and correct |
|
|
14
|
+
|
|
15
|
+
## Implementation Steps
|
|
16
|
+
|
|
17
|
+
1. **Verify tests pass as-is** — Run `node --test test/feature_feedback-test.test.js` to confirm all 34 tests are green. Addresses all test IDs (T-VN-*, T-CM-*, T-PP-*).
|
|
18
|
+
|
|
19
|
+
2. **No production code changes needed** — `src/feedback.js` already exports `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, `shouldPause`, `getDefaultConfig`, `readConfig`, `writeConfig`, `setConfigValue`, `displayConfig`, and `resetConfig` with correct behaviour.
|
|
20
|
+
|
|
21
|
+
3. **Commit the new test artefacts** — Stage and commit `test/feature_feedback-test.test.js` and `test/artifacts/feature_feedback-test/test-spec.md` along with this plan.
|
|
22
|
+
|
|
23
|
+
## Risks / Questions
|
|
24
|
+
|
|
25
|
+
- None. Nigel confirmed all 34 tests pass against the unmodified production file before handoff.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
## Handoff Summary
|
|
2
|
+
**For:** Cass
|
|
3
|
+
**Feature:** feedback-test
|
|
4
|
+
|
|
5
|
+
### Key Decisions
|
|
6
|
+
- Scope is unit tests for `src/feedback.js` exclusively — the production module is imported directly, not reimplemented inline
|
|
7
|
+
- Nine of the ten exported functions receive full coverage: `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, `shouldPause`, `getDefaultConfig`, `readConfig`, `writeConfig`, `setConfigValue`, `resetConfig`; the tenth, `displayConfig`, is smoke-tested only
|
|
8
|
+
- File system tests use `tmp` directory isolation with `process.chdir` (matching `feature_feedback-loop.test.js` pattern)
|
|
9
|
+
- `displayConfig` is smoke-tested only (non-throw assertion); full stdout capture is out of scope
|
|
10
|
+
- One chained integration test covers the full parse pipeline: `parseFeedbackFromOutput` → `normalizeFeedbackKeys` → `validateFeedback`
|
|
11
|
+
|
|
12
|
+
### Files Created
|
|
13
|
+
- .blueprint/features/feature_feedback-test/FEATURE_SPEC.md
|
|
14
|
+
|
|
15
|
+
### Open Questions
|
|
16
|
+
- Whether `displayConfig` warrants stdout capture mocking (deferred; non-throw is sufficient for now)
|
|
17
|
+
- Async paths in `src/feedback.js`: inferred none exist, but Cass should confirm before writing stories
|
|
18
|
+
|
|
19
|
+
### Critical Context
|
|
20
|
+
The key distinction from existing feedback tests: `feature_feedback-loop.test.js` and `feature_compressed-feedback.test.js` both re-implement feedback logic as inline helpers — they do not import `src/feedback.js`. This feature exists precisely to test the real production module. Stories must keep this boundary clear. The output file is `test/feature_feedback-test.test.js` and must use `require('../src/feedback')`.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
## Handoff Summary
|
|
2
|
+
**For:** Nigel
|
|
3
|
+
**Feature:** feedback-test
|
|
4
|
+
|
|
5
|
+
### Key Decisions
|
|
6
|
+
- Split into three stories matching Alex's expected boundaries: validation/normalisation (pure), config management (file I/O), and parse pipeline (chained integration)
|
|
7
|
+
- `normalizeFeedbackKeys` with both `rec` and `recommendation` present: production code does NOT delete `rec` when `recommendation` already exists — both keys are preserved; ACs reflect actual implementation
|
|
8
|
+
- File system isolation pattern documented explicitly per `feature_feedback-loop.test.js`; `process.chdir` restore is mandatory in teardown
|
|
9
|
+
- `displayConfig` framed as smoke test (no-throw only); no stdout capture required
|
|
10
|
+
- All functions confirmed synchronous — no async/await needed in tests
|
|
11
|
+
|
|
12
|
+
### Files Created
|
|
13
|
+
- .blueprint/features/feature_feedback-test/story-validation-normalisation.md
|
|
14
|
+
- .blueprint/features/feature_feedback-test/story-config-management.md
|
|
15
|
+
- .blueprint/features/feature_feedback-test/story-parse-pipeline.md
|
|
16
|
+
|
|
17
|
+
### Open Questions
|
|
18
|
+
- None
|
|
19
|
+
|
|
20
|
+
### Critical Context
|
|
21
|
+
Output file is `test/feature_feedback-test.test.js` using `require('../src/feedback')` — not inline reimplementations. `CONFIG_FILE` is resolved relative to `process.cwd()`; all config tests must `chdir` into a `tmp` dir before calling any read/write function. Story-parse-pipeline covers Rule 5 (chained integration test) from FEATURE_SPEC.md. Rating boundary values to test: 0, 1, 3, 5, 6, 3.5.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
## Handoff Summary
|
|
2
|
+
**For:** Codey
|
|
3
|
+
**Feature:** feedback-test
|
|
4
|
+
|
|
5
|
+
### Key Decisions
|
|
6
|
+
- Tests import `require('../src/feedback')` directly — no inline reimplementation anywhere
|
|
7
|
+
- Config tests use `before`/`after` (not `beforeEach`/`afterEach`) since all config tests share one tmp dir with sequential state
|
|
8
|
+
- `normalizeFeedbackKeys` dual-key test asserts both keys are preserved (production does NOT delete `rec` when `recommendation` already exists)
|
|
9
|
+
- `displayConfig` covered as smoke test only (no stdout capture)
|
|
10
|
+
- All 34 tests are synchronous; no async/await used
|
|
11
|
+
|
|
12
|
+
### Files Created
|
|
13
|
+
- test/artifacts/feature_feedback-test/test-spec.md
|
|
14
|
+
- test/feature_feedback-test.test.js
|
|
15
|
+
|
|
16
|
+
### Open Questions
|
|
17
|
+
- None
|
|
18
|
+
|
|
19
|
+
### Critical Context
|
|
20
|
+
All 34 tests pass green (`node --test test/feature_feedback-test.test.js`). No changes to `src/feedback.js` are required — the existing exports satisfy all ACs. The Config Management describe block uses a single shared tmp dir (`before`/`after`), so tests within it run sequentially and depend on one another for state (e.g. T-CM-4.1 leaves a modified config file that T-CM-5.1 then resets). Codey need not modify production code; this feature is test-only.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Story: Config Management Functions
|
|
2
|
+
|
|
3
|
+
## User Story
|
|
4
|
+
|
|
5
|
+
As a developer maintaining `src/feedback.js`,
|
|
6
|
+
I want direct unit tests for `getDefaultConfig`, `readConfig`, `writeConfig`, `setConfigValue`, `resetConfig`, and `displayConfig`,
|
|
7
|
+
so that config persistence logic in the production module is verified against real file I/O in an isolated environment.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Acceptance Criteria
|
|
12
|
+
|
|
13
|
+
**Given** `getDefaultConfig()` is called,
|
|
14
|
+
**When** the function returns,
|
|
15
|
+
**Then** the result has `minRatingThreshold: 3.0`, `enabled: true`, and an `issueMappings` object containing all six standard mappings defined in `src/feedback.js`.
|
|
16
|
+
|
|
17
|
+
**Given** a `tmp` directory is set as `process.cwd()` and no config file exists there,
|
|
18
|
+
**When** `readConfig()` is called,
|
|
19
|
+
**Then** it returns a value equal to `getDefaultConfig()` and does not throw.
|
|
20
|
+
|
|
21
|
+
**Given** a `tmp` directory is set as `process.cwd()` and `.claude/feedback-config.json` contains valid JSON,
|
|
22
|
+
**When** `readConfig()` is called,
|
|
23
|
+
**Then** it returns the parsed config object matching the written content.
|
|
24
|
+
|
|
25
|
+
**Given** a `tmp` directory is set as `process.cwd()` and `.claude/feedback-config.json` contains malformed JSON (e.g. `{bad json`),
|
|
26
|
+
**When** `readConfig()` is called,
|
|
27
|
+
**Then** it returns `getDefaultConfig()` and does not throw.
|
|
28
|
+
|
|
29
|
+
**Given** a `tmp` directory is set as `process.cwd()`,
|
|
30
|
+
**When** `writeConfig(config)` is called with a valid config object,
|
|
31
|
+
**Then** `.claude/feedback-config.json` is created at the expected path and its content parses back to the original config object.
|
|
32
|
+
|
|
33
|
+
**Given** a `tmp` directory is set as `process.cwd()` and a config file exists,
|
|
34
|
+
**When** `setConfigValue('minRatingThreshold', '4.5')` is called,
|
|
35
|
+
**Then** `readConfig()` returns a config with `minRatingThreshold: 4.5`.
|
|
36
|
+
|
|
37
|
+
**Given** a `tmp` directory is set as `process.cwd()`,
|
|
38
|
+
**When** `setConfigValue('enabled', 'false')` is called,
|
|
39
|
+
**Then** `readConfig()` returns a config with `enabled: false`.
|
|
40
|
+
|
|
41
|
+
**Given** a `tmp` directory is set as `process.cwd()`,
|
|
42
|
+
**When** `setConfigValue` is called with an unknown key (e.g. `'nonExistentKey'`),
|
|
43
|
+
**Then** it throws an `Error` whose message contains `'Unknown config key'`.
|
|
44
|
+
|
|
45
|
+
**Given** a `tmp` directory is set as `process.cwd()` and a modified config file exists,
|
|
46
|
+
**When** `resetConfig()` is called,
|
|
47
|
+
**Then** `readConfig()` returns a value equal to `getDefaultConfig()`.
|
|
48
|
+
|
|
49
|
+
**Given** a `tmp` directory is set as `process.cwd()`,
|
|
50
|
+
**When** `displayConfig()` is called,
|
|
51
|
+
**Then** it does not throw (smoke test only — output format is not asserted).
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## File System Isolation Pattern
|
|
56
|
+
|
|
57
|
+
Each describe block that exercises config file I/O must follow this pattern:
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
before(): testDir = fs.mkdtempSync(os.tmpdir() + path.sep + 'feedback-test-')
|
|
61
|
+
originalCwd = process.cwd()
|
|
62
|
+
process.chdir(testDir)
|
|
63
|
+
|
|
64
|
+
after(): process.chdir(originalCwd)
|
|
65
|
+
fs.rmSync(testDir, { recursive: true, force: true })
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
This mirrors the isolation pattern in `test/feature_feedback-loop.test.js`.
|
|
69
|
+
|
|
70
|
+
The `CONFIG_FILE` constant in `src/feedback.js` is `.claude/feedback-config.json`, resolved relative to `process.cwd()`. Tests must `chdir` before calling any function that reads or writes config.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## `setConfigValue` Invalid Input Cases
|
|
75
|
+
|
|
76
|
+
| key | value | Expected behaviour |
|
|
77
|
+
|-----------------------|-----------|-------------------------------------------------|
|
|
78
|
+
| `minRatingThreshold` | `'0.5'` | throws — below minimum (1.0) |
|
|
79
|
+
| `minRatingThreshold` | `'5.5'` | throws — above maximum (5.0) |
|
|
80
|
+
| `minRatingThreshold` | `'abc'` | throws — not a number |
|
|
81
|
+
| `enabled` | `'yes'` | throws — not `'true'` or `'false'` |
|
|
82
|
+
| `nonExistentKey` | `'val'` | throws — unknown key |
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Out of Scope
|
|
87
|
+
|
|
88
|
+
- Testing `displayConfig` output format or colour rendering
|
|
89
|
+
- Testing stdout mock/capture for `displayConfig`
|
|
90
|
+
- Testing `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, `shouldPause` (covered in story-validation-normalisation.md)
|
|
91
|
+
- End-to-end pipeline chain (covered in story-parse-pipeline.md)
|
|
92
|
+
- Any modification of `src/feedback.js` production code
|
|
93
|
+
- Modifying project-level `.claude/` files
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Implementation Notes
|
|
98
|
+
|
|
99
|
+
- Import: `const { getDefaultConfig, readConfig, writeConfig, setConfigValue, resetConfig, displayConfig } = require('../src/feedback')`
|
|
100
|
+
- Also import: `fs`, `os`, `path` for file system isolation
|
|
101
|
+
- Group under a single `describe('Config Management', ...)` or per-function sub-describes
|
|
102
|
+
- `displayConfig` reads config via `readConfig()`, so it also requires `chdir` setup
|
|
103
|
+
- See: `.blueprint/features/feature_feedback-test/FEATURE_SPEC.md` for full rules and constraints
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Story: End-to-End Parse Pipeline
|
|
2
|
+
|
|
3
|
+
## User Story
|
|
4
|
+
|
|
5
|
+
As a developer maintaining `src/feedback.js`,
|
|
6
|
+
I want an integrated test that chains `parseFeedbackFromOutput` → `normalizeFeedbackKeys` → `validateFeedback` using the real production module,
|
|
7
|
+
so that the complete feedback extraction and validation path is verified end-to-end within the module boundary.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Acceptance Criteria
|
|
12
|
+
|
|
13
|
+
**Given** an agent output string containing a valid `FEEDBACK: { "rating": 4, "issues": [], "rec": "proceed" }` block,
|
|
14
|
+
**When** the output is passed to `parseFeedbackFromOutput`, the result to `normalizeFeedbackKeys`, and that result to `validateFeedback`,
|
|
15
|
+
**Then** `parseFeedbackFromOutput` returns a non-null object, `normalizeFeedbackKeys` returns an object with `recommendation: 'proceed'` (not `rec`), and `validateFeedback` returns `{ valid: true, errors: [] }`.
|
|
16
|
+
|
|
17
|
+
**Given** an agent output string containing `FEEDBACK: { "rating": 2, "issues": ["unclear-scope"], "rec": "pause" }`,
|
|
18
|
+
**When** the same three-step chain is applied,
|
|
19
|
+
**Then** `validateFeedback` returns `{ valid: true, errors: [] }` (both the `rec`-normalised recommendation and the rating are valid), and the normalised object has `recommendation: 'pause'`.
|
|
20
|
+
|
|
21
|
+
**Given** an agent output string with `FEEDBACK: { "rating": 0, "issues": [], "recommendation": "proceed" }`,
|
|
22
|
+
**When** the three-step chain is applied,
|
|
23
|
+
**Then** `validateFeedback` returns `{ valid: false, errors: [...] }` with an error referencing the invalid rating.
|
|
24
|
+
|
|
25
|
+
**Given** an agent output string with no `FEEDBACK:` marker,
|
|
26
|
+
**When** `parseFeedbackFromOutput` is called,
|
|
27
|
+
**Then** it returns `null` and the pipeline terminates at that stage (normalisation and validation are not called with null).
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Pipeline Sequence (Explicit)
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
input: raw output string
|
|
35
|
+
└─► parseFeedbackFromOutput(output)
|
|
36
|
+
→ null → pipeline terminates (no further steps)
|
|
37
|
+
→ parsed object → continue
|
|
38
|
+
└─► normalizeFeedbackKeys(parsed)
|
|
39
|
+
→ normalised object
|
|
40
|
+
└─► validateFeedback(normalised)
|
|
41
|
+
→ { valid, errors }
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
All three functions are called on the real `src/feedback.js` module export. No step reimplements logic inline.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Out of Scope
|
|
49
|
+
|
|
50
|
+
- File system I/O (not required for this pipeline — all functions are in-memory)
|
|
51
|
+
- `shouldPause` integration (not part of the parse pipeline; covered in story-validation-normalisation.md)
|
|
52
|
+
- Config management functions (covered in story-config-management.md)
|
|
53
|
+
- Any modification of `src/feedback.js` production code
|
|
54
|
+
- Exhaustive permutations of each step (those are covered in story-validation-normalisation.md)
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Implementation Notes
|
|
59
|
+
|
|
60
|
+
- Import: `const { parseFeedbackFromOutput, normalizeFeedbackKeys, validateFeedback } = require('../src/feedback')`
|
|
61
|
+
- Group under `describe('Parse Pipeline', ...)` or similar
|
|
62
|
+
- No file system setup required — all three functions operate on in-memory values
|
|
63
|
+
- This story's tests serve as the single chained integration test called for in FEATURE_SPEC.md:Rule 5
|
|
64
|
+
- The `rec` → `recommendation` normalisation step is critical: the raw parsed object uses `rec`, and `validateFeedback` accepts both keys — but the test should verify normalisation works correctly in the chain
|
|
65
|
+
- See: `.blueprint/features/feature_feedback-test/FEATURE_SPEC.md` for Rule 5 context
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Story: Validation and Normalisation Functions
|
|
2
|
+
|
|
3
|
+
## User Story
|
|
4
|
+
|
|
5
|
+
As a developer maintaining `src/feedback.js`,
|
|
6
|
+
I want direct unit tests for `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, and `shouldPause`,
|
|
7
|
+
so that regressions in the production module are caught immediately by the test suite without relying on inline reimplementations.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Acceptance Criteria
|
|
12
|
+
|
|
13
|
+
**Given** a feedback object with a valid integer rating (1–5), an array of strings as issues, and a valid recommendation (`proceed`, `pause`, or `revise`),
|
|
14
|
+
**When** `validateFeedback(feedback)` is called,
|
|
15
|
+
**Then** it returns `{ valid: true, errors: [] }`.
|
|
16
|
+
|
|
17
|
+
**Given** a feedback object with a rating of `0` (below range), `6` (above range), `3.5` (non-integer), or a value that is not a number,
|
|
18
|
+
**When** `validateFeedback(feedback)` is called,
|
|
19
|
+
**Then** it returns `{ valid: false, errors: [...] }` containing an appropriate error message for each invalid rating.
|
|
20
|
+
|
|
21
|
+
**Given** a feedback object where `issues` is not an array, or contains non-string elements,
|
|
22
|
+
**When** `validateFeedback(feedback)` is called,
|
|
23
|
+
**Then** it returns `{ valid: false, errors: [...] }` containing an error message describing the issues field violation.
|
|
24
|
+
|
|
25
|
+
**Given** a feedback object with `rec` key only (no `recommendation` key),
|
|
26
|
+
**When** `normalizeFeedbackKeys(feedback)` is called,
|
|
27
|
+
**Then** it returns an object with `recommendation` set to the `rec` value and no `rec` key present.
|
|
28
|
+
|
|
29
|
+
**Given** a feedback object with both `rec` and `recommendation` keys,
|
|
30
|
+
**When** `normalizeFeedbackKeys(feedback)` is called,
|
|
31
|
+
**Then** `recommendation` retains its original value (wins over `rec`) and `rec` is not deleted (both remain as-is per the production implementation).
|
|
32
|
+
|
|
33
|
+
**Given** an agent output string containing a `FEEDBACK: { ... }` JSON block with valid content,
|
|
34
|
+
**When** `parseFeedbackFromOutput(output)` is called,
|
|
35
|
+
**Then** it returns the parsed feedback object.
|
|
36
|
+
|
|
37
|
+
**Given** a feedback object with `recommendation: 'pause'` and a rating above `minRatingThreshold`,
|
|
38
|
+
**When** `shouldPause(feedback, config)` is called,
|
|
39
|
+
**Then** it returns `true`.
|
|
40
|
+
|
|
41
|
+
**Given** a feedback object with `recommendation: 'proceed'` and a rating below `minRatingThreshold`,
|
|
42
|
+
**When** `shouldPause(feedback, config)` is called,
|
|
43
|
+
**Then** it returns `true` (rating-based gate triggers independently of recommendation).
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Test Boundary Details
|
|
48
|
+
|
|
49
|
+
### `validateFeedback` — rating boundary values to test
|
|
50
|
+
| Value | Expected valid |
|
|
51
|
+
|-------|---------------|
|
|
52
|
+
| 0 | false |
|
|
53
|
+
| 1 | true |
|
|
54
|
+
| 3 | true |
|
|
55
|
+
| 5 | true |
|
|
56
|
+
| 6 | false |
|
|
57
|
+
| 3.5 | false |
|
|
58
|
+
|
|
59
|
+
### `normalizeFeedbackKeys` — key scenarios to test
|
|
60
|
+
| Scenario | Expected result |
|
|
61
|
+
|---------------------------------------|------------------------------------------|
|
|
62
|
+
| `rec` only | Renamed to `recommendation`; `rec` removed |
|
|
63
|
+
| `recommendation` only | Unchanged |
|
|
64
|
+
| Both `rec` and `recommendation` | Both keys preserved; `recommendation` value unchanged |
|
|
65
|
+
| Neither `rec` nor `recommendation` | Object returned unchanged |
|
|
66
|
+
|
|
67
|
+
### `parseFeedbackFromOutput` — scenarios to test
|
|
68
|
+
| Input | Expected result |
|
|
69
|
+
|---------------------------------------|-------------------|
|
|
70
|
+
| Valid `FEEDBACK: { ... }` block | Parsed object |
|
|
71
|
+
| No `FEEDBACK:` marker | `null` |
|
|
72
|
+
| Malformed JSON after `FEEDBACK:` | `null` |
|
|
73
|
+
|
|
74
|
+
### `shouldPause` — scenarios to test
|
|
75
|
+
| rating | minRatingThreshold | recommendation | Expected |
|
|
76
|
+
|--------|--------------------|----------------|----------|
|
|
77
|
+
| 4 | 3.0 | 'proceed' | false |
|
|
78
|
+
| 2 | 3.0 | 'proceed' | true |
|
|
79
|
+
| 4 | 3.0 | 'pause' | true |
|
|
80
|
+
| 2 | 3.0 | 'pause' | true |
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Out of Scope
|
|
85
|
+
|
|
86
|
+
- Config file system interaction (covered in story-config-management.md)
|
|
87
|
+
- End-to-end parse pipeline chain (covered in story-parse-pipeline.md)
|
|
88
|
+
- `displayConfig` output assertion (smoke-tested in story-config-management.md)
|
|
89
|
+
- Any modification of `src/feedback.js` production code
|
|
90
|
+
- Testing agent prompt text or insights correlation logic
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Implementation Notes
|
|
95
|
+
|
|
96
|
+
- Import: `const { validateFeedback, normalizeFeedbackKeys, parseFeedbackFromOutput, shouldPause } = require('../src/feedback')`
|
|
97
|
+
- No file system setup required for this story — all functions are pure or in-memory
|
|
98
|
+
- Describe block names should match function names: `validateFeedback`, `normalizeFeedbackKeys`, `parseFeedbackFromOutput`, `shouldPause`
|
|
99
|
+
- See: `.blueprint/features/feature_feedback-test/FEATURE_SPEC.md` for full rules
|
package/README.md
CHANGED
|
@@ -327,6 +327,24 @@ analyzes: recommends: calibrates:
|
|
|
327
327
|
• Trends • And feedback issues
|
|
328
328
|
```
|
|
329
329
|
|
|
330
|
+
### Accessing Module Data
|
|
331
|
+
|
|
332
|
+
Data is collected from both invocation methods and accessible via CLI commands:
|
|
333
|
+
|
|
334
|
+
| Data | `/implement-feature` (skill) | `npx murmur8 murm` (CLI) | How to access |
|
|
335
|
+
|------|------------------------------|--------------------------|---------------|
|
|
336
|
+
| **Per-stage timing** (alex, cass, nigel, codey) | Recorded by orchestrating agent | Merged from worktree on successful merge | `npx murmur8 history` |
|
|
337
|
+
| **Feedback ratings** (agent-to-agent) | Recorded by feedback micro-Tasks | Merged from worktree on successful merge | `npx murmur8 history`, `npx murmur8 insights --feedback` |
|
|
338
|
+
| **Token cost per stage** | Recorded by orchestrating agent | Merged from worktree on successful merge | `npx murmur8 history --cost` |
|
|
339
|
+
| **Batch summary** (total duration, feature outcomes) | N/A (single feature) | Recorded at batch completion | `npx murmur8 history` |
|
|
340
|
+
| **Success/failure status** | Recorded per run | Recorded per feature + batch | `npx murmur8 history --stats` |
|
|
341
|
+
| **Retry attempts & strategies** | Recorded on failure | Merged from worktree on successful merge | `npx murmur8 insights --failures` |
|
|
342
|
+
| **Bottleneck analysis** | Derived from history | Derived from history | `npx murmur8 insights --bottlenecks` |
|
|
343
|
+
| **Smart retry recommendations** | Used live during pipeline | Used live during pipeline | Automatic on failure |
|
|
344
|
+
| **Diff preview** | Shown before commit | Shown per worktree before merge | Interactive during pipeline |
|
|
345
|
+
|
|
346
|
+
**How worktree history merging works:** When `npx murmur8 murm` runs, each feature pipeline executes `/implement-feature` inside an isolated git worktree. The skill records per-stage data to `.claude/pipeline-history.json` within that worktree. After a successful merge, murmur8 reads this file and appends its entries to the main project's history before cleaning up the worktree. Failed/conflicted worktrees preserve their history for debugging.
|
|
347
|
+
|
|
330
348
|
## Directory Structure
|
|
331
349
|
|
|
332
350
|
```
|
package/SKILL.md
CHANGED
|
@@ -138,7 +138,7 @@ If no history exists, skip this step silently.
|
|
|
138
138
|
### Step 5: Initialize
|
|
139
139
|
Create/read `{QUEUE}`. Ensure dirs exist: `mkdir -p {FEAT_DIR} {TEST_DIR}`
|
|
140
140
|
|
|
141
|
-
Unless `--no-history`,
|
|
141
|
+
Unless `--no-history`, note the pipeline start time (ISO 8601 UTC) in your working context as `PIPELINE_START`.
|
|
142
142
|
|
|
143
143
|
---
|
|
144
144
|
|
|
@@ -146,7 +146,7 @@ Unless `--no-history`, start a history entry (slug, startedAt, stages, feedback)
|
|
|
146
146
|
|
|
147
147
|
**Announce:** `} Alex — creating feature spec`
|
|
148
148
|
|
|
149
|
-
**History:**
|
|
149
|
+
**History:** Note `ALEX_START` (ISO 8601 UTC) before spawning.
|
|
150
150
|
|
|
151
151
|
**Runtime prompt:** `.blueprint/prompts/alex-runtime.md`
|
|
152
152
|
|
|
@@ -204,7 +204,7 @@ Brief summary (5 bullets max): intent, key behaviours, scope, story themes, tens
|
|
|
204
204
|
|
|
205
205
|
**On completion:**
|
|
206
206
|
1. Verify `{FEAT_SPEC}` and `{FEAT_DIR}/handoff-alex.md` exist
|
|
207
|
-
2.
|
|
207
|
+
2. Note `ALEX_END` and compute `ALEX_DURATION_MS`
|
|
208
208
|
3. Update queue: move feature to `cassQueue`
|
|
209
209
|
4. If `--pause-after=alex`: Show output path, ask user to continue
|
|
210
210
|
|
|
@@ -247,7 +247,7 @@ FEEDBACK: {"rating":N,"issues":["..."],"rec":"proceed|pause|revise"}
|
|
|
247
247
|
|
|
248
248
|
**Announce:** ` } Cass — writing user stories`
|
|
249
249
|
|
|
250
|
-
**History:**
|
|
250
|
+
**History:** Note `CASS_START` (ISO 8601 UTC) before spawning.
|
|
251
251
|
|
|
252
252
|
**Runtime prompt:** `.blueprint/prompts/cass-runtime.md`
|
|
253
253
|
|
|
@@ -311,7 +311,7 @@ Brief summary: story count, filenames, behaviours covered (5 bullets max)
|
|
|
311
311
|
**On completion:**
|
|
312
312
|
1. Verify at least one `story-*.md` exists in `{FEAT_DIR}`
|
|
313
313
|
2. Verify `{FEAT_DIR}/handoff-cass.md` exists
|
|
314
|
-
2.
|
|
314
|
+
2. Note `CASS_END` and compute `CASS_DURATION_MS`
|
|
315
315
|
3. Update queue: move feature to `nigelQueue`
|
|
316
316
|
4. If `--pause-after=cass`: Show story paths, ask user to continue
|
|
317
317
|
|
|
@@ -349,7 +349,7 @@ FEEDBACK: {"rating":N,"issues":["..."],"rec":"proceed|pause|revise"}
|
|
|
349
349
|
|
|
350
350
|
**Announce:** ` } Nigel — building test spec`
|
|
351
351
|
|
|
352
|
-
**History:**
|
|
352
|
+
**History:** Note `NIGEL_SPEC_START` (ISO 8601 UTC) before spawning.
|
|
353
353
|
|
|
354
354
|
**Runtime prompt:** `.blueprint/prompts/nigel-runtime.md`
|
|
355
355
|
|
|
@@ -412,7 +412,7 @@ Brief summary: test case count planned, AC coverage %, assumptions (5 bullets ma
|
|
|
412
412
|
|
|
413
413
|
**On completion:**
|
|
414
414
|
1. Verify `{TEST_SPEC}` and `{FEAT_DIR}/handoff-nigel.md` exist
|
|
415
|
-
2.
|
|
415
|
+
2. Note `NIGEL_SPEC_END` and compute `NIGEL_SPEC_DURATION_MS`
|
|
416
416
|
|
|
417
417
|
**On failure:** See [Error Handling with Retry](#error-handling-with-smart-retry)
|
|
418
418
|
|
|
@@ -422,7 +422,7 @@ Brief summary: test case count planned, AC coverage %, assumptions (5 bullets ma
|
|
|
422
422
|
|
|
423
423
|
**Announce:** ` } Nigel — writing executable tests`
|
|
424
424
|
|
|
425
|
-
**History:**
|
|
425
|
+
**History:** Note `NIGEL_TESTS_START` (ISO 8601 UTC) before spawning.
|
|
426
426
|
|
|
427
427
|
Use the Task tool with `subagent_type="general-purpose"`:
|
|
428
428
|
|
|
@@ -460,7 +460,7 @@ Brief summary: test count, file(s) written, any tests deferred
|
|
|
460
460
|
|
|
461
461
|
**On completion:**
|
|
462
462
|
1. Verify `{TEST_FILE}` exists
|
|
463
|
-
2.
|
|
463
|
+
2. Note `NIGEL_TESTS_END` and compute `NIGEL_TESTS_DURATION_MS`
|
|
464
464
|
3. Update queue: move feature to `codeyQueue`
|
|
465
465
|
4. If `--pause-after=nigel`: Show test paths, ask user to continue
|
|
466
466
|
|
|
@@ -499,7 +499,7 @@ FEEDBACK: {"rating":N,"issues":["..."],"rec":"proceed|pause|revise"}
|
|
|
499
499
|
|
|
500
500
|
**Announce:** ` } Codey — drafting implementation plan`
|
|
501
501
|
|
|
502
|
-
**History:**
|
|
502
|
+
**History:** Note `CODEY_PLAN_START` (ISO 8601 UTC) before spawning.
|
|
503
503
|
|
|
504
504
|
**Runtime prompt:** `.blueprint/prompts/codey-plan-runtime.md`
|
|
505
505
|
|
|
@@ -556,7 +556,7 @@ Brief summary: files planned, step count, identified risks
|
|
|
556
556
|
|
|
557
557
|
**On completion:**
|
|
558
558
|
1. Verify `{PLAN}` exists
|
|
559
|
-
2.
|
|
559
|
+
2. Note `CODEY_PLAN_END` and compute `CODEY_PLAN_DURATION_MS`
|
|
560
560
|
3. If `--pause-after=codey-plan`: Show plan path, ask user to continue
|
|
561
561
|
|
|
562
562
|
**On failure:** See [Error Handling with Retry](#error-handling-with-smart-retry)
|
|
@@ -567,7 +567,7 @@ Brief summary: files planned, step count, identified risks
|
|
|
567
567
|
|
|
568
568
|
**Announce:** ` } Codey — implementing feature`
|
|
569
569
|
|
|
570
|
-
**History:**
|
|
570
|
+
**History:** Note `CODEY_IMPL_START` (ISO 8601 UTC) before spawning.
|
|
571
571
|
|
|
572
572
|
**Runtime prompt:** `.blueprint/prompts/codey-implement-runtime.md`
|
|
573
573
|
|
|
@@ -637,13 +637,13 @@ for each step in IMPLEMENTATION_PLAN.steps:
|
|
|
637
637
|
|
|
638
638
|
**On all steps complete:**
|
|
639
639
|
1. Run full test suite: `node --test {TEST_FILE}`
|
|
640
|
-
2.
|
|
640
|
+
2. Note `CODEY_IMPL_END`, compute `CODEY_IMPL_DURATION_MS`, and note `STEPS_COMPLETED`
|
|
641
641
|
3. Update queue: move feature to `completed`
|
|
642
642
|
4. Proceed to auto-commit (unless `--no-commit`)
|
|
643
643
|
|
|
644
644
|
**On partial failure:**
|
|
645
645
|
1. Record which steps completed and which failed
|
|
646
|
-
2.
|
|
646
|
+
2. Note partial completion: `STEPS_COMPLETED=M`, `TOTAL_STEPS=N`, `FAILED_AT_STEP=step`
|
|
647
647
|
3. Report to user with option to continue manually
|
|
648
648
|
|
|
649
649
|
**On failure:** See [Error Handling with Retry](#error-handling-with-smart-retry)
|
|
@@ -694,17 +694,28 @@ After commit, remove the slug's row from `{BACKLOG}` (if it exists). Stage with
|
|
|
694
694
|
|
|
695
695
|
**Modules:** `src/history.js`, `src/cost.js`
|
|
696
696
|
|
|
697
|
-
Unless `--no-history` flag is set,
|
|
697
|
+
Unless `--no-history` flag is set, build the history entry JSON from the timestamps noted during the run and write it via the CLI:
|
|
698
698
|
|
|
699
|
-
```
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
699
|
+
```bash
|
|
700
|
+
node bin/cli.js history record '{
|
|
701
|
+
"slug": "{slug}",
|
|
702
|
+
"status": "success",
|
|
703
|
+
"startedAt": "<PIPELINE_START>",
|
|
704
|
+
"completedAt": "<now ISO 8601>",
|
|
705
|
+
"totalDurationMs": <elapsed ms>,
|
|
706
|
+
"commitHash": "<hash or null>",
|
|
707
|
+
"stages": {
|
|
708
|
+
"alex": { "startedAt": "<ALEX_START>", "completedAt": "<ALEX_END>", "durationMs": <ALEX_DURATION_MS>, "status": "success" },
|
|
709
|
+
"cass": { "startedAt": "<CASS_START>", "completedAt": "<CASS_END>", "durationMs": <CASS_DURATION_MS>, "status": "success" },
|
|
710
|
+
"nigel-spec": { "startedAt": "<NIGEL_SPEC_START>", "completedAt": "<NIGEL_SPEC_END>", "durationMs": <NIGEL_SPEC_DURATION_MS>, "status": "success" },
|
|
711
|
+
"nigel-tests": { "startedAt": "<NIGEL_TESTS_START>", "completedAt": "<NIGEL_TESTS_END>", "durationMs": <NIGEL_TESTS_DURATION_MS>, "status": "success" },
|
|
712
|
+
"codey-plan": { "startedAt": "<CODEY_PLAN_START>", "completedAt": "<CODEY_PLAN_END>", "durationMs": <CODEY_PLAN_DURATION_MS>, "status": "success" },
|
|
713
|
+
"codey-implement": { "startedAt": "<CODEY_IMPL_START>", "completedAt": "<CODEY_IMPL_END>", "durationMs": <CODEY_IMPL_DURATION_MS>, "status": "success", "stepsCompleted": <STEPS_COMPLETED> }
|
|
714
|
+
}
|
|
715
|
+
}'
|
|
716
|
+
```
|
|
717
|
+
|
|
718
|
+
Omit stages that were skipped (e.g. `cass` when `--skip-stories` was used). On failure, set `status` to `"failed"` and add `"failedStage": "<stage>"`; on pause, set `status` to `"paused"` and add `"pausedAfter": "<stage>"`.
|
|
708
719
|
|
|
709
720
|
**Display summary:** Stage status (✓/✗), test count, duration, commit hash, feedback ratings, cost breakdown per stage.
|
|
710
721
|
|
package/package.json
CHANGED
package/src/commands/history.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* history command - View pipeline execution history
|
|
3
3
|
*/
|
|
4
|
-
const { displayHistory, showStats, clearHistory, exportHistory } = require('../history');
|
|
4
|
+
const { displayHistory, showStats, clearHistory, exportHistory, recordHistory, updateStage } = require('../history');
|
|
5
5
|
const { parseFlags } = require('./utils');
|
|
6
6
|
|
|
7
7
|
const description = 'View pipeline execution history';
|
|
@@ -10,7 +10,46 @@ async function run(args) {
|
|
|
10
10
|
const flags = parseFlags(args);
|
|
11
11
|
const subArg = args[1];
|
|
12
12
|
|
|
13
|
-
if (subArg === '
|
|
13
|
+
if (subArg === 'record') {
|
|
14
|
+
const jsonArg = args[2];
|
|
15
|
+
if (!jsonArg) {
|
|
16
|
+
console.error('Usage: history record \'{"slug":"...","status":"...","startedAt":"...","completedAt":"...","totalDurationMs":N}\'');
|
|
17
|
+
process.exit(1);
|
|
18
|
+
}
|
|
19
|
+
let entry;
|
|
20
|
+
try {
|
|
21
|
+
entry = JSON.parse(jsonArg);
|
|
22
|
+
} catch (err) {
|
|
23
|
+
console.error(`Invalid JSON: ${err.message}`);
|
|
24
|
+
process.exit(1);
|
|
25
|
+
}
|
|
26
|
+
if (!entry.slug || !entry.status || !entry.startedAt || !entry.completedAt || entry.totalDurationMs === undefined) {
|
|
27
|
+
console.error('Entry must include: slug, status, startedAt, completedAt, totalDurationMs');
|
|
28
|
+
process.exit(1);
|
|
29
|
+
}
|
|
30
|
+
const ok = recordHistory(entry);
|
|
31
|
+
if (!ok) process.exit(1);
|
|
32
|
+
console.log(`Recorded history entry for "${entry.slug}" (${entry.status})`);
|
|
33
|
+
} else if (subArg === 'update-stage') {
|
|
34
|
+
// history update-stage <slug> <stage> '<json>'
|
|
35
|
+
const slug = args[2];
|
|
36
|
+
const stage = args[3];
|
|
37
|
+
const jsonArg = args[4];
|
|
38
|
+
if (!slug || !stage || !jsonArg) {
|
|
39
|
+
console.error('Usage: history update-stage <slug> <stage> \'{"durationMs":N,"status":"success"}\'');
|
|
40
|
+
process.exit(1);
|
|
41
|
+
}
|
|
42
|
+
let stageData;
|
|
43
|
+
try {
|
|
44
|
+
stageData = JSON.parse(jsonArg);
|
|
45
|
+
} catch (err) {
|
|
46
|
+
console.error(`Invalid JSON: ${err.message}`);
|
|
47
|
+
process.exit(1);
|
|
48
|
+
}
|
|
49
|
+
const ok = updateStage(slug, stage, stageData);
|
|
50
|
+
if (!ok) process.exit(1);
|
|
51
|
+
console.log(`Updated stage "${stage}" for "${slug}"`);
|
|
52
|
+
} else if (subArg === 'clear') {
|
|
14
53
|
await clearHistory({ force: flags.force });
|
|
15
54
|
} else if (subArg === 'export') {
|
|
16
55
|
const exportOpts = {};
|
package/src/history.js
CHANGED
|
@@ -90,6 +90,36 @@ function storeStageFeedback(slug, stage, feedback) {
|
|
|
90
90
|
}
|
|
91
91
|
}
|
|
92
92
|
|
|
93
|
+
/**
|
|
94
|
+
* Updates (merges) stage data into the most recent history entry for a slug.
|
|
95
|
+
* Used by the CLI skill to record per-stage timing after each pipeline step.
|
|
96
|
+
* @param {string} slug - Feature slug
|
|
97
|
+
* @param {string} stage - Stage name (e.g. alex, cass, nigel-spec, nigel-tests, codey-plan, codey-implement)
|
|
98
|
+
* @param {object} data - Stage fields to merge (startedAt, completedAt, durationMs, status, etc.)
|
|
99
|
+
* @returns {boolean} True if updated successfully
|
|
100
|
+
*/
|
|
101
|
+
function updateStage(slug, stage, data) {
|
|
102
|
+
try {
|
|
103
|
+
const history = readHistoryFile();
|
|
104
|
+
if (history.error) {
|
|
105
|
+
console.warn('Warning: History file is corrupted, cannot update stage.');
|
|
106
|
+
return false;
|
|
107
|
+
}
|
|
108
|
+
const entry = history.findLast(e => e.slug === slug);
|
|
109
|
+
if (!entry) {
|
|
110
|
+
console.warn(`Warning: No history entry found for slug: ${slug}`);
|
|
111
|
+
return false;
|
|
112
|
+
}
|
|
113
|
+
if (!entry.stages) entry.stages = {};
|
|
114
|
+
entry.stages[stage] = { ...entry.stages[stage], ...data };
|
|
115
|
+
writeHistoryFile(history);
|
|
116
|
+
return true;
|
|
117
|
+
} catch (err) {
|
|
118
|
+
console.warn(`Warning: Failed to update stage: ${err.message}`);
|
|
119
|
+
return false;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
93
123
|
function formatDuration(ms) {
|
|
94
124
|
const seconds = Math.floor(ms / 1000);
|
|
95
125
|
const minutes = Math.floor(seconds / 60);
|
|
@@ -427,6 +457,7 @@ module.exports = {
|
|
|
427
457
|
writeHistoryFile,
|
|
428
458
|
recordHistory,
|
|
429
459
|
storeStageFeedback,
|
|
460
|
+
updateStage,
|
|
430
461
|
displayHistory,
|
|
431
462
|
showStats,
|
|
432
463
|
clearHistory,
|
package/src/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const { init } = require('./init');
|
|
2
2
|
const { update } = require('./update');
|
|
3
3
|
const { validate, formatOutput, checkNodeVersion } = require('./validate');
|
|
4
|
-
const { recordHistory, displayHistory, showStats, clearHistory, storeStageFeedback } = require('./history');
|
|
4
|
+
const { recordHistory, displayHistory, showStats, clearHistory, storeStageFeedback, updateStage } = require('./history');
|
|
5
5
|
const {
|
|
6
6
|
readConfig,
|
|
7
7
|
writeConfig,
|
|
@@ -108,6 +108,7 @@ module.exports = {
|
|
|
108
108
|
showStats,
|
|
109
109
|
clearHistory,
|
|
110
110
|
storeStageFeedback,
|
|
111
|
+
updateStage,
|
|
111
112
|
// Retry module exports
|
|
112
113
|
readConfig,
|
|
113
114
|
writeConfig,
|
package/src/murm.js
CHANGED
|
@@ -5,6 +5,7 @@ const { execSync, spawn } = require('child_process');
|
|
|
5
5
|
const fs = require('fs');
|
|
6
6
|
const readline = require('readline');
|
|
7
7
|
const theme = require('./theme');
|
|
8
|
+
const { recordHistory, readHistoryFile, writeHistoryFile } = require('./history');
|
|
8
9
|
|
|
9
10
|
const CONFIG_FILE = '.claude/murm-config.json';
|
|
10
11
|
const LOCK_FILE = '.claude/murm.lock';
|
|
@@ -19,6 +20,27 @@ const LEGACY_QUEUE_FILE = '.claude/parallel-queue.json';
|
|
|
19
20
|
let runningProcesses = new Map();
|
|
20
21
|
let isAborting = false;
|
|
21
22
|
|
|
23
|
+
const HISTORY_FILE = '.claude/pipeline-history.json';
|
|
24
|
+
|
|
25
|
+
function mergeWorktreeHistory(worktreePath) {
|
|
26
|
+
const worktreeHistoryPath = path.join(worktreePath, HISTORY_FILE);
|
|
27
|
+
if (!fs.existsSync(worktreeHistoryPath)) return [];
|
|
28
|
+
|
|
29
|
+
try {
|
|
30
|
+
const worktreeEntries = JSON.parse(fs.readFileSync(worktreeHistoryPath, 'utf8'));
|
|
31
|
+
if (!Array.isArray(worktreeEntries) || worktreeEntries.length === 0) return [];
|
|
32
|
+
|
|
33
|
+
const mainHistory = readHistoryFile();
|
|
34
|
+
if (mainHistory.error) return worktreeEntries;
|
|
35
|
+
|
|
36
|
+
mainHistory.push(...worktreeEntries);
|
|
37
|
+
writeHistoryFile(mainHistory);
|
|
38
|
+
return worktreeEntries;
|
|
39
|
+
} catch {
|
|
40
|
+
return [];
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
22
44
|
/**
|
|
23
45
|
* Migrate a legacy file path to the new path.
|
|
24
46
|
* If the old file exists and the new one doesn't, rename it.
|
|
@@ -1284,6 +1306,11 @@ async function runMurm(slugs, options = {}) {
|
|
|
1284
1306
|
if (mergeResult.success) {
|
|
1285
1307
|
feature.status = 'murm_complete';
|
|
1286
1308
|
console.log(`[${timestamp}] ${result.slug}: ${theme.MESSAGES.mergedAndLanded} \u2713`);
|
|
1309
|
+
// Merge per-stage history from worktree before cleanup
|
|
1310
|
+
const merged = mergeWorktreeHistory(feature.worktreePath);
|
|
1311
|
+
if (merged.length > 0) {
|
|
1312
|
+
feature.historyMerged = true;
|
|
1313
|
+
}
|
|
1287
1314
|
removeWorktree(result.slug);
|
|
1288
1315
|
} else if (mergeResult.conflict) {
|
|
1289
1316
|
feature.status = 'merge_conflict';
|
|
@@ -1342,6 +1369,29 @@ async function runMurm(slugs, options = {}) {
|
|
|
1342
1369
|
});
|
|
1343
1370
|
}
|
|
1344
1371
|
|
|
1372
|
+
// Record batch-level history
|
|
1373
|
+
recordHistory({
|
|
1374
|
+
slug: slugs.join('+'),
|
|
1375
|
+
mode: 'murmuration',
|
|
1376
|
+
status: summary.failed === 0 && summary.conflicts === 0 ? 'success' : 'partial',
|
|
1377
|
+
startedAt: queue.startedAt,
|
|
1378
|
+
completedAt: new Date().toISOString(),
|
|
1379
|
+
totalDurationMs: Date.now() - new Date(queue.startedAt).getTime(),
|
|
1380
|
+
baseBranch,
|
|
1381
|
+
features: queue.features.map(f => ({
|
|
1382
|
+
slug: f.slug,
|
|
1383
|
+
status: f.status,
|
|
1384
|
+
startedAt: f.startedAt,
|
|
1385
|
+
completedAt: f.completedAt
|
|
1386
|
+
})),
|
|
1387
|
+
summary: {
|
|
1388
|
+
total: slugs.length,
|
|
1389
|
+
completed: summary.completed,
|
|
1390
|
+
failed: summary.failed,
|
|
1391
|
+
conflicts: summary.conflicts
|
|
1392
|
+
}
|
|
1393
|
+
});
|
|
1394
|
+
|
|
1345
1395
|
return { success: summary.failed === 0 && summary.conflicts === 0, summary };
|
|
1346
1396
|
} finally {
|
|
1347
1397
|
// Always release lock when done
|