@nathapp/nax 0.26.0 → 0.27.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitlab-ci.yml +1 -0
- package/CLAUDE.md +38 -8
- package/docs/ROADMAP.md +36 -19
- package/nax/features/review-quality/prd.json +55 -0
- package/package.json +1 -1
- package/src/optimizer/index.ts +2 -1
- package/src/pipeline/runner.ts +2 -1
- package/src/pipeline/stages/autofix.ts +5 -0
- package/src/pipeline/stages/rectify.ts +5 -0
- package/src/pipeline/stages/regression.ts +6 -1
- package/src/pipeline/stages/verify.ts +2 -1
- package/src/pipeline/types.ts +9 -0
- package/src/review/runner.ts +50 -1
- package/src/tdd/orchestrator.ts +11 -1
- package/src/tdd/rectification-gate.ts +18 -13
- package/src/tdd/types.ts +2 -0
- package/src/version.ts +20 -4
- package/test/integration/review/review-plugin-integration.test.ts +12 -7
- package/test/unit/review/runner.test.ts +117 -0
package/.gitlab-ci.yml
CHANGED
package/CLAUDE.md
CHANGED
|
@@ -92,16 +92,46 @@ Runner.run() [src/execution/runner.ts — thin orchestrator only]
|
|
|
92
92
|
2. **Plan complex tasks**: for multi-file changes, write a short plan before implementing.
|
|
93
93
|
3. **Implement in small chunks**: one logical concern per commit.
|
|
94
94
|
|
|
95
|
-
## Code Intelligence (Solograph MCP)
|
|
95
|
+
## Code Intelligence (Solograph MCP) — MANDATORY
|
|
96
96
|
|
|
97
|
-
|
|
97
|
+
**Always use solograph MCP tools before writing code or analyzing architecture.** Do NOT use `web_search` or `kb_search` as substitutes.
|
|
98
98
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
|
102
|
-
|
|
103
|
-
| `codegraph_query` |
|
|
104
|
-
| `
|
|
99
|
+
### Tool Selection Guide
|
|
100
|
+
|
|
101
|
+
| Tool | Capability | When to Use | Availability |
|
|
102
|
+
|:-----|:-----------|:-----------|:-------------|
|
|
103
|
+
| `codegraph_query` | Structural queries (Cypher) — find calls, dependencies, imports | **Preferred for dependency analysis, call tracing, symbol lookup** | ✅ Always works (in-memory graph) |
|
|
104
|
+
| `project_code_search` | Semantic search (Redis vector DB) — pattern matching by meaning | Natural language queries like "find auth patterns" | ⚠️ Requires explicit `project_code_reindex` + Redis daemon |
|
|
105
|
+
| `codegraph_explain` | Architecture overview for unfamiliar subsystems | Understand module relationships before major changes | ✅ Always works |
|
|
106
|
+
| `project_code_reindex` | Index project for semantic search | After creating/deleting source files | ✅ Always works |
|
|
107
|
+
|
|
108
|
+
### Recommended Workflow
|
|
109
|
+
|
|
110
|
+
For nax, **prefer `codegraph_query`** for routine tasks:
|
|
111
|
+
- Finding where functions are called (e.g. `calculateAggregateMetrics` is called by `status-cost.ts`)
|
|
112
|
+
- Analyzing dependencies before refactoring
|
|
113
|
+
- Tracing import/export chains
|
|
114
|
+
- Querying symbol definitions and relationships
|
|
115
|
+
|
|
116
|
+
**Use `project_code_search` only if:**
|
|
117
|
+
- You need semantic similarity ("find authentication patterns")
|
|
118
|
+
- The Redis index has been built (`project_code_reindex`) and the Redis daemon is running (not guaranteed in all sessions)
|
|
119
|
+
|
|
120
|
+
### Example Queries
|
|
121
|
+
|
|
122
|
+
```cypher
|
|
123
|
+
-- Find files calling calculateAggregateMetrics
|
|
124
|
+
MATCH (f:File)-[:CALLS]->(s:Symbol {name: "calculateAggregateMetrics"})
|
|
125
|
+
RETURN f.path
|
|
126
|
+
|
|
127
|
+
-- Find all imports of aggregator.ts
|
|
128
|
+
MATCH (f:File)-[:IMPORTS]->(target:File {path: "src/metrics/aggregator.ts"})
|
|
129
|
+
RETURN f.path
|
|
130
|
+
|
|
131
|
+
-- Find symbols defined in a file
|
|
132
|
+
MATCH (f:File {path: "src/metrics/aggregator.ts"})-[:DEFINES]->(s:Symbol)
|
|
133
|
+
RETURN s.name, s.type
|
|
134
|
+
```
|
|
105
135
|
|
|
106
136
|
## Coding Standards & Forbidden Patterns
|
|
107
137
|
|
package/docs/ROADMAP.md
CHANGED
|
@@ -135,6 +135,19 @@
|
|
|
135
135
|
|
|
136
136
|
---
|
|
137
137
|
|
|
138
|
+
## v0.27.0 — Review Quality ✅ Shipped (2026-03-08)
|
|
139
|
+
|
|
140
|
+
**Theme:** Fix review stage reliability — dirty working tree false-positive, stale precheck, dead config fields
|
|
141
|
+
**Status:** ✅ Shipped (2026-03-08)
|
|
142
|
+
**Spec:** `nax/features/review-quality/prd.json`
|
|
143
|
+
|
|
144
|
+
### Stories
|
|
145
|
+
- [x] **RQ-001:** Assert clean working tree before running review typecheck/lint (BUG-049)
|
|
146
|
+
- [x] **RQ-002:** Fix `checkOptionalCommands` precheck to use correct config resolution path (BUG-050)
|
|
147
|
+
- [x] **RQ-003:** Consolidate dead `quality.commands.typecheck/lint` into review resolution chain (BUG-051)
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
138
151
|
## v0.26.0 — Routing Persistence ✅ Shipped (2026-03-08)
|
|
139
152
|
|
|
140
153
|
- **RRP-001:** Persist initial routing classification to `prd.json` on first classification
|
|
@@ -148,16 +161,16 @@
|
|
|
148
161
|
## v0.25.0 — Trigger Completion ✅ Shipped (2026-03-07)
|
|
149
162
|
|
|
150
163
|
**Theme:** Wire all 8 unwired interaction triggers, 3 missing hook events, and add plugin integration tests
|
|
151
|
-
**Status:**
|
|
164
|
+
**Status:** ✅ Shipped (2026-03-07)
|
|
152
165
|
**Spec:** [docs/specs/trigger-completion.md](specs/trigger-completion.md)
|
|
153
166
|
|
|
154
167
|
### Stories
|
|
155
|
-
- [
|
|
156
|
-
- [
|
|
157
|
-
- [
|
|
158
|
-
- [
|
|
159
|
-
- [
|
|
160
|
-
- [
|
|
168
|
+
- [x] **TC-001:** Wire `cost-exceeded` + `cost-warning` triggers — fire at 80%/100% of cost limit in sequential-executor.ts
|
|
169
|
+
- [x] **TC-002:** Wire `max-retries` trigger — fire on permanent story failure via `story:failed` event in wireInteraction
|
|
170
|
+
- [x] **TC-003:** Wire `security-review`, `merge-conflict`, `pre-merge` triggers — review rejection, git conflict detection, pre-completion gate
|
|
171
|
+
- [x] **TC-004:** Wire `story-ambiguity` + `review-gate` triggers — ambiguity keyword detection, per-story human checkpoint
|
|
172
|
+
- [x] **TC-005:** Wire missing hook events — `on-resume`, `on-session-end`, `on-error` to pipeline events
|
|
173
|
+
- [x] **TC-006:** Auto plugin + Telegram + Webhook integration tests — mock LLM/network, cover approve/reject/HMAC flows
|
|
161
174
|
|
|
162
175
|
---
|
|
163
176
|
|
|
@@ -308,17 +321,21 @@
|
|
|
308
321
|
- [x] ~~**BUG-022:** Story interleaving — `getNextStory()` round-robins instead of exhausting retries on current story → fixed in v0.18.0~~
|
|
309
322
|
- [x] ~~**BUG-023:** Agent failure silent — no exitCode/stderr in JSONL → fixed in v0.18.0~~
|
|
310
323
|
- [x] ~~**BUG-025:** `needsHumanReview` not triggering interactive plugin → fixed in v0.18.0~~
|
|
311
|
-
|
|
312
|
-
- [x]
|
|
313
|
-
- [x]
|
|
314
|
-
- [x]
|
|
315
|
-
- [x]
|
|
316
|
-
- [x]
|
|
317
|
-
|
|
318
|
-
- [x] ~~**BUG-
|
|
319
|
-
- [x] ~~**BUG-
|
|
320
|
-
- [x] ~~**BUG-
|
|
321
|
-
- [x] ~~**BUG-
|
|
324
|
+
- [x] ~~**BUG-029:** Escalation resets story to `pending`. Fixed.~~
|
|
325
|
+
- [x] ~~**BUG-030:** Review lint failure resets. Fixed.~~
|
|
326
|
+
- [x] ~~**BUG-031:** Keyword fallback classifier inconsistency. Fixed.~~
|
|
327
|
+
- [x] ~~**BUG-032:** Routing stage overrides escalated modelTier. Fixed.~~
|
|
328
|
+
- [x] ~~**BUG-033:** LLM routing timeout/retry. Fixed.~~
|
|
329
|
+
- [x] ~~**BUG-037:** Test output summary (verify stage) tail. Fixed.~~
|
|
330
|
+
- [x] ~~**BUG-038:** smart-runner over-matching. Fixed.~~
|
|
331
|
+
- [x] ~~**BUG-043:** Scoped test command construction. Fixed.~~
|
|
332
|
+
- [x] ~~**BUG-044:** Scoped/full-suite test command logging. Fixed.~~
|
|
333
|
+
- [x] ~~**BUG-049:** Review typecheck runs on dirty working tree. Fixed in v0.27.0.~~
|
|
334
|
+
- [x] ~~**BUG-050:** `checkOptionalCommands` precheck uses legacy config fields. Fixed in v0.27.0.~~
|
|
335
|
+
- [x] ~~**BUG-051:** `quality.commands.typecheck/lint` are dead config. Fixed in v0.27.0.~~
|
|
336
|
+
- [x] ~~**BUG-052:** `console.warn` in runtime pipeline code bypasses JSONL logger. Fixed in v0.26.0.~~
|
|
337
|
+
- [ ] **BUG-054:** Redundant scoped verify after TDD full-suite gate passes. When rectification gate runs full test suite and passes, the pipeline verify stage re-runs scoped tests (subset). **Fix:** Skip verify if full-suite gate already passed.
|
|
338
|
+
- [ ] **BUG-055:** Pipeline skip messages conflate "not needed" with "disabled". `runner.ts:54` logs "skipped (disabled)" for every stage whose `enabled()` returns false, even when the stage was skipped only because it was not needed (e.g. tests already passed). **Fix:** Differentiate the log message.
|
|
322
339
|
|
|
323
340
|
### Features
|
|
324
341
|
- [x] ~~`nax unlock` command~~
|
|
@@ -344,4 +361,4 @@ Sequential canary → stable: `v0.12.0-canary.0` → `canary.N` → `v0.12.0`
|
|
|
344
361
|
Canary: `npm publish --tag canary`
|
|
345
362
|
Stable: `npm publish` (latest)
|
|
346
363
|
|
|
347
|
-
*Last updated: 2026-03-
|
|
364
|
+
*Last updated: 2026-03-08 (v0.27.0 shipped — Review Quality)*
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"project": "nax-review-quality",
|
|
3
|
+
"branchName": "feat/review-quality",
|
|
4
|
+
"feature": "review-quality",
|
|
5
|
+
"updatedAt": "2026-03-08T03:03:00.000Z",
|
|
6
|
+
"userStories": [
|
|
7
|
+
{
|
|
8
|
+
"id": "RQ-001",
|
|
9
|
+
"title": "Assert clean working tree before running review typecheck/lint (BUG-049)",
|
|
10
|
+
"description": "The review stage runs bun run typecheck and bun run lint on the working tree, not the committed state. If the agent forgets to git add a file (e.g. types.ts with a new interface field), the uncommitted change is still on disk, typecheck passes against the local working tree, but the committed code has a type error. This was observed in the routing-persistence run: RRP-003 committed contentHash refs in routing.ts without the matching StoryRouting.contentHash field in types.ts — typecheck passed because types.ts was locally modified but not staged. Fix: before running built-in checks in review/runner.ts, assert that the working tree has no uncommitted changes to tracked files (git diff --name-only HEAD returns empty). If dirty, fail the review with a clear message listing the uncommitted files so the agent can stage and commit them.",
|
|
11
|
+
"acceptanceCriteria": [
|
|
12
|
+
"Before running typecheck or lint in runReview(), call git diff --name-only HEAD (covers both staged and unstaged tracked-file changes)",
|
|
13
|
+
"If output is non-empty, return a ReviewResult with success: false and failureReason listing the uncommitted files",
|
|
14
|
+
"Log at warn level via getSafeLogger() with stage 'review' and message 'Uncommitted changes detected before review: <files>'",
|
|
15
|
+
"If working tree is clean, proceed with typecheck/lint as before — no regression for normal flow",
|
|
16
|
+
"Unit tests: dirty working tree (mock git diff) returns review failure before running typecheck; clean working tree allows typecheck to run normally",
|
|
17
|
+
"Unit tests: untracked files only (git diff HEAD returns empty) — review proceeds since only tracked changes matter"
|
|
18
|
+
],
|
|
19
|
+
"complexity": "simple",
|
|
20
|
+
"status": "pending",
|
|
21
|
+
"tags": ["bug", "review", "typecheck"]
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "RQ-002",
|
|
25
|
+
"title": "Fix checkOptionalCommands precheck to use correct config resolution path (BUG-050)",
|
|
26
|
+
"description": "The precheck check checkOptionalCommands() in src/precheck/checks-warnings.ts checks config.execution.lintCommand and config.execution.typecheckCommand — these are legacy fields that no longer exist in the current config schema. The actual runtime resolution chain used by review/runner.ts is: (1) execution.typecheckCommand, (2) review.commands.typecheck, (3) package.json scripts. As a result, the precheck always warns 'Optional commands not configured: lint, typecheck' even when review.commands.typecheck and review.commands.lint are properly set. Fix: update checkOptionalCommands() to resolve via the same priority chain as review/runner.ts:resolveCommand().",
|
|
27
|
+
"acceptanceCriteria": [
|
|
28
|
+
"checkOptionalCommands() resolves typecheck via: execution.typecheckCommand -> review.commands.typecheck -> package.json typecheck script",
|
|
29
|
+
"checkOptionalCommands() resolves lint via: execution.lintCommand -> review.commands.lint -> package.json lint script",
|
|
30
|
+
"If config.review.commands.typecheck is set, precheck passes with no warning",
|
|
31
|
+
"If neither execution field, review.commands, nor package.json script exists, precheck still warns 'not configured'",
|
|
32
|
+
"Unit tests: config with review.commands.typecheck set -> check passes; config with neither -> check warns; config with package.json script -> check passes"
|
|
33
|
+
],
|
|
34
|
+
"complexity": "simple",
|
|
35
|
+
"status": "pending",
|
|
36
|
+
"tags": ["bug", "precheck", "config"]
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"id": "RQ-003",
|
|
40
|
+
"title": "Consolidate dead quality.commands.typecheck/lint into review resolution chain (BUG-051)",
|
|
41
|
+
"description": "QualityConfig.commands.typecheck and QualityConfig.commands.lint are declared in src/config/types.ts and documented in nax config --explain, but are never read by runtime code. The review runner reads only review.commands.typecheck/lint. Fix: make review/runner.ts:resolveCommand() also check quality.commands as a fallback after review.commands and before package.json. This gives quality.commands.typecheck semantic meaning without a breaking change. Do NOT remove the fields from QualityConfig — backward compatibility.",
|
|
42
|
+
"acceptanceCriteria": [
|
|
43
|
+
"review/runner.ts:resolveCommand() priority chain for typecheck: (1) execution.typecheckCommand, (2) review.commands.typecheck, (3) quality.commands.typecheck, (4) package.json typecheck script",
|
|
44
|
+
"review/runner.ts:resolveCommand() priority chain for lint: (1) execution.lintCommand, (2) review.commands.lint, (3) quality.commands.lint, (4) package.json lint script",
|
|
45
|
+
"Setting quality.commands.typecheck in config.json now correctly runs that command in the review stage",
|
|
46
|
+
"review.commands.typecheck still takes precedence over quality.commands.typecheck when both are set",
|
|
47
|
+
"CLI config --explain description for quality.commands.typecheck updated to note it is used as fallback in review stage",
|
|
48
|
+
"Unit tests: quality.commands.typecheck set with review.commands.typecheck absent -> quality command used; both set -> review command takes precedence"
|
|
49
|
+
],
|
|
50
|
+
"complexity": "simple",
|
|
51
|
+
"status": "pending",
|
|
52
|
+
"tags": ["bug", "config", "review"]
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
package/package.json
CHANGED
package/src/optimizer/index.ts
CHANGED
|
@@ -14,6 +14,7 @@ export { NoopOptimizer } from "./noop.optimizer.js";
|
|
|
14
14
|
export { RuleBasedOptimizer } from "./rule-based.optimizer.js";
|
|
15
15
|
|
|
16
16
|
import type { NaxConfig } from "../config/schema.js";
|
|
17
|
+
import { getSafeLogger } from "../logger/index.js";
|
|
17
18
|
import type { PluginRegistry } from "../plugins/registry.js";
|
|
18
19
|
import { NoopOptimizer } from "./noop.optimizer.js";
|
|
19
20
|
import { RuleBasedOptimizer } from "./rule-based.optimizer.js";
|
|
@@ -56,7 +57,7 @@ export function resolveOptimizer(config: NaxConfig, pluginRegistry?: PluginRegis
|
|
|
56
57
|
return new NoopOptimizer();
|
|
57
58
|
default:
|
|
58
59
|
// Unknown strategy, fallback to noop
|
|
59
|
-
|
|
60
|
+
getSafeLogger()?.warn("optimizer", `Unknown optimizer strategy '${strategy}', using noop`);
|
|
60
61
|
return new NoopOptimizer();
|
|
61
62
|
}
|
|
62
63
|
}
|
package/src/pipeline/runner.ts
CHANGED
|
@@ -51,7 +51,8 @@ export async function runPipeline(
|
|
|
51
51
|
|
|
52
52
|
// Skip disabled stages
|
|
53
53
|
if (!stage.enabled(context)) {
|
|
54
|
-
|
|
54
|
+
const reason = stage.skipReason?.(context) ?? "disabled";
|
|
55
|
+
logger.debug("pipeline", `Stage "${stage.name}" skipped (${reason})`);
|
|
55
56
|
i++;
|
|
56
57
|
continue;
|
|
57
58
|
}
|
|
@@ -29,6 +29,11 @@ export const autofixStage: PipelineStage = {
|
|
|
29
29
|
return autofixEnabled;
|
|
30
30
|
},
|
|
31
31
|
|
|
32
|
+
/**
 * Human-readable reason reported by the pipeline runner when this stage is skipped.
 *
 * @param ctx - Current pipeline context; only `reviewResult` is read.
 * @returns "not needed (…)" when there is no review result or the review succeeded,
 *          otherwise "disabled (…)".
 */
skipReason(ctx: PipelineContext): string {
  const review = ctx.reviewResult;
  // Only a present, unsuccessful review result makes the config the reason for skipping.
  const reviewFailed = review ? !review.success : false;
  return reviewFailed ? "disabled (autofix not enabled in config)" : "not needed (review passed)";
},
|
|
36
|
+
|
|
32
37
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
33
38
|
const logger = getLogger();
|
|
34
39
|
const { reviewResult } = ctx;
|
|
@@ -27,6 +27,11 @@ export const rectifyStage: PipelineStage = {
|
|
|
27
27
|
return ctx.config.execution.rectification?.enabled ?? false;
|
|
28
28
|
},
|
|
29
29
|
|
|
30
|
+
/**
 * Human-readable reason reported by the pipeline runner when this stage is skipped.
 *
 * @param ctx - Current pipeline context; only `verifyResult` is read.
 * @returns "not needed (…)" when there is no verify result or verify succeeded,
 *          otherwise "disabled (…)".
 */
skipReason(ctx: PipelineContext): string {
  const verify = ctx.verifyResult;
  // Only a present, unsuccessful verify result makes the config the reason for skipping.
  const verifyFailed = verify ? !verify.success : false;
  return verifyFailed ? "disabled (rectification not enabled in config)" : "not needed (verify passed)";
},
|
|
34
|
+
|
|
30
35
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
31
36
|
const logger = getLogger();
|
|
32
37
|
const { verifyResult } = ctx;
|
|
@@ -26,12 +26,17 @@ export const regressionStage: PipelineStage = {
|
|
|
26
26
|
const mode = ctx.config.execution.regressionGate?.mode ?? "deferred";
|
|
27
27
|
if (mode !== "per-story") return false;
|
|
28
28
|
// Only run when verify passed (or was skipped/not set)
|
|
29
|
-
// Only run when verify passed (or was skipped/not set)
|
|
30
29
|
if (ctx.verifyResult && !ctx.verifyResult.success) return false;
|
|
31
30
|
const gateEnabled = ctx.config.execution.regressionGate?.enabled ?? true;
|
|
32
31
|
return gateEnabled;
|
|
33
32
|
},
|
|
34
33
|
|
|
34
|
+
/**
 * Human-readable reason reported by the pipeline runner when this stage is skipped.
 *
 * Checks the same conditions, in the same order, as this stage's `enabled()`:
 * regression mode, then the verify outcome, then the gate's config flag.
 *
 * @param ctx - Current pipeline context; reads `config.execution.regressionGate` and `verifyResult`.
 * @returns "not needed (…)" when a runtime condition is not met, "disabled (…)" when
 *          the gate is switched off in config.
 */
skipReason(ctx: PipelineContext): string {
  const mode = ctx.config.execution.regressionGate?.mode ?? "deferred";
  if (mode !== "per-story") return `not needed (regression mode is '${mode}', not 'per-story')`;
  // A failed verify also makes enabled() return false; that is a runtime condition,
  // not a config switch, so it must not be reported as "disabled".
  if (ctx.verifyResult && !ctx.verifyResult.success) return "not needed (verify failed)";
  return "disabled (regression gate not enabled in config)";
},
|
|
39
|
+
|
|
35
40
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
36
41
|
const logger = getLogger();
|
|
37
42
|
const testCommand = ctx.config.review?.commands?.test ?? ctx.config.quality.commands.test ?? "bun test";
|
|
@@ -45,7 +45,8 @@ function buildScopedCommand(testFiles: string[], baseCommand: string, testScoped
|
|
|
45
45
|
|
|
46
46
|
export const verifyStage: PipelineStage = {
|
|
47
47
|
name: "verify",
|
|
48
|
-
enabled: () =>
|
|
48
|
+
enabled: (ctx: PipelineContext) => !ctx.fullSuiteGatePassed,
|
|
49
|
+
skipReason: () => "not needed (full-suite gate already passed)",
|
|
49
50
|
|
|
50
51
|
async execute(ctx: PipelineContext): Promise<StageResult> {
|
|
51
52
|
const logger = getLogger();
|
package/src/pipeline/types.ts
CHANGED
|
@@ -108,6 +108,8 @@ export interface PipelineContext {
|
|
|
108
108
|
retryAsLite?: boolean;
|
|
109
109
|
/** Failure category from TDD orchestrator (set by executionStage on TDD failure) */
|
|
110
110
|
tddFailureCategory?: FailureCategory;
|
|
111
|
+
/** Set to true when TDD full-suite gate already passed — verify stage skips to avoid redundant run (BUG-054) */
|
|
112
|
+
fullSuiteGatePassed?: boolean;
|
|
111
113
|
}
|
|
112
114
|
|
|
113
115
|
/**
|
|
@@ -167,6 +169,13 @@ export interface PipelineStage {
|
|
|
167
169
|
*/
|
|
168
170
|
enabled: (ctx: PipelineContext) => boolean;
|
|
169
171
|
|
|
172
|
+
/**
|
|
173
|
+
* Optional human-readable reason why the stage was skipped.
|
|
174
|
+
* Distinguishes "not needed" (conditions not met) from "disabled" (config).
|
|
175
|
+
* Used by the pipeline runner for better observability (BUG-055).
|
|
176
|
+
*/
|
|
177
|
+
skipReason?: (ctx: PipelineContext) => string;
|
|
178
|
+
|
|
170
179
|
/**
|
|
171
180
|
* Execute the stage logic.
|
|
172
181
|
*
|
package/src/review/runner.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
import { spawn } from "bun";
|
|
8
8
|
import type { ExecutionConfig } from "../config/schema";
|
|
9
|
+
import { getSafeLogger } from "../logger";
|
|
9
10
|
import type { ReviewCheckName, ReviewCheckResult, ReviewConfig, ReviewResult } from "./types";
|
|
10
11
|
|
|
11
12
|
/** Default commands for each check type */
|
|
@@ -159,6 +160,40 @@ async function runCheck(check: ReviewCheckName, command: string, workdir: string
|
|
|
159
160
|
}
|
|
160
161
|
}
|
|
161
162
|
|
|
163
|
+
/**
 * List tracked files with uncommitted changes by running
 * `git diff --name-only HEAD` in `workdir`.
 *
 * Best-effort: resolves to an empty array when the working tree is clean,
 * when git exits with a non-zero code, or when spawning the process throws.
 *
 * @param workdir - Directory in which the git command is executed.
 * @returns One entry per non-empty line of the command's output.
 */
async function getUncommittedFilesImpl(workdir: string): Promise<string[]> {
  try {
    const proc = spawn({
      cmd: ["git", "diff", "--name-only", "HEAD"],
      cwd: workdir,
      stdout: "pipe",
      // stderr is never read by this function, so do not leave an unread pipe behind.
      stderr: "ignore",
    });

    // Drain stdout while waiting for the exit code, so a large diff cannot
    // leave the child blocked on a full pipe before it exits.
    const [exitCode, output] = await Promise.all([proc.exited, new Response(proc.stdout).text()]);
    if (exitCode !== 0) {
      return [];
    }

    return output.trim().split("\n").filter(Boolean);
  } catch {
    return [];
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Injection seam for tests: members can be reassigned to stubs, which avoids
 * mock.module() (it leaks in Bun 1.x).
 * RQ-001: getUncommittedFiles lets tests fake the git dirty-tree check.
 */
export const _deps: { getUncommittedFiles: typeof getUncommittedFilesImpl } = {
  /** Resolves to the tracked files with uncommitted changes (git diff --name-only HEAD). */
  getUncommittedFiles: getUncommittedFilesImpl,
};
|
|
196
|
+
|
|
162
197
|
/**
|
|
163
198
|
* Run all configured review checks
|
|
164
199
|
*/
|
|
@@ -168,16 +203,30 @@ export async function runReview(
|
|
|
168
203
|
executionConfig?: ExecutionConfig,
|
|
169
204
|
): Promise<ReviewResult> {
|
|
170
205
|
const startTime = Date.now();
|
|
206
|
+
const logger = getSafeLogger();
|
|
171
207
|
const checks: ReviewCheckResult[] = [];
|
|
172
208
|
let firstFailure: string | undefined;
|
|
173
209
|
|
|
210
|
+
// RQ-001: Check for uncommitted tracked files before running checks
|
|
211
|
+
const uncommittedFiles = await _deps.getUncommittedFiles(workdir);
|
|
212
|
+
if (uncommittedFiles.length > 0) {
|
|
213
|
+
const fileList = uncommittedFiles.join(", ");
|
|
214
|
+
logger?.warn("review", `Uncommitted changes detected before review: ${fileList}`);
|
|
215
|
+
return {
|
|
216
|
+
success: false,
|
|
217
|
+
checks: [],
|
|
218
|
+
totalDurationMs: Date.now() - startTime,
|
|
219
|
+
failureReason: `Working tree has uncommitted changes:\n${uncommittedFiles.map((f) => ` - ${f}`).join("\n")}\n\nStage and commit these files before running review.`,
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
|
|
174
223
|
for (const checkName of config.checks) {
|
|
175
224
|
// Resolve command using resolution strategy
|
|
176
225
|
const command = await resolveCommand(checkName, config, executionConfig, workdir);
|
|
177
226
|
|
|
178
227
|
// Skip if explicitly disabled or not found
|
|
179
228
|
if (command === null) {
|
|
180
|
-
|
|
229
|
+
getSafeLogger()?.warn("review", `Skipping ${checkName} check (command not configured or disabled)`);
|
|
181
230
|
continue;
|
|
182
231
|
}
|
|
183
232
|
|
package/src/tdd/orchestrator.ts
CHANGED
|
@@ -255,7 +255,16 @@ export async function runThreeSessionTdd(options: ThreeSessionTddOptions): Promi
|
|
|
255
255
|
}
|
|
256
256
|
|
|
257
257
|
// Full-Suite Gate (v0.11 Rectification)
|
|
258
|
-
await runFullSuiteGate(
|
|
258
|
+
const fullSuiteGatePassed = await runFullSuiteGate(
|
|
259
|
+
story,
|
|
260
|
+
config,
|
|
261
|
+
workdir,
|
|
262
|
+
agent,
|
|
263
|
+
implementerTier,
|
|
264
|
+
contextMarkdown,
|
|
265
|
+
lite,
|
|
266
|
+
logger,
|
|
267
|
+
);
|
|
259
268
|
|
|
260
269
|
// Session 3: Verifier
|
|
261
270
|
const session3Ref = (await captureGitRef(workdir)) ?? "HEAD";
|
|
@@ -379,5 +388,6 @@ export async function runThreeSessionTdd(options: ThreeSessionTddOptions): Promi
|
|
|
379
388
|
verdict,
|
|
380
389
|
totalCost,
|
|
381
390
|
lite,
|
|
391
|
+
fullSuiteGatePassed,
|
|
382
392
|
};
|
|
383
393
|
}
|
|
@@ -34,9 +34,9 @@ export async function runFullSuiteGate(
|
|
|
34
34
|
contextMarkdown: string | undefined,
|
|
35
35
|
lite: boolean,
|
|
36
36
|
logger: ReturnType<typeof getLogger>,
|
|
37
|
-
): Promise<
|
|
37
|
+
): Promise<boolean> {
|
|
38
38
|
const rectificationEnabled = config.execution.rectification?.enabled ?? false;
|
|
39
|
-
if (!rectificationEnabled) return;
|
|
39
|
+
if (!rectificationEnabled) return false;
|
|
40
40
|
|
|
41
41
|
const rectificationConfig = config.execution.rectification;
|
|
42
42
|
const testCmd = config.quality?.commands?.test ?? "bun test";
|
|
@@ -54,7 +54,7 @@ export async function runFullSuiteGate(
|
|
|
54
54
|
const testSummary = parseBunTestOutput(fullSuiteResult.output);
|
|
55
55
|
|
|
56
56
|
if (testSummary.failed > 0) {
|
|
57
|
-
await runRectificationLoop(
|
|
57
|
+
return await runRectificationLoop(
|
|
58
58
|
story,
|
|
59
59
|
config,
|
|
60
60
|
workdir,
|
|
@@ -69,14 +69,18 @@ export async function runFullSuiteGate(
|
|
|
69
69
|
fullSuiteTimeout,
|
|
70
70
|
);
|
|
71
71
|
}
|
|
72
|
-
|
|
72
|
+
// No failures detected despite non-zero exit — treat as passed
|
|
73
|
+
return true;
|
|
74
|
+
}
|
|
75
|
+
if (fullSuitePassed) {
|
|
73
76
|
logger.info("tdd", "Full suite gate passed", { storyId: story.id });
|
|
74
|
-
|
|
75
|
-
logger.warn("tdd", "Full suite gate execution failed (no output)", {
|
|
76
|
-
storyId: story.id,
|
|
77
|
-
exitCode: fullSuiteResult.exitCode,
|
|
78
|
-
});
|
|
77
|
+
return true;
|
|
79
78
|
}
|
|
79
|
+
logger.warn("tdd", "Full suite gate execution failed (no output)", {
|
|
80
|
+
storyId: story.id,
|
|
81
|
+
exitCode: fullSuiteResult.exitCode,
|
|
82
|
+
});
|
|
83
|
+
return false;
|
|
80
84
|
}
|
|
81
85
|
|
|
82
86
|
/** Run the rectification retry loop when full suite gate detects regressions. */
|
|
@@ -93,7 +97,7 @@ async function runRectificationLoop(
|
|
|
93
97
|
rectificationConfig: NonNullable<NaxConfig["execution"]["rectification"]>,
|
|
94
98
|
testCmd: string,
|
|
95
99
|
fullSuiteTimeout: number,
|
|
96
|
-
): Promise<
|
|
100
|
+
): Promise<boolean> {
|
|
97
101
|
const rectificationState: RectificationState = {
|
|
98
102
|
attempt: 0,
|
|
99
103
|
initialFailures: testSummary.failed,
|
|
@@ -156,7 +160,7 @@ async function runRectificationLoop(
|
|
|
156
160
|
storyId: story.id,
|
|
157
161
|
attempt: rectificationState.attempt,
|
|
158
162
|
});
|
|
159
|
-
|
|
163
|
+
return true;
|
|
160
164
|
}
|
|
161
165
|
|
|
162
166
|
if (retryFullSuite.output) {
|
|
@@ -177,7 +181,8 @@ async function runRectificationLoop(
|
|
|
177
181
|
attempts: rectificationState.attempt,
|
|
178
182
|
remainingFailures: rectificationState.currentFailures,
|
|
179
183
|
});
|
|
180
|
-
|
|
181
|
-
logger.info("tdd", "Full suite gate passed", { storyId: story.id });
|
|
184
|
+
return false;
|
|
182
185
|
}
|
|
186
|
+
logger.info("tdd", "Full suite gate passed", { storyId: story.id });
|
|
187
|
+
return true;
|
|
183
188
|
}
|
package/src/tdd/types.ts
CHANGED
|
@@ -78,4 +78,6 @@ export interface ThreeSessionTddResult {
|
|
|
78
78
|
* undefined = verdict was not attempted (e.g. early-exit before session 3 ran)
|
|
79
79
|
*/
|
|
80
80
|
verdict?: import("./verdict").VerifierVerdict | null;
|
|
81
|
+
/** Whether the TDD full-suite gate passed (used by verify stage to skip redundant run, BUG-054) */
|
|
82
|
+
fullSuiteGatePassed?: boolean;
|
|
81
83
|
}
|
package/src/version.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Version and build info for nax.
|
|
3
3
|
*
|
|
4
4
|
* GIT_COMMIT is injected at build time via --define in the bun build script.
|
|
5
|
-
* When running from source (
|
|
5
|
+
* When running from source (bin/nax.ts), falls back to runtime git rev-parse.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import pkg from "../package.json";
|
|
@@ -11,13 +11,29 @@ declare const GIT_COMMIT: string;
|
|
|
11
11
|
|
|
12
12
|
export const NAX_VERSION: string = pkg.version;
|
|
13
13
|
|
|
14
|
-
/** Short git commit hash
|
|
14
|
+
/**
 * Short git commit hash for this build.
 *
 * Resolution order:
 *  1. `GIT_COMMIT` injected at build time (`bun build --define GIT_COMMIT=...`),
 *     accepted only when it is 6–10 lowercase hex characters.
 *  2. `git rev-parse --short HEAD` run from this file's directory, accepted
 *     under the same pattern.
 *  3. The literal "dev" when neither source yields a valid hash.
 */
export const NAX_COMMIT: string = (() => {
  const shortHashPattern = /^[0-9a-f]{6,10}$/;

  try {
    const injected = typeof GIT_COMMIT === "string" ? GIT_COMMIT : undefined;
    if (injected !== undefined && shortHashPattern.test(injected)) return injected;
  } catch {
    // GIT_COMMIT was not injected — continue with the runtime lookup
  }

  try {
    const revParse = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
      cwd: import.meta.dir,
      stderr: "ignore",
    });
    if (revParse.exitCode !== 0) return "dev";
    const candidate = revParse.stdout.toString().trim();
    return shortHashPattern.test(candidate) ? candidate : "dev";
  } catch {
    // git is unavailable
    return "dev";
  }
})();
|
|
22
38
|
|
|
23
39
|
export const NAX_BUILD_INFO = `v${NAX_VERSION} (${NAX_COMMIT})`;
|
|
@@ -173,7 +173,7 @@ describe("Review Stage - Plugin Integration", () => {
|
|
|
173
173
|
expect(receivedWorkdir).toBe(tempDir);
|
|
174
174
|
});
|
|
175
175
|
|
|
176
|
-
test("
|
|
176
|
+
test("review fails when there are uncommitted changes (RQ-001)", async () => {
|
|
177
177
|
const tempDir = mkdtempSync(join(tmpdir(), "nax-review-plugin-"));
|
|
178
178
|
|
|
179
179
|
// Create a file first
|
|
@@ -181,15 +181,16 @@ describe("Review Stage - Plugin Integration", () => {
|
|
|
181
181
|
|
|
182
182
|
await initGitRepo(tempDir);
|
|
183
183
|
|
|
184
|
-
// Now modify the file after git init
|
|
184
|
+
// Now modify the file after git init WITHOUT committing
|
|
185
|
+
// This violates RQ-001 (dirty working tree)
|
|
185
186
|
writeFileSync(join(tempDir, "test.ts"), "// modified");
|
|
186
187
|
|
|
187
|
-
let
|
|
188
|
+
let reviewerCalled = false;
|
|
188
189
|
const mockReviewer: IReviewPlugin = {
|
|
189
190
|
name: "test-reviewer",
|
|
190
191
|
description: "Test reviewer",
|
|
191
|
-
async check(_workdir
|
|
192
|
-
|
|
192
|
+
async check(_workdir) {
|
|
193
|
+
reviewerCalled = true;
|
|
193
194
|
return { passed: true, output: "OK" };
|
|
194
195
|
},
|
|
195
196
|
};
|
|
@@ -204,9 +205,13 @@ describe("Review Stage - Plugin Integration", () => {
|
|
|
204
205
|
const registry = new PluginRegistry([mockPlugin]);
|
|
205
206
|
const ctx = createMockContext(tempDir, registry);
|
|
206
207
|
|
|
207
|
-
await reviewStage.execute(ctx);
|
|
208
|
+
const result = await reviewStage.execute(ctx);
|
|
208
209
|
|
|
209
|
-
|
|
210
|
+
// RQ-001: Review should fail with dirty working tree
|
|
211
|
+
expect(result.action).toBe("escalate");
|
|
212
|
+
expect(result.reason).toContain("Working tree has uncommitted changes");
|
|
213
|
+
// Reviewer should not be called due to dirty tree check
|
|
214
|
+
expect(reviewerCalled).toBe(false);
|
|
210
215
|
});
|
|
211
216
|
|
|
212
217
|
test("reviewer receives empty array when no files changed", async () => {
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for src/review/runner.ts
|
|
3
|
+
* RQ-001: Assert clean working tree before running review typecheck/lint (BUG-049)
|
|
4
|
+
*
|
|
5
|
+
* Tests verify that runReview() checks for uncommitted tracked-file changes
|
|
6
|
+
* (via git diff --name-only HEAD) before running typecheck or lint.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|
10
|
+
import { _deps, runReview } from "../../../src/review/runner";
|
|
11
|
+
import type { ReviewConfig } from "../../../src/review/types";
|
|
12
|
+
|
|
13
|
+
/** Minimal ReviewConfig with the typecheck check enabled and no command overrides; tests that get past the dirty-tree guard disable the command via executionConfig (typecheckCommand: null) */
|
|
14
|
+
const typecheckConfig: ReviewConfig = {
|
|
15
|
+
enabled: true,
|
|
16
|
+
checks: ["typecheck"],
|
|
17
|
+
commands: {},
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/** ReviewConfig with no checks — used to isolate the dirty-tree guard logic */
|
|
21
|
+
const noChecksConfig: ReviewConfig = {
|
|
22
|
+
enabled: true,
|
|
23
|
+
checks: [],
|
|
24
|
+
commands: {},
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
describe("runReview — dirty working tree guard (RQ-001)", () => {
|
|
28
|
+
let originalGetUncommittedFiles: typeof _deps.getUncommittedFiles;
|
|
29
|
+
|
|
30
|
+
beforeEach(() => {
|
|
31
|
+
originalGetUncommittedFiles = _deps.getUncommittedFiles;
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
afterEach(() => {
|
|
35
|
+
mock.restore();
|
|
36
|
+
_deps.getUncommittedFiles = originalGetUncommittedFiles;
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
describe("dirty working tree", () => {
|
|
40
|
+
test("returns failure with uncommitted files listed in failureReason", async () => {
|
|
41
|
+
_deps.getUncommittedFiles = mock(async (_workdir: string) => [
|
|
42
|
+
"src/types.ts",
|
|
43
|
+
"src/routing.ts",
|
|
44
|
+
]);
|
|
45
|
+
|
|
46
|
+
const result = await runReview(typecheckConfig, "/tmp/fake-workdir");
|
|
47
|
+
|
|
48
|
+
expect(result.success).toBe(false);
|
|
49
|
+
expect(result.failureReason).toBeDefined();
|
|
50
|
+
expect(result.failureReason).toContain("src/types.ts");
|
|
51
|
+
expect(result.failureReason).toContain("src/routing.ts");
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test("does not run typecheck when working tree is dirty", async () => {
|
|
55
|
+
_deps.getUncommittedFiles = mock(async (_workdir: string) => ["src/types.ts"]);
|
|
56
|
+
|
|
57
|
+
// If typecheck were run it would fail (no real workdir), but we expect
|
|
58
|
+
// an early return with zero checks executed.
|
|
59
|
+
const result = await runReview(typecheckConfig, "/tmp/fake-workdir");
|
|
60
|
+
|
|
61
|
+
expect(result.checks).toHaveLength(0);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
test("calls getUncommittedFiles with the provided workdir", async () => {
|
|
65
|
+
const mockFn = mock(async (_workdir: string) => ["src/types.ts"]);
|
|
66
|
+
_deps.getUncommittedFiles = mockFn;
|
|
67
|
+
|
|
68
|
+
await runReview(typecheckConfig, "/tmp/my-project");
|
|
69
|
+
|
|
70
|
+
expect(mockFn).toHaveBeenCalledWith("/tmp/my-project");
|
|
71
|
+
});
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
describe("clean working tree", () => {
|
|
75
|
+
test("proceeds past dirty-tree guard when no uncommitted files", async () => {
|
|
76
|
+
_deps.getUncommittedFiles = mock(async (_workdir: string) => []);
|
|
77
|
+
|
|
78
|
+
// typecheckCommand: null disables the check so no real process is spawned.
|
|
79
|
+
const result = await runReview(typecheckConfig, "/tmp/fake-workdir", {
|
|
80
|
+
typecheckCommand: null,
|
|
81
|
+
maxIterations: 5,
|
|
82
|
+
iterationDelayMs: 0,
|
|
83
|
+
costLimit: 10,
|
|
84
|
+
sessionTimeoutSeconds: 300,
|
|
85
|
+
verificationTimeoutSeconds: 60,
|
|
86
|
+
maxStoriesPerFeature: 20,
|
|
87
|
+
contextProviderTokenBudget: 2000,
|
|
88
|
+
rectification: { enabled: false, maxIterations: 3 },
|
|
89
|
+
regressionGate: { enabled: false },
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
expect(result.success).toBe(true);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
test("calls getUncommittedFiles before running checks", async () => {
|
|
96
|
+
const mockFn = mock(async (_workdir: string) => []);
|
|
97
|
+
_deps.getUncommittedFiles = mockFn;
|
|
98
|
+
|
|
99
|
+
await runReview(noChecksConfig, "/tmp/clean-workdir");
|
|
100
|
+
|
|
101
|
+
expect(mockFn).toHaveBeenCalledWith("/tmp/clean-workdir");
|
|
102
|
+
});
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
describe("untracked files only", () => {
|
|
106
|
+
test("review proceeds when git diff HEAD returns empty (only untracked files exist)", async () => {
|
|
107
|
+
// git diff --name-only HEAD only reports tracked files with changes.
|
|
108
|
+
// Untracked files are invisible to this command — working tree is considered clean.
|
|
109
|
+
_deps.getUncommittedFiles = mock(async (_workdir: string) => []);
|
|
110
|
+
|
|
111
|
+
const result = await runReview(noChecksConfig, "/tmp/fake-workdir");
|
|
112
|
+
|
|
113
|
+
// Should succeed — no dirty tracked files, review can proceed
|
|
114
|
+
expect(result.success).toBe(true);
|
|
115
|
+
});
|
|
116
|
+
});
|
|
117
|
+
});
|