@nathapp/nax 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +8 -0
- package/docs/ROADMAP.md +20 -5
- package/docs/adr/ADR-005-implementation-plan.md +655 -0
- package/docs/adr/ADR-005-pipeline-re-architecture.md +464 -0
- package/package.json +1 -1
- package/src/agents/claude.ts +44 -9
- package/src/config/types.ts +11 -0
- package/src/execution/dry-run.ts +81 -0
- package/src/execution/escalation/tier-outcome.ts +29 -44
- package/src/execution/executor-types.ts +65 -0
- package/src/execution/index.ts +0 -17
- package/src/execution/iteration-runner.ts +132 -0
- package/src/execution/lifecycle/index.ts +0 -1
- package/src/execution/lifecycle/run-regression.ts +5 -5
- package/src/execution/pipeline-result-handler.ts +51 -254
- package/src/execution/sequential-executor.ts +72 -316
- package/src/execution/story-selector.ts +75 -0
- package/src/pipeline/event-bus.ts +276 -0
- package/src/pipeline/runner.ts +51 -77
- package/src/pipeline/stages/autofix.ts +133 -0
- package/src/pipeline/stages/completion.ts +22 -30
- package/src/pipeline/stages/index.ts +30 -13
- package/src/pipeline/stages/rectify.ts +93 -0
- package/src/pipeline/stages/regression.ts +88 -0
- package/src/pipeline/stages/review.ts +19 -153
- package/src/pipeline/stages/verify.ts +18 -2
- package/src/pipeline/subscribers/hooks.ts +133 -0
- package/src/pipeline/subscribers/interaction.ts +68 -0
- package/src/pipeline/subscribers/reporters.ts +174 -0
- package/src/pipeline/types.ts +10 -1
- package/src/review/orchestrator.ts +105 -0
- package/src/tdd/prompts.ts +1 -1
- package/src/verification/index.ts +1 -1
- package/src/verification/orchestrator-types.ts +145 -0
- package/src/verification/orchestrator.ts +76 -0
- package/src/{execution/post-verify-rectification.ts → verification/rectification-loop.ts} +13 -20
- package/src/verification/{gate.ts → runners.ts} +17 -105
- package/src/verification/strategies/acceptance.ts +133 -0
- package/src/verification/strategies/regression.ts +90 -0
- package/src/verification/strategies/scoped.ts +123 -0
- package/test/COVERAGE-GAPS.md +333 -0
- package/test/{acceptance → e2e}/cm-003-default-view.test.ts +1 -0
- package/test/{integration/e2e.test.ts → e2e/plan-analyze-run.test.ts} +1 -0
- package/test/integration/{agent-validation.test.ts → cli/agent-validation.test.ts} +3 -3
- package/test/integration/{cli-config-default-edge-cases.test.ts → cli/cli-config-default-edge-cases.test.ts} +6 -5
- package/test/integration/{cli-config-default-view.test.ts → cli/cli-config-default-view.test.ts} +8 -7
- package/test/integration/{cli-config-diff.test.ts → cli/cli-config-diff.test.ts} +3 -2
- package/test/integration/{cli-config.test.ts → cli/cli-config.test.ts} +3 -2
- package/test/integration/{cli-diagnose.test.ts → cli/cli-diagnose.test.ts} +5 -4
- package/test/integration/{cli-logs.test.ts → cli/cli-logs.test.ts} +12 -3
- package/test/integration/{cli-plugins.test.ts → cli/cli-plugins.test.ts} +4 -3
- package/test/integration/{cli-precheck.test.ts → cli/cli-precheck.test.ts} +4 -3
- package/test/integration/{cli-run-headless.test.ts → cli/cli-run-headless.test.ts} +3 -2
- package/test/integration/{cli.test.ts → cli/cli.test.ts} +2 -1
- package/test/integration/{precheck-integration.test.ts → cli/precheck-integration.test.ts} +10 -9
- package/test/integration/{precheck-orchestrator.test.ts → cli/precheck-orchestrator.test.ts} +4 -3
- package/test/integration/{precheck.test.ts → cli/precheck.test.ts} +5 -4
- package/test/integration/{config-loader.test.ts → config/config-loader.test.ts} +2 -1
- package/test/integration/{config.test.ts → config/config.test.ts} +2 -2
- package/test/integration/config/merger.test.ts +1 -0
- package/test/integration/config/paths.test.ts +1 -0
- package/test/integration/{security-loader.test.ts → config/security-loader.test.ts} +2 -2
- package/test/integration/{context-integration.test.ts → context/context-integration.test.ts} +7 -6
- package/test/integration/{path-security.test.ts → context/context-path-security.test.ts} +2 -2
- package/test/integration/{context-provider-injection.test.ts → context/context-provider-injection.test.ts} +7 -6
- package/test/integration/{context-verification-integration.test.ts → context/context-verification-integration.test.ts} +5 -4
- package/test/integration/{s5-greenfield-fallback.test.ts → context/s5-greenfield-fallback.test.ts} +4 -3
- package/test/integration/{isolation.test.ts → execution/execution-isolation.test.ts} +1 -1
- package/test/integration/{execution.test.ts → execution/execution.test.ts} +8 -8
- package/test/integration/{parallel.test.ts → execution/parallel.test.ts} +2 -1
- package/test/integration/{prd-pause.test.ts → execution/prd-pause.test.ts} +2 -2
- package/test/integration/{prd-resolvers.test.ts → execution/prd-resolvers.test.ts} +3 -2
- package/test/integration/{progress.test.ts → execution/progress.test.ts} +1 -1
- package/test/integration/execution/runner-batching.test.ts +682 -0
- package/test/integration/{runner-config-plugins.test.ts → execution/runner-config-plugins.test.ts} +3 -2
- package/test/integration/execution/runner-escalation.test.ts +561 -0
- package/test/integration/{runner-fixes.test.ts → execution/runner-fixes.test.ts} +4 -3
- package/test/integration/{runner-plugin-integration.test.ts → execution/runner-plugin-integration.test.ts} +6 -5
- package/test/integration/execution/runner-queue-and-attempts.test.ts +476 -0
- package/test/integration/{status-file-integration.test.ts → execution/status-file-integration.test.ts} +9 -8
- package/test/integration/{status-file.test.ts → execution/status-file.test.ts} +3 -2
- package/test/integration/{status-writer.test.ts → execution/status-writer.test.ts} +5 -4
- package/test/integration/{story-id-in-events.test.ts → execution/story-id-in-events.test.ts} +9 -8
- package/test/integration/{interaction-chain-pipeline.test.ts → interaction/interaction-chain-pipeline.test.ts} +26 -14
- package/test/integration/{hooks.test.ts → pipeline/hooks.test.ts} +4 -2
- package/test/integration/{pipeline-acceptance.test.ts → pipeline/pipeline-acceptance.test.ts} +7 -6
- package/test/integration/{pipeline-events.test.ts → pipeline/pipeline-events.test.ts} +7 -6
- package/test/integration/{pipeline.test.ts → pipeline/pipeline.test.ts} +9 -7
- package/test/integration/{reporter-lifecycle.test.ts → pipeline/reporter-lifecycle.test.ts} +9 -7
- package/test/integration/{verify-stage.test.ts → pipeline/verify-stage.test.ts} +7 -5
- package/test/integration/{analyze-integration.test.ts → plan/analyze-integration.test.ts} +3 -2
- package/test/integration/{analyze-scanner.test.ts → plan/analyze-scanner.test.ts} +8 -7
- package/test/integration/{logger.test.ts → plan/logger.test.ts} +1 -1
- package/test/integration/{plan.test.ts → plan/plan.test.ts} +3 -3
- package/test/integration/plugins/config-integration.test.ts +1 -0
- package/test/integration/plugins/config-resolution.test.ts +1 -0
- package/test/integration/plugins/loader.test.ts +1 -0
- package/test/integration/plugins/{registry.test.ts → plugins-registry.test.ts} +1 -0
- package/test/integration/plugins/validator.test.ts +1 -0
- package/test/integration/{review-config-commands.test.ts → review/review-config-commands.test.ts} +4 -3
- package/test/integration/{review-config-schema.test.ts → review/review-config-schema.test.ts} +3 -2
- package/test/integration/{review-plugin-integration.test.ts → review/review-plugin-integration.test.ts} +5 -4
- package/test/integration/{review.test.ts → review/review.test.ts} +3 -2
- package/test/integration/routing/plugin-routing-advanced.test.ts +461 -0
- package/test/integration/{plugin-routing.test.ts → routing/plugin-routing-core.test.ts} +9 -403
- package/test/integration/{routing-stage-bug-021.test.ts → routing/routing-stage-bug-021.test.ts} +8 -7
- package/test/integration/{routing-stage-greenfield.test.ts → routing/routing-stage-greenfield.test.ts} +7 -6
- package/test/integration/{tdd-cleanup.test.ts → tdd/tdd-cleanup.test.ts} +1 -1
- package/test/integration/tdd/tdd-orchestrator-core.test.ts +565 -0
- package/test/integration/tdd/tdd-orchestrator-failureCategory.test.ts +355 -0
- package/test/integration/tdd/tdd-orchestrator-fallback.test.ts +311 -0
- package/test/integration/tdd/tdd-orchestrator-lite.test.ts +289 -0
- package/test/integration/tdd/tdd-orchestrator-prompts.test.ts +260 -0
- package/test/integration/tdd/tdd-orchestrator-verdict.test.ts +536 -0
- package/test/integration/tmp/headless-test/test.jsonl +30 -0
- package/test/integration/{test-scanner.test.ts → verification/test-scanner.test.ts} +1 -1
- package/test/integration/{verification-asset-check.test.ts → verification/verification-asset-check.test.ts} +3 -2
- package/test/unit/acceptance.test.ts +1 -0
- package/test/unit/agent-stderr-capture.test.ts +1 -0
- package/test/unit/agents/claude.test.ts +1 -0
- package/test/unit/analyze-classifier.test.ts +1 -0
- package/test/unit/auto-detect.test.ts +1 -0
- package/test/unit/cli-status.test.ts +1 -0
- package/test/unit/commands/common.test.ts +1 -0
- package/test/unit/commands/logs.test.ts +1 -0
- package/test/unit/commands/unlock.test.ts +1 -0
- package/test/unit/config/defaults.test.ts +1 -0
- package/test/unit/config/regression-gate-schema.test.ts +1 -0
- package/test/unit/config/smart-runner-flag.test.ts +1 -0
- package/test/unit/constitution-generators.test.ts +1 -0
- package/test/unit/constitution.test.ts +1 -0
- package/test/unit/context/context-autodetect.test.ts +297 -0
- package/test/unit/context/context-build.test.ts +575 -0
- package/test/unit/context/context-coverage.test.ts +236 -0
- package/test/unit/context/context-error.test.ts +93 -0
- package/test/unit/context/context-estimate-tokens.test.ts +201 -0
- package/test/unit/context/context-format.test.ts +302 -0
- package/test/unit/context/context-isolation.test.ts +267 -0
- package/test/unit/context/context-sort.test.ts +93 -0
- package/test/unit/context/context-story.test.ts +108 -0
- package/test/{context → unit/context}/prior-failures.test.ts +5 -4
- package/test/unit/context.test.ts +1 -0
- package/test/unit/crash-recovery.test.ts +1 -0
- package/test/unit/escalation.test.ts +1 -0
- package/test/unit/execution/lifecycle/run-completion.test.ts +1 -0
- package/test/unit/execution/lifecycle/run-regression.test.ts +2 -0
- package/test/{execution → unit/execution}/pid-registry.test.ts +2 -1
- package/test/{execution → unit/execution}/structured-failure.test.ts +3 -2
- package/test/unit/execution-logging-stderr.test.ts +1 -0
- package/test/unit/execution-stage.test.ts +1 -0
- package/test/unit/fix-generator.test.ts +1 -0
- package/test/unit/greenfield.test.ts +1 -0
- package/test/unit/interaction/human-review-trigger.test.ts +1 -0
- package/test/unit/interaction-network-failures.test.ts +1 -0
- package/test/unit/interaction-plugins.test.ts +1 -0
- package/test/unit/logging/formatter.test.ts +1 -0
- package/test/unit/merge.test.ts +1 -0
- package/test/unit/pipeline/event-bus.test.ts +105 -0
- package/test/unit/pipeline/routing-partial-override.test.ts +1 -0
- package/test/unit/pipeline/runner-retry.test.ts +89 -0
- package/test/unit/pipeline/stages/autofix.test.ts +97 -0
- package/test/unit/pipeline/stages/rectify.test.ts +101 -0
- package/test/unit/pipeline/stages/regression-stage.test.ts +69 -0
- package/test/unit/pipeline/stages/verify.test.ts +1 -0
- package/test/unit/pipeline/subscribers/hooks.test.ts +45 -0
- package/test/unit/pipeline/subscribers/interaction.test.ts +31 -0
- package/test/unit/pipeline/subscribers/reporters.test.ts +90 -0
- package/test/unit/pipeline/verify-smart-runner.test.ts +1 -0
- package/test/unit/prd-auto-default.test.ts +1 -0
- package/test/unit/prd-failure-category.test.ts +1 -0
- package/test/unit/prd-get-next-story.test.ts +1 -0
- package/test/unit/precheck-checks.test.ts +1 -0
- package/test/unit/precheck-story-size-gate.test.ts +1 -0
- package/test/unit/precheck-types.test.ts +1 -0
- package/test/unit/prompts.test.ts +1 -0
- package/test/unit/rectification.test.ts +2 -1
- package/test/unit/registry.test.ts +1 -0
- package/test/unit/routing/routing-stability.test.ts +1 -0
- package/test/unit/routing/strategies/llm.test.ts +1 -0
- package/test/unit/routing-advanced.test.ts +313 -0
- package/test/unit/routing-core.test.ts +341 -0
- package/test/unit/routing-strategies.test.ts +442 -0
- package/test/unit/storyid-events.test.ts +1 -0
- package/test/{ui → unit/ui}/tui-controls.test.ts +8 -7
- package/test/{ui → unit/ui}/tui-cost-and-pty.test.ts +4 -3
- package/test/{ui → unit/ui}/tui-layout.test.ts +5 -4
- package/test/{ui → unit/ui}/tui-stories.test.ts +5 -4
- package/test/unit/{isolation.test.ts → unit-isolation.test.ts} +1 -0
- package/test/unit/{helpers.test.ts → utils-helpers.test.ts} +1 -0
- package/test/unit/verdict.test.ts +1 -0
- package/test/unit/verification/orchestrator-types.test.ts +54 -0
- package/test/unit/verification/orchestrator.test.ts +66 -0
- package/test/unit/verification/smart-runner-config.test.ts +1 -0
- package/test/unit/verification/smart-runner-discovery.test.ts +8 -7
- package/test/unit/verification/strategies/acceptance.test.ts +33 -0
- package/test/unit/verification/strategies/regression.test.ts +87 -0
- package/test/unit/verification/strategies/scoped.test.ts +100 -0
- package/test/unit/worktree-manager.test.ts +1 -0
- package/src/execution/lifecycle/story-hooks.ts +0 -38
- package/src/execution/post-verify.ts +0 -193
- package/src/execution/rectification.ts +0 -13
- package/src/execution/verification.ts +0 -72
- package/test/integration/rectification-flow.test.ts +0 -512
- package/test/integration/runner.test.ts +0 -1679
- package/test/integration/tdd-orchestrator.test.ts +0 -1762
- package/test/unit/execution/post-verify-regression.test.ts +0 -362
- package/test/unit/execution/post-verify.test.ts +0 -236
- package/test/unit/routing.test.ts +0 -1039
- package/test/{integration → helpers}/helpers.test.ts +0 -0
- package/test/integration/worktree/{merge.test.ts → worktree-merge.test.ts} +0 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// RE-ARCH: keep
|
|
2
|
+
import { describe, expect, test } from "bun:test";
|
|
3
|
+
import { regressionStage, _regressionStageDeps } from "../../../../src/pipeline/stages/regression";
|
|
4
|
+
import { makePassResult, makeFailResult } from "../../../../src/verification/orchestrator-types";
|
|
5
|
+
import type { PipelineContext } from "../../../../src/pipeline/types";
|
|
6
|
+
import { DEFAULT_CONFIG } from "../../../../src/config";
|
|
7
|
+
|
|
8
|
+
function makeCtx(mode: "deferred" | "per-story" | "disabled" = "per-story"): PipelineContext {
|
|
9
|
+
return {
|
|
10
|
+
config: {
|
|
11
|
+
...DEFAULT_CONFIG,
|
|
12
|
+
execution: {
|
|
13
|
+
...DEFAULT_CONFIG.execution,
|
|
14
|
+
regressionGate: { enabled: true, mode, timeoutSeconds: 60, acceptOnTimeout: true },
|
|
15
|
+
},
|
|
16
|
+
quality: { ...DEFAULT_CONFIG.quality, commands: { test: "bun test" } },
|
|
17
|
+
} as any,
|
|
18
|
+
prd: { stories: [] } as any,
|
|
19
|
+
story: { id: "US-001", title: "t", status: "in-progress", acceptanceCriteria: [] } as any,
|
|
20
|
+
stories: [],
|
|
21
|
+
routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "" },
|
|
22
|
+
workdir: "/tmp",
|
|
23
|
+
hooks: {},
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
describe("regressionStage", () => {
|
|
28
|
+
test("disabled when mode is deferred", () => {
|
|
29
|
+
expect(regressionStage.enabled(makeCtx("deferred"))).toBe(false);
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
test("disabled when mode is disabled", () => {
|
|
33
|
+
expect(regressionStage.enabled(makeCtx("disabled"))).toBe(false);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
test("enabled when mode is per-story", () => {
|
|
37
|
+
expect(regressionStage.enabled(makeCtx("per-story"))).toBe(true);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
test("disabled when verifyResult is a failure", () => {
|
|
41
|
+
const ctx = makeCtx("per-story");
|
|
42
|
+
ctx.verifyResult = makeFailResult("US-001", "scoped", "TEST_FAILURE");
|
|
43
|
+
expect(regressionStage.enabled(ctx)).toBe(false);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
test("returns continue when regression passes", async () => {
|
|
47
|
+
const saved = { ..._regressionStageDeps };
|
|
48
|
+
_regressionStageDeps.verifyRegression = async () => makePassResult("US-001", "regression");
|
|
49
|
+
|
|
50
|
+
const result = await regressionStage.execute(makeCtx("per-story"));
|
|
51
|
+
|
|
52
|
+
Object.assign(_regressionStageDeps, saved);
|
|
53
|
+
|
|
54
|
+
expect(result.action).toBe("continue");
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
test("returns escalate when regression fails", async () => {
|
|
58
|
+
const saved = { ..._regressionStageDeps };
|
|
59
|
+
_regressionStageDeps.verifyRegression = async () =>
|
|
60
|
+
makeFailResult("US-001", "regression", "TEST_FAILURE", { failCount: 3 });
|
|
61
|
+
|
|
62
|
+
const result = await regressionStage.execute(makeCtx("per-story"));
|
|
63
|
+
|
|
64
|
+
Object.assign(_regressionStageDeps, saved);
|
|
65
|
+
|
|
66
|
+
expect(result.action).toBe("escalate");
|
|
67
|
+
if (result.action === "escalate") expect(result.reason).toContain("3 test");
|
|
68
|
+
});
|
|
69
|
+
});
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// RE-ARCH: keep
|
|
2
|
+
import { describe, expect, test, mock } from "bun:test";
|
|
3
|
+
import { wireHooks } from "../../../../src/pipeline/subscribers/hooks";
|
|
4
|
+
import { PipelineEventBus } from "../../../../src/pipeline/event-bus";
|
|
5
|
+
import type { LoadedHooksConfig } from "../../../../src/hooks";
|
|
6
|
+
|
|
7
|
+
const EMPTY_HOOKS: LoadedHooksConfig = {};
|
|
8
|
+
|
|
9
|
+
describe("wireHooks", () => {
|
|
10
|
+
test("subscribes to all lifecycle events", () => {
|
|
11
|
+
const bus = new PipelineEventBus();
|
|
12
|
+
wireHooks(bus, EMPTY_HOOKS, "/tmp", "test-feature");
|
|
13
|
+
|
|
14
|
+
// Check subscriptions are registered
|
|
15
|
+
const events = ["run:started", "story:started", "story:completed", "story:failed", "story:paused", "run:paused", "run:completed"] as const;
|
|
16
|
+
for (const ev of events) {
|
|
17
|
+
expect(bus.subscriberCount(ev)).toBe(1);
|
|
18
|
+
}
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
test("returns unsubscribe function that removes all subscriptions", () => {
|
|
22
|
+
const bus = new PipelineEventBus();
|
|
23
|
+
const unsub = wireHooks(bus, EMPTY_HOOKS, "/tmp", "test-feature");
|
|
24
|
+
|
|
25
|
+
unsub();
|
|
26
|
+
|
|
27
|
+
const events = ["run:started", "story:started", "story:completed"] as const;
|
|
28
|
+
for (const ev of events) {
|
|
29
|
+
expect(bus.subscriberCount(ev)).toBe(0);
|
|
30
|
+
}
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
test("errors in hooks don't propagate to callers", async () => {
|
|
34
|
+
const bus = new PipelineEventBus();
|
|
35
|
+
const badHooks: LoadedHooksConfig = {
|
|
36
|
+
"on-story-complete": { command: "exit 1", timeout: 1 } as any,
|
|
37
|
+
};
|
|
38
|
+
wireHooks(bus, badHooks, "/tmp", "test-feature");
|
|
39
|
+
|
|
40
|
+
// Should not throw
|
|
41
|
+
expect(() =>
|
|
42
|
+
bus.emit({ type: "story:completed", storyId: "US-001", story: { id: "US-001" } as any, passed: true, durationMs: 100 }),
|
|
43
|
+
).not.toThrow();
|
|
44
|
+
});
|
|
45
|
+
});
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// RE-ARCH: keep
|
|
2
|
+
import { describe, expect, test } from "bun:test";
|
|
3
|
+
import { wireInteraction } from "../../../../src/pipeline/subscribers/interaction";
|
|
4
|
+
import { PipelineEventBus } from "../../../../src/pipeline/event-bus";
|
|
5
|
+
import { DEFAULT_CONFIG } from "../../../../src/config";
|
|
6
|
+
|
|
7
|
+
describe("wireInteraction", () => {
|
|
8
|
+
test("no subscriptions when interactionChain is null", () => {
|
|
9
|
+
const bus = new PipelineEventBus();
|
|
10
|
+
wireInteraction(bus, null, DEFAULT_CONFIG);
|
|
11
|
+
expect(bus.subscriberCount("human-review:requested")).toBe(0);
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
test("no subscriptions when human-review trigger is disabled", () => {
|
|
15
|
+
const bus = new PipelineEventBus();
|
|
16
|
+
const config = {
|
|
17
|
+
...DEFAULT_CONFIG,
|
|
18
|
+
interaction: { ...DEFAULT_CONFIG.interaction, triggers: { "human-review": { enabled: false } } },
|
|
19
|
+
} as any;
|
|
20
|
+
const chain = {} as any;
|
|
21
|
+
wireInteraction(bus, chain, config);
|
|
22
|
+
expect(bus.subscriberCount("human-review:requested")).toBe(0);
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
test("returns unsubscribe function", () => {
|
|
26
|
+
const bus = new PipelineEventBus();
|
|
27
|
+
const unsub = wireInteraction(bus, null, DEFAULT_CONFIG);
|
|
28
|
+
expect(typeof unsub).toBe("function");
|
|
29
|
+
unsub(); // should not throw
|
|
30
|
+
});
|
|
31
|
+
});
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
// RE-ARCH: keep
|
|
2
|
+
import { describe, expect, test } from "bun:test";
|
|
3
|
+
import { wireReporters } from "../../../../src/pipeline/subscribers/reporters";
|
|
4
|
+
import { PipelineEventBus } from "../../../../src/pipeline/event-bus";
|
|
5
|
+
import type { IReporter } from "../../../../src/plugins/types";
|
|
6
|
+
|
|
7
|
+
function makeReporter(): IReporter & { calls: string[] } {
|
|
8
|
+
const calls: string[] = [];
|
|
9
|
+
return {
|
|
10
|
+
name: "test-reporter",
|
|
11
|
+
calls,
|
|
12
|
+
async onRunStart() { calls.push("onRunStart"); },
|
|
13
|
+
async onStoryComplete(ev) { calls.push(`onStoryComplete:${ev.status}`); },
|
|
14
|
+
async onRunEnd() { calls.push("onRunEnd"); },
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
function makeRegistry(reporter: IReporter) {
|
|
19
|
+
return { getReporters: () => [reporter] } as any;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
describe("wireReporters", () => {
|
|
23
|
+
test("run:started fires onRunStart", async () => {
|
|
24
|
+
const bus = new PipelineEventBus();
|
|
25
|
+
const reporter = makeReporter();
|
|
26
|
+
wireReporters(bus, makeRegistry(reporter), "run-1", Date.now());
|
|
27
|
+
|
|
28
|
+
bus.emit({ type: "run:started", feature: "test", totalStories: 5, workdir: "/tmp" });
|
|
29
|
+
|
|
30
|
+
await Bun.sleep(10); // let fire-and-forget settle
|
|
31
|
+
expect(reporter.calls).toContain("onRunStart");
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
test("story:completed fires onStoryComplete(completed)", async () => {
|
|
35
|
+
const bus = new PipelineEventBus();
|
|
36
|
+
const reporter = makeReporter();
|
|
37
|
+
wireReporters(bus, makeRegistry(reporter), "run-1", Date.now());
|
|
38
|
+
|
|
39
|
+
bus.emit({ type: "story:completed", storyId: "US-001", story: { id: "US-001" } as any, passed: true, durationMs: 100 });
|
|
40
|
+
|
|
41
|
+
await Bun.sleep(10);
|
|
42
|
+
expect(reporter.calls).toContain("onStoryComplete:completed");
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test("story:failed fires onStoryComplete(failed)", async () => {
|
|
46
|
+
const bus = new PipelineEventBus();
|
|
47
|
+
const reporter = makeReporter();
|
|
48
|
+
wireReporters(bus, makeRegistry(reporter), "run-1", Date.now());
|
|
49
|
+
|
|
50
|
+
bus.emit({ type: "story:failed", storyId: "US-001", story: { id: "US-001" } as any, reason: "tests failed", countsTowardEscalation: true });
|
|
51
|
+
|
|
52
|
+
await Bun.sleep(10);
|
|
53
|
+
expect(reporter.calls).toContain("onStoryComplete:failed");
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
test("story:paused fires onStoryComplete(paused)", async () => {
|
|
57
|
+
const bus = new PipelineEventBus();
|
|
58
|
+
const reporter = makeReporter();
|
|
59
|
+
wireReporters(bus, makeRegistry(reporter), "run-1", Date.now());
|
|
60
|
+
|
|
61
|
+
bus.emit({ type: "story:paused", storyId: "US-001", reason: "needs review", cost: 0.5 });
|
|
62
|
+
|
|
63
|
+
await Bun.sleep(10);
|
|
64
|
+
expect(reporter.calls).toContain("onStoryComplete:paused");
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test("run:completed fires onRunEnd", async () => {
|
|
68
|
+
const bus = new PipelineEventBus();
|
|
69
|
+
const reporter = makeReporter();
|
|
70
|
+
wireReporters(bus, makeRegistry(reporter), "run-1", Date.now());
|
|
71
|
+
|
|
72
|
+
bus.emit({ type: "run:completed", totalStories: 5, passedStories: 4, failedStories: 1, durationMs: 60000 });
|
|
73
|
+
|
|
74
|
+
await Bun.sleep(10);
|
|
75
|
+
expect(reporter.calls).toContain("onRunEnd");
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test("reporter errors don't propagate", async () => {
|
|
79
|
+
const bus = new PipelineEventBus();
|
|
80
|
+
const badReporter: IReporter = {
|
|
81
|
+
name: "bad",
|
|
82
|
+
async onStoryComplete() { throw new Error("reporter crash"); },
|
|
83
|
+
};
|
|
84
|
+
wireReporters(bus, makeRegistry(badReporter), "run-1", Date.now());
|
|
85
|
+
|
|
86
|
+
expect(() =>
|
|
87
|
+
bus.emit({ type: "story:completed", storyId: "US-001", story: { id: "US-001" } as any, passed: true, durationMs: 100 }),
|
|
88
|
+
).not.toThrow();
|
|
89
|
+
});
|
|
90
|
+
});
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// RE-ARCH: keep
|
|
1
2
|
/**
|
|
2
3
|
* Unit tests for rectification core logic (v0.11)
|
|
3
4
|
*/
|
|
@@ -8,7 +9,7 @@ import {
|
|
|
8
9
|
type RectificationState,
|
|
9
10
|
createRectificationPrompt,
|
|
10
11
|
shouldRetryRectification,
|
|
11
|
-
} from "../../src/execution/rectification";
|
|
12
|
+
} from "../../src/verification/rectification";
|
|
12
13
|
import type { TestFailure } from "../../src/execution/test-output-parser";
|
|
13
14
|
import type { UserStory } from "../../src/prd";
|
|
14
15
|
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
// RE-ARCH: keep
|
|
2
|
+
/**
|
|
3
|
+
* Routing Tests
|
|
4
|
+
*
|
|
5
|
+
* Consolidated test suite for routing system including:
|
|
6
|
+
* - Core routing logic (classifyComplexity, determineTestStrategy, routeTask)
|
|
7
|
+
* - Routing strategies (keyword, llm, manual, adaptive)
|
|
8
|
+
* - Strategy chain execution
|
|
9
|
+
* - Async support and chain delegation
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { beforeEach, describe, expect, mock, spyOn, test } from "bun:test";
|
|
13
|
+
import { DEFAULT_CONFIG } from "../../src/config";
|
|
14
|
+
import type { NaxConfig } from "../../src/config";
|
|
15
|
+
import { escalateTier } from "../../src/execution/runner";
|
|
16
|
+
import type { AggregateMetrics } from "../../src/metrics/types";
|
|
17
|
+
import type { UserStory } from "../../src/prd/types";
|
|
18
|
+
import { classifyComplexity, determineTestStrategy, routeTask } from "../../src/routing";
|
|
19
|
+
import { buildStrategyChain } from "../../src/routing/builder";
|
|
20
|
+
import { StrategyChain } from "../../src/routing/chain";
|
|
21
|
+
import { keywordStrategy, llmStrategy, manualStrategy } from "../../src/routing/strategies";
|
|
22
|
+
import { adaptiveStrategy } from "../../src/routing/strategies/adaptive";
|
|
23
|
+
import {
|
|
24
|
+
buildBatchPrompt,
|
|
25
|
+
buildRoutingPrompt,
|
|
26
|
+
clearCache,
|
|
27
|
+
clearCacheForStory,
|
|
28
|
+
getCacheSize,
|
|
29
|
+
llmStrategy as llmStrategyFull,
|
|
30
|
+
parseRoutingResponse,
|
|
31
|
+
routeBatch,
|
|
32
|
+
stripCodeFences,
|
|
33
|
+
validateRoutingDecision,
|
|
34
|
+
} from "../../src/routing/strategies/llm";
|
|
35
|
+
import type { RoutingContext, RoutingDecision, RoutingStrategy } from "../../src/routing/strategy";
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
function createStory(
|
|
39
|
+
id: string,
|
|
40
|
+
title: string,
|
|
41
|
+
description: string,
|
|
42
|
+
acceptanceCriteria: string[] = [],
|
|
43
|
+
tags: string[] = [],
|
|
44
|
+
): UserStory {
|
|
45
|
+
return {
|
|
46
|
+
id,
|
|
47
|
+
title,
|
|
48
|
+
description,
|
|
49
|
+
acceptanceCriteria,
|
|
50
|
+
tags,
|
|
51
|
+
status: "pending",
|
|
52
|
+
dependencies: [],
|
|
53
|
+
passes: false,
|
|
54
|
+
escalations: [],
|
|
55
|
+
attempts: 0,
|
|
56
|
+
};
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function createContext(metrics?: AggregateMetrics, config: NaxConfig = DEFAULT_CONFIG): RoutingContext {
|
|
60
|
+
return {
|
|
61
|
+
config,
|
|
62
|
+
metrics,
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
function createMockMetrics(
|
|
67
|
+
complexityData: Record<string, { predicted: number; actualTierUsed: string; mismatchRate: number }>,
|
|
68
|
+
): AggregateMetrics {
|
|
69
|
+
return {
|
|
70
|
+
totalRuns: 10,
|
|
71
|
+
totalCost: 5.0,
|
|
72
|
+
totalStories: 100,
|
|
73
|
+
firstPassRate: 0.75,
|
|
74
|
+
escalationRate: 0.25,
|
|
75
|
+
avgCostPerStory: 0.05,
|
|
76
|
+
avgCostPerFeature: 0.5,
|
|
77
|
+
modelEfficiency: {
|
|
78
|
+
"claude-haiku-4-5": {
|
|
79
|
+
attempts: 60,
|
|
80
|
+
successes: 50,
|
|
81
|
+
passRate: 0.833,
|
|
82
|
+
avgCost: 0.005,
|
|
83
|
+
totalCost: 0.25,
|
|
84
|
+
},
|
|
85
|
+
"claude-sonnet-4.5": {
|
|
86
|
+
attempts: 30,
|
|
87
|
+
successes: 28,
|
|
88
|
+
passRate: 0.933,
|
|
89
|
+
avgCost: 0.02,
|
|
90
|
+
totalCost: 0.56,
|
|
91
|
+
},
|
|
92
|
+
"claude-opus-4-6": {
|
|
93
|
+
attempts: 10,
|
|
94
|
+
successes: 10,
|
|
95
|
+
passRate: 1.0,
|
|
96
|
+
avgCost: 0.08,
|
|
97
|
+
totalCost: 0.8,
|
|
98
|
+
},
|
|
99
|
+
},
|
|
100
|
+
complexityAccuracy: complexityData,
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
describe("Adaptive Routing Strategy", () => {
|
|
105
|
+
describe("No metrics available", () => {
|
|
106
|
+
test("should fallback to configured strategy when no metrics", async () => {
|
|
107
|
+
const story = createStory("US-001", "Add user login", "Implement user authentication", [
|
|
108
|
+
"User can log in with email and password",
|
|
109
|
+
]);
|
|
110
|
+
|
|
111
|
+
const context = createContext(undefined);
|
|
112
|
+
const decision = await adaptiveStrategy.route(story, context);
|
|
113
|
+
|
|
114
|
+
expect(decision).not.toBeNull();
|
|
115
|
+
expect(decision?.reasoning).toContain("no metrics available");
|
|
116
|
+
expect(decision?.reasoning).toContain("fallback to");
|
|
117
|
+
});
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
describe("Insufficient data fallback", () => {
|
|
121
|
+
test("should fallback when samples below minSamples threshold", async () => {
|
|
122
|
+
const metrics = createMockMetrics({
|
|
123
|
+
simple: {
|
|
124
|
+
predicted: 5,
|
|
125
|
+
actualTierUsed: "fast",
|
|
126
|
+
mismatchRate: 0.2,
|
|
127
|
+
},
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
const story = createStory("US-002", "Fix typo", "Fix typo in README", ["Typo is fixed"]);
|
|
131
|
+
|
|
132
|
+
const context = createContext(metrics);
|
|
133
|
+
const decision = await adaptiveStrategy.route(story, context);
|
|
134
|
+
|
|
135
|
+
expect(decision).not.toBeNull();
|
|
136
|
+
expect(decision?.reasoning).toContain("insufficient data");
|
|
137
|
+
expect(decision?.reasoning).toContain("5/10 samples");
|
|
138
|
+
expect(decision?.reasoning).toContain("fallback to");
|
|
139
|
+
});
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
describe("Sufficient data - adaptive routing", () => {
  test("should route to fast tier when low mismatch rate", async () => {
    // 50 recorded "simple" predictions with a 10% mismatch rate.
    const simpleMetrics = createMockMetrics({
      simple: { predicted: 50, actualTierUsed: "fast", mismatchRate: 0.1 },
    });
    const acceptance = ["Button is visible", "Button triggers submit"];
    const buttonStory = createStory("US-004", "Add button", "Add a submit button to the form", acceptance);

    const decision = await adaptiveStrategy.route(buttonStory, createContext(simpleMetrics));

    expect(decision).not.toBeNull();
    expect(decision?.complexity).toBe("simple");
    expect(decision?.modelTier).toBe("fast");

    // The reasoning string should echo the route taken and the metrics behind it.
    const expectedFragments = ["adaptive", "simple → fast", "samples: 50", "mismatch: 10.0%"];
    for (const fragment of expectedFragments) {
      expect(decision?.reasoning).toContain(fragment);
    }
  });

  test("should include cost information in reasoning", async () => {
    const complexMetrics = createMockMetrics({
      complex: { predicted: 15, actualTierUsed: "powerful", mismatchRate: 0.2 },
    });
    // Ten generated acceptance criteria: "Criterion 1" .. "Criterion 10".
    const criteria = Array.from({ length: 10 }, (_, idx) => `Criterion ${idx + 1}`);
    // NOTE(review): the fifth createStory argument is presumably the story's tags — confirm against the helper.
    const authStory = createStory(
      "US-006",
      "Refactor authentication",
      "Refactor the auth module to use JWT",
      criteria,
      ["security", "breaking-change"],
    );

    const decision = await adaptiveStrategy.route(authStory, createContext(complexMetrics));

    expect(decision).not.toBeNull();
    expect(decision?.reasoning).toContain("cost:");
    // A dollar amount printed with four decimal places must appear in the reasoning.
    expect(decision?.reasoning).toMatch(/\$\d+\.\d{4}/);
  });
});
|
|
194
|
+
|
|
195
|
+
describe("Edge cases", () => {
  test("should handle zero mismatch rate gracefully", async () => {
    // 100 recorded "simple" predictions with a mismatch rate of exactly zero.
    const perfectMetrics = createMockMetrics({
      simple: { predicted: 100, actualTierUsed: "fast", mismatchRate: 0.0 },
    });
    const helpTextStory = createStory("US-014", "Add text", "Add help text", ["Text added"]);

    const decision = await adaptiveStrategy.route(helpTextStory, createContext(perfectMetrics));

    expect(decision).not.toBeNull();
    expect(decision?.modelTier).toBe("fast");
  });
});
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
// ============================================================================
|
|
216
|
+
// LLM Cache Clearing Tests (BUG-028 fix)
|
|
217
|
+
// ============================================================================
|
|
218
|
+
|
|
219
|
+
describe("LLM Cache Clearing on Tier Escalation", () => {
  // Every test starts from an empty cache, so the individual tests do not
  // need to call clearCache() again themselves.
  beforeEach(() => {
    clearCache();
  });

  test("cache hit returns cached decision", () => {
    // NOTE(review): this suite only uses clearCache, clearCacheForStory and
    // getCacheSize, none of which can seed an entry, so a genuine cache hit is
    // not exercised here. In a real scenario the LLM strategy would populate
    // the cache. Until a seeding hook is available this test only pins the
    // initial (empty) cache state. The previous body built story/decision
    // fixtures and a local Map that never touched the real cache; they were
    // dead code and have been removed.
    expect(getCacheSize()).toBe(0);
  });

  test("clearCacheForStory removes cache entry", () => {
    const storyId = "US-cache-002";
    expect(getCacheSize()).toBe(0);

    // Clearing an entry that does not exist must not throw or change the size.
    clearCacheForStory(storyId);
    expect(getCacheSize()).toBe(0);
  });

  test("clearCacheForStory after tier escalation forces re-routing", () => {
    const storyId = "US-cache-003";
    expect(getCacheSize()).toBe(0);

    // Simulate the per-story clear performed on escalation.
    clearCacheForStory(storyId);

    // Cache should still be empty.
    expect(getCacheSize()).toBe(0);
  });

  test("clearing one story does not affect other cached stories", () => {
    // NOTE(review): the cache is empty throughout, so this only verifies that
    // clearing two distinct story ids is safe; it does not demonstrate
    // isolation between populated entries.
    const storyIds = ["US-escalate-1", "US-escalate-2"];

    for (const id of storyIds) {
      clearCacheForStory(id);
    }

    expect(getCacheSize()).toBe(0);
  });

  test("clearCacheForStory is idempotent", () => {
    const storyId = "US-idempotent";
    expect(getCacheSize()).toBe(0);

    // Clearing the same story repeatedly should be safe.
    for (let attempt = 0; attempt < 3; attempt += 1) {
      clearCacheForStory(storyId);
    }

    expect(getCacheSize()).toBe(0);
  });
});
|