npm - @nathapp/nax - Versions diffs - 0.25.0 → 0.27.0 - Mend

@nathapp/nax 0.25.0 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/.gitlab-ci.yml +1 -0
package/CLAUDE.md +70 -56
package/docs/ROADMAP.md +38 -8
package/nax/features/review-quality/prd.json +55 -0
package/nax/features/routing-persistence/prd.json +104 -0
package/nax/features/routing-persistence/progress.txt +1 -0
package/nax/status.json +22 -13
package/package.json +1 -1
package/src/execution/iteration-runner.ts +1 -0
package/src/metrics/aggregator.ts +2 -1
package/src/metrics/tracker.ts +26 -14
package/src/metrics/types.ts +2 -0
package/src/optimizer/index.ts +2 -1
package/src/pipeline/stages/routing.ts +42 -8
package/src/pipeline/types.ts +2 -0
package/src/prd/types.ts +4 -0
package/src/review/runner.ts +50 -1
package/src/routing/content-hash.ts +25 -0
package/src/routing/index.ts +3 -0
package/src/version.ts +20 -4
package/test/integration/review/review-plugin-integration.test.ts +12 -7
package/test/unit/metrics/aggregator.test.ts +164 -0
package/test/unit/metrics/tracker.test.ts +186 -0
package/test/unit/pipeline/stages/routing-idempotence.test.ts +139 -0
package/test/unit/pipeline/stages/routing-initial-complexity.test.ts +321 -0
package/test/unit/pipeline/stages/routing-persistence.test.ts +380 -0
package/test/unit/review/runner.test.ts +117 -0
package/test/unit/routing/content-hash.test.ts +99 -0

package/src/pipeline/stages/routing.ts CHANGED Viewed

@@ -2,15 +2,18 @@
  * Routing Stage
  *
  * Classifies story complexity and determines model tier + test strategy.
- * Uses cached complexity/testStrategy/modelTier from story if available.
+ * Uses cached complexity/testStrategy/modelTier from story if contentHash matches or is absent (legacy routing).
  * modelTier: uses escalated tier if explicitly set (BUG-032), otherwise derives from config.
  *
+ * RRP-003: contentHash staleness detection — if story.routing.contentHash is present but does not
+ * match the current story content, re-classifies; routing with no contentHash is honored as-is.
+ *
  * @returns
  * - `continue`: Routing determined, proceed to next stage
  *
  * @example
  * ```ts
- * // Story has cached routing with complexity
+ * // Story has cached routing with matching contentHash
  * await routingStage.execute(ctx);
  * // ctx.routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "..." }
  * // modelTier is derived from current config.autoMode.complexityRouting
@@ -19,7 +22,8 @@
 import { isGreenfieldStory } from "../../context/greenfield";
 import { getLogger } from "../../logger";
-import { complexityToModelTier, routeStory } from "../../routing";
+import { savePRD } from "../../prd";
+import { complexityToModelTier, computeStoryContentHash, routeStory } from "../../routing";
 import { clearCache, routeBatch } from "../../routing/strategies/llm";
 import type { PipelineContext, PipelineStage, RoutingResult, StageResult } from "../types";
@@ -30,11 +34,25 @@ export const routingStage: PipelineStage = {
   async execute(ctx: PipelineContext): Promise<StageResult> {
     const logger = getLogger();
-    // If story has cached routing, use cached values (escalated modelTier takes priority)
-    // Otherwise, perform fresh classification
+    // Staleness detection (RRP-003):
+    // - story.routing absent                   → cache miss (no prior routing)
+    // - story.routing + no contentHash         → legacy cache hit (manual / pre-RRP-003 routing, honor as-is)
+    // - story.routing + contentHash matches    → cache hit
+    // - story.routing + contentHash mismatches → cache miss (stale, re-classify)
+    const hasExistingRouting = ctx.story.routing !== undefined;
+    const hasContentHash = ctx.story.routing?.contentHash !== undefined;
+    let currentHash: string | undefined;
+    let hashMatch = false;
+    if (hasContentHash) {
+      currentHash = _routingDeps.computeStoryContentHash(ctx.story);
+      hashMatch = ctx.story.routing?.contentHash === currentHash;
+    }
+    const isCacheHit = hasExistingRouting && (!hasContentHash || hashMatch);
     let routing: { complexity: string; testStrategy: string; modelTier: string; reasoning?: string };
-    if (ctx.story.routing) {
-      // Use cached complexity/testStrategy/modelTier
+    if (isCacheHit) {
+      // Cache hit: legacy routing (no contentHash) or matching contentHash — use cached values
       routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
       // Override with cached values only when they are actually set
       if (ctx.story.routing?.complexity) routing.complexity = ctx.story.routing.complexity;
@@ -50,8 +68,22 @@ export const routingStage: PipelineStage = {
         );
       }
     } else {
-      // Fresh classification
+      // Cache miss: no routing, or contentHash present but mismatched — fresh classification
       routing = await _routingDeps.routeStory(ctx.story, { config: ctx.config }, ctx.workdir, ctx.plugins);
+      // currentHash already computed if a mismatch was detected; compute now if starting fresh
+      currentHash = currentHash ?? _routingDeps.computeStoryContentHash(ctx.story);
+      ctx.story.routing = {
+        ...(ctx.story.routing ?? {}),
+        complexity: routing.complexity as import("../../config").Complexity,
+        initialComplexity:
+          ctx.story.routing?.initialComplexity ?? (routing.complexity as import("../../config").Complexity),
+        testStrategy: routing.testStrategy as import("../../config").TestStrategy,
+        reasoning: routing.reasoning ?? "",
+        contentHash: currentHash,
+      };
+      if (ctx.prdPath) {
+        await _routingDeps.savePRD(ctx.prd, ctx.prdPath);
+      }
     }
     // BUG-010: Greenfield detection — force test-after if no test files exist
@@ -97,4 +129,6 @@ export const _routingDeps = {
   complexityToModelTier,
   isGreenfieldStory,
   clearCache,
+  savePRD,
+  computeStoryContentHash,
 };

package/src/pipeline/types.ts CHANGED Viewed

@@ -65,6 +65,8 @@ export interface PipelineContext {
   routing: RoutingResult;
   /** Working directory (project root) */
   workdir: string;
+  /** Absolute path to the prd.json file (used by routing stage to persist initial classification) */
+  prdPath?: string;
   /** Feature directory (optional, e.g., nax/features/my-feature/) */
   featureDir?: string;
   /** Hooks configuration */

package/src/prd/types.ts CHANGED Viewed

@@ -45,6 +45,10 @@ export interface StructuredFailure {
 /** Routing metadata per story */
 export interface StoryRouting {
   complexity: Complexity;
+  /** Initial complexity from first classification — written once, never overwritten by escalation */
+  initialComplexity?: Complexity;
+  /** Content hash of story fields at time of routing — used to detect stale cached routing (RRP-003) */
+  contentHash?: string;
   /** Model tier (derived at runtime from config, not persisted) */
   modelTier?: ModelTier;
   testStrategy: TestStrategy;

package/src/review/runner.ts CHANGED Viewed

@@ -6,6 +6,7 @@
 import { spawn } from "bun";
 import type { ExecutionConfig } from "../config/schema";
+import { getSafeLogger } from "../logger";
 import type { ReviewCheckName, ReviewCheckResult, ReviewConfig, ReviewResult } from "./types";
 /** Default commands for each check type */
@@ -159,6 +160,40 @@ async function runCheck(check: ReviewCheckName, command: string, workdir: string
   }
 }
+/**
+ * Get uncommitted tracked files via git diff --name-only HEAD.
+ * Returns empty array if git command fails or working tree is clean.
+ */
+async function getUncommittedFilesImpl(workdir: string): Promise<string[]> {
+  try {
+    const proc = spawn({
+      cmd: ["git", "diff", "--name-only", "HEAD"],
+      cwd: workdir,
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    const exitCode = await proc.exited;
+    if (exitCode !== 0) {
+      return [];
+    }
+    const output = await new Response(proc.stdout).text();
+    return output.trim().split("\n").filter(Boolean);
+  } catch {
+    return [];
+  }
+}
+/**
+ * Swappable dependencies for testing (avoids mock.module() which leaks in Bun 1.x).
+ * RQ-001: getUncommittedFiles enables mocking of the git dirty-tree check.
+ */
+export const _deps = {
+  /** Returns tracked files with uncommitted changes (git diff --name-only HEAD). */
+  getUncommittedFiles: getUncommittedFilesImpl,
+};
 /**
  * Run all configured review checks
  */
@@ -168,16 +203,30 @@ export async function runReview(
   executionConfig?: ExecutionConfig,
 ): Promise<ReviewResult> {
   const startTime = Date.now();
+  const logger = getSafeLogger();
   const checks: ReviewCheckResult[] = [];
   let firstFailure: string | undefined;
+  // RQ-001: Check for uncommitted tracked files before running checks
+  const uncommittedFiles = await _deps.getUncommittedFiles(workdir);
+  if (uncommittedFiles.length > 0) {
+    const fileList = uncommittedFiles.join(", ");
+    logger?.warn("review", `Uncommitted changes detected before review: ${fileList}`);
+    return {
+      success: false,
+      checks: [],
+      totalDurationMs: Date.now() - startTime,
+      failureReason: `Working tree has uncommitted changes:\n${uncommittedFiles.map((f) => `  - ${f}`).join("\n")}\n\nStage and commit these files before running review.`,
+    };
+  }
   for (const checkName of config.checks) {
     // Resolve command using resolution strategy
     const command = await resolveCommand(checkName, config, executionConfig, workdir);
     // Skip if explicitly disabled or not found
     if (command === null) {
-      console.warn(`[nax] Skipping ${checkName} check (command not configured or disabled)`);
+      getSafeLogger()?.warn("review", `Skipping ${checkName} check (command not configured or disabled)`);
       continue;
     }

package/src/routing/content-hash.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Story Content Hash
+ *
+ * Computes a deterministic hash of the story content fields used for routing.
+ * Used by the routing stage (RRP-003) to detect stale cached routing.
+ */
+import type { UserStory } from "../prd/types";
+/**
+ * Compute a deterministic hash of the story content fields used for routing.
+ * Hash input: title + "\0" + description + "\0" + acceptanceCriteria.join("") + "\0" + tags.join("")
+ *
+ * Null-byte separators between fields prevent cross-field collisions.
+ *
+ * @param story - The user story to hash
+ * @returns A hex string content hash
+ */
+export function computeStoryContentHash(story: UserStory): string {
+  const input = `${story.title}\0${story.description}\0${story.acceptanceCriteria.join("")}\0${story.tags.join("")}`;
+  const hasher = new Bun.CryptoHasher("sha256");
+  hasher.update(input);
+  return hasher.digest("hex");
+}

package/src/routing/index.ts CHANGED Viewed

@@ -15,3 +15,6 @@ export { keywordStrategy, llmStrategy, manualStrategy } from "./strategies";
 // Custom strategy loader
 export { loadCustomStrategy } from "./loader";
 export { tryLlmBatchRoute } from "./batch-route";
+// Content hash for staleness detection (RRP-003)
+export { computeStoryContentHash } from "./content-hash";

package/src/version.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * Version and build info for nax.
  *
  * GIT_COMMIT is injected at build time via --define in the bun build script.
- * When running from source (bun run dev), it falls back to "dev".
+ * When running from source (bin/nax.ts), falls back to runtime git rev-parse.
  */
 import pkg from "../package.json";
@@ -11,13 +11,29 @@ declare const GIT_COMMIT: string;
 export const NAX_VERSION: string = pkg.version;
-/** Short git commit hash, injected at build time. Falls back to "dev" from source. */
+/** Short git commit hash — injected at build time, or resolved at runtime from git. */
 export const NAX_COMMIT: string = (() => {
+  // Build-time injection (bun build --define GIT_COMMIT=...)
+  // Guard: must be a non-empty string that looks like a real commit hash
   try {
-    return GIT_COMMIT ?? "dev";
+    if (typeof GIT_COMMIT === "string" && /^[0-9a-f]{6,10}$/.test(GIT_COMMIT)) return GIT_COMMIT;
   } catch {
-    return "dev";
+    // not injected — fall through to runtime resolution
   }
+  // Runtime fallback: resolve from the source file's git repo (Bun-native)
+  try {
+    const result = Bun.spawnSync(["git", "rev-parse", "--short", "HEAD"], {
+      cwd: import.meta.dir,
+      stderr: "ignore",
+    });
+    if (result.exitCode === 0) {
+      const hash = result.stdout.toString().trim();
+      if (/^[0-9a-f]{6,10}$/.test(hash)) return hash;
+    }
+  } catch {
+    // git not available
+  }
+  return "dev";
 })();
 export const NAX_BUILD_INFO = `v${NAX_VERSION} (${NAX_COMMIT})`;

package/test/integration/review/review-plugin-integration.test.ts CHANGED Viewed

@@ -173,7 +173,7 @@ describe("Review Stage - Plugin Integration", () => {
       expect(receivedWorkdir).toBe(tempDir);
     });
-    test("reviewer receives list of changed files", async () => {
+    test("review fails when there are uncommitted changes (RQ-001)", async () => {
       const tempDir = mkdtempSync(join(tmpdir(), "nax-review-plugin-"));
       // Create a file first
@@ -181,15 +181,16 @@ describe("Review Stage - Plugin Integration", () => {
       await initGitRepo(tempDir);
-      // Now modify the file after git init
+      // Now modify the file after git init WITHOUT committing
+      // This violates RQ-001 (dirty working tree)
       writeFileSync(join(tempDir, "test.ts"), "// modified");
-      let receivedFiles: string[] | undefined;
+      let reviewerCalled = false;
       const mockReviewer: IReviewPlugin = {
         name: "test-reviewer",
         description: "Test reviewer",
-        async check(_workdir, changedFiles) {
-          receivedFiles = changedFiles;
+        async check(_workdir) {
+          reviewerCalled = true;
           return { passed: true, output: "OK" };
         },
       };
@@ -204,9 +205,13 @@ describe("Review Stage - Plugin Integration", () => {
       const registry = new PluginRegistry([mockPlugin]);
       const ctx = createMockContext(tempDir, registry);
-      await reviewStage.execute(ctx);
+      const result = await reviewStage.execute(ctx);
-      expect(receivedFiles).toContain("test.ts");
+      // RQ-001: Review should fail with dirty working tree
+      expect(result.action).toBe("escalate");
+      expect(result.reason).toContain("Working tree has uncommitted changes");
+      // Reviewer should not be called due to dirty tree check
+      expect(reviewerCalled).toBe(false);
     });
     test("reviewer receives empty array when no files changed", async () => {

package/test/unit/metrics/aggregator.test.ts ADDED Viewed

@@ -0,0 +1,164 @@
+/**
+ * Metrics Aggregator — RRP-002: complexityAccuracy uses initialComplexity
+ *
+ * AC-6: calculateAggregateMetrics complexityAccuracy compares
+ *       initialComplexity (predicted) vs finalTier (actual), not
+ *       complexity (which may reflect post-escalation state).
+ */
+import { describe, expect, test } from "bun:test";
+import { calculateAggregateMetrics } from "../../../src/metrics/aggregator";
+import type { RunMetrics, StoryMetrics } from "../../../src/metrics/types";
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function makeStoryMetrics(overrides: Partial<StoryMetrics> & { storyId: string }): StoryMetrics {
+  return {
+    storyId: overrides.storyId,
+    complexity: "medium",
+    modelTier: "balanced",
+    modelUsed: "claude-sonnet-4-5",
+    attempts: 1,
+    finalTier: "balanced",
+    success: true,
+    cost: 0.01,
+    durationMs: 5000,
+    firstPassSuccess: true,
+    startedAt: "2026-01-01T00:00:00Z",
+    completedAt: "2026-01-01T00:00:05Z",
+    ...overrides,
+  };
+}
+function makeRun(stories: StoryMetrics[]): RunMetrics {
+  return {
+    runId: "run-001",
+    feature: "test-feature",
+    startedAt: "2026-01-01T00:00:00Z",
+    completedAt: "2026-01-01T00:01:00Z",
+    totalCost: stories.reduce((sum, s) => sum + s.cost, 0),
+    totalStories: stories.length,
+    storiesCompleted: stories.filter((s) => s.success).length,
+    storiesFailed: stories.filter((s) => !s.success).length,
+    totalDurationMs: 60000,
+    stories,
+  };
+}
+// ---------------------------------------------------------------------------
+// AC-6: complexityAccuracy uses initialComplexity as predicted complexity
+// ---------------------------------------------------------------------------
+describe("calculateAggregateMetrics - complexityAccuracy uses initialComplexity", () => {
+  test("complexityAccuracy keyed by initialComplexity when present", () => {
+    // Story originally predicted as 'simple' but escalated (finalTier = 'powerful')
+    const story = makeStoryMetrics({
+      storyId: "US-001",
+      complexity: "medium",          // post-escalation complexity
+      initialComplexity: "simple",   // original prediction
+      modelTier: "fast",
+      finalTier: "powerful",
+      attempts: 2,
+      firstPassSuccess: false,
+    });
+    const runs = [makeRun([story])];
+    const aggregate = calculateAggregateMetrics(runs);
+    // complexityAccuracy should be keyed by initialComplexity ("simple"), not complexity ("medium")
+    expect(aggregate.complexityAccuracy["simple"]).toBeDefined();
+    expect(aggregate.complexityAccuracy["medium"]).toBeUndefined();
+  });
+  test("mismatch detected when initialComplexity tier != finalTier", () => {
+    const escalatedStory = makeStoryMetrics({
+      storyId: "US-001",
+      complexity: "medium",
+      initialComplexity: "simple",
+      modelTier: "fast",
+      finalTier: "powerful",
+      attempts: 2,
+      firstPassSuccess: false,
+    });
+    const runs = [makeRun([escalatedStory])];
+    const aggregate = calculateAggregateMetrics(runs);
+    // simple -> powerful: mismatch expected
+    expect(aggregate.complexityAccuracy["simple"].mismatchRate).toBeGreaterThan(0);
+  });
+  test("no mismatch when initialComplexity tier matches finalTier", () => {
+    const successStory = makeStoryMetrics({
+      storyId: "US-001",
+      complexity: "medium",
+      initialComplexity: "medium",
+      modelTier: "balanced",
+      finalTier: "balanced",
+      attempts: 1,
+      firstPassSuccess: true,
+    });
+    const runs = [makeRun([successStory])];
+    const aggregate = calculateAggregateMetrics(runs);
+    expect(aggregate.complexityAccuracy["medium"].mismatchRate).toBe(0);
+  });
+  test("falls back to complexity when initialComplexity is absent (backward compat)", () => {
+    // Legacy story metrics without initialComplexity
+    const legacyStory = makeStoryMetrics({
+      storyId: "US-001",
+      complexity: "complex",
+      // no initialComplexity
+      modelTier: "powerful",
+      finalTier: "powerful",
+    });
+    const runs = [makeRun([legacyStory])];
+    const aggregate = calculateAggregateMetrics(runs);
+    // Falls back to complexity as key
+    expect(aggregate.complexityAccuracy["complex"]).toBeDefined();
+  });
+  test("mixes initialComplexity-keyed and legacy entries correctly", () => {
+    const modernStory = makeStoryMetrics({
+      storyId: "US-001",
+      complexity: "medium",
+      initialComplexity: "simple",
+      modelTier: "balanced",
+      finalTier: "balanced",
+    });
+    const legacyStory = makeStoryMetrics({
+      storyId: "US-002",
+      complexity: "complex",
+      // no initialComplexity
+      modelTier: "powerful",
+      finalTier: "powerful",
+    });
+    const runs = [makeRun([modernStory, legacyStory])];
+    const aggregate = calculateAggregateMetrics(runs);
+    expect(aggregate.complexityAccuracy["simple"]).toBeDefined();   // from initialComplexity
+    expect(aggregate.complexityAccuracy["complex"]).toBeDefined();  // from complexity fallback
+    expect(aggregate.complexityAccuracy["medium"]).toBeUndefined(); // NOT used (initialComplexity takes over)
+  });
+  test("complexityAccuracy.predicted count matches number of stories with that initialComplexity", () => {
+    const stories = [
+      makeStoryMetrics({ storyId: "US-001", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
+      makeStoryMetrics({ storyId: "US-002", complexity: "medium", initialComplexity: "simple", finalTier: "balanced" }),
+      makeStoryMetrics({ storyId: "US-003", complexity: "complex", initialComplexity: "complex", finalTier: "powerful" }),
+    ];
+    const runs = [makeRun(stories)];
+    const aggregate = calculateAggregateMetrics(runs);
+    expect(aggregate.complexityAccuracy["simple"].predicted).toBe(2);
+    expect(aggregate.complexityAccuracy["complex"].predicted).toBe(1);
+  });
+});

package/test/unit/metrics/tracker.test.ts ADDED Viewed

@@ -0,0 +1,186 @@
+/**
+ * Metrics Tracker — RRP-002: initialComplexity in StoryMetrics
+ *
+ * AC-4: StoryMetrics gains initialComplexity?: string field
+ * AC-5: collectStoryMetrics() reads story.routing.initialComplexity,
+ *       falls back to routing.complexity for backward compat
+ */
+import { describe, expect, test } from "bun:test";
+import { DEFAULT_CONFIG } from "../../../src/config/defaults";
+import type { NaxConfig } from "../../../src/config";
+import type { PipelineContext } from "../../../src/pipeline/types";
+import type { PRD, UserStory } from "../../../src/prd";
+import type { StoryRouting } from "../../../src/prd/types";
+import { collectStoryMetrics } from "../../../src/metrics/tracker";
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function makeStory(overrides?: Partial<UserStory>): UserStory {
+  return {
+    id: "US-001",
+    title: "Test Story",
+    description: "Test description",
+    acceptanceCriteria: [],
+    tags: [],
+    dependencies: [],
+    status: "passed",
+    passes: true,
+    escalations: [],
+    attempts: 1,
+    ...overrides,
+  };
+}
+function makePRD(story: UserStory): PRD {
+  return {
+    project: "test-project",
+    feature: "test-feature",
+    branchName: "feat/test",
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+    userStories: [story],
+  };
+}
+function makeConfig(): NaxConfig {
+  return { ...DEFAULT_CONFIG };
+}
+function makeCtx(story: UserStory, routingOverrides?: Partial<PipelineContext["routing"]>): PipelineContext {
+  return {
+    config: makeConfig(),
+    prd: makePRD(story),
+    story,
+    stories: [story],
+    routing: {
+      complexity: "medium",
+      modelTier: "balanced",
+      testStrategy: "test-after",
+      reasoning: "test",
+      ...routingOverrides,
+    },
+    workdir: "/tmp/nax-tracker-test",
+    hooks: { hooks: {} },
+    agentResult: {
+      success: true,
+      output: "",
+      estimatedCost: 0.01,
+      durationMs: 5000,
+    },
+  } as unknown as PipelineContext;
+}
+// ---------------------------------------------------------------------------
+// AC-5: collectStoryMetrics reads initialComplexity from story.routing
+// ---------------------------------------------------------------------------
+describe("collectStoryMetrics - initialComplexity field", () => {
+  test("includes initialComplexity from story.routing.initialComplexity", () => {
+    const routing: StoryRouting = {
+      complexity: "medium",
+      initialComplexity: "simple", // original prediction before potential escalation
+      testStrategy: "test-after",
+      reasoning: "test",
+    };
+    const story = makeStory({ routing });
+    const ctx = makeCtx(story, { complexity: "medium" });
+    const metrics = collectStoryMetrics(ctx, new Date().toISOString());
+    expect(metrics.initialComplexity).toBe("simple");
+  });
+  test("initialComplexity differs from complexity when story was escalated", () => {
+    const routing: StoryRouting = {
+      complexity: "medium",         // complexity as classified
+      initialComplexity: "simple",  // original first-classify prediction
+      modelTier: "powerful",        // escalated tier
+      testStrategy: "three-session-tdd",
+      reasoning: "escalated",
+    };
+    const story = makeStory({
+      routing,
+      escalations: [
+        {
+          fromTier: "balanced",
+          toTier: "powerful",
+          reason: "test failure",
+          timestamp: new Date().toISOString(),
+        },
+      ],
+      attempts: 2,
+    });
+    const ctx = makeCtx(story, { complexity: "medium", modelTier: "balanced" });
+    const metrics = collectStoryMetrics(ctx, new Date().toISOString());
+    expect(metrics.initialComplexity).toBe("simple");
+    // complexity field unchanged (backward compat)
+    expect(metrics.complexity).toBe("medium");
+  });
+  test("falls back to routing.complexity when story.routing.initialComplexity is absent", () => {
+    // Backward compat: story.routing exists but has no initialComplexity
+    const routing: StoryRouting = {
+      complexity: "complex",
+      testStrategy: "three-session-tdd",
+      reasoning: "legacy routing",
+      // no initialComplexity
+    };
+    const story = makeStory({ routing });
+    const ctx = makeCtx(story, { complexity: "complex" });
+    const metrics = collectStoryMetrics(ctx, new Date().toISOString());
+    expect(metrics.initialComplexity).toBe("complex");
+  });
+  test("falls back to routing.complexity when story.routing is undefined", () => {
+    const story = makeStory({ routing: undefined });
+    const ctx = makeCtx(story, { complexity: "simple" });
+    const metrics = collectStoryMetrics(ctx, new Date().toISOString());
+    expect(metrics.initialComplexity).toBe("simple");
+  });
+});
+// ---------------------------------------------------------------------------
+// AC-4: StoryMetrics type has initialComplexity?: string
+// ---------------------------------------------------------------------------
+describe("StoryMetrics type - initialComplexity field", () => {
+  test("StoryMetrics includes initialComplexity field", () => {
+    const routing: StoryRouting = {
+      complexity: "medium",
+      initialComplexity: "simple",
+      testStrategy: "test-after",
+      reasoning: "test",
+    };
+    const story = makeStory({ routing });
+    const ctx = makeCtx(story, { complexity: "medium" });
+    const metrics = collectStoryMetrics(ctx, new Date().toISOString());
+    // Runtime presence check only (`in` is not type-checked); typed access to metrics.initialComplexity is exercised in the tests above
+    expect("initialComplexity" in metrics).toBe(true);
+  });
+  test("initialComplexity is a string when present", () => {
+    const routing: StoryRouting = {
+      complexity: "expert",
+      initialComplexity: "expert",
+      testStrategy: "three-session-tdd",
+      reasoning: "test",
+    };
+    const story = makeStory({ routing });
+    const ctx = makeCtx(story, { complexity: "expert" });
+    const metrics = collectStoryMetrics(ctx, new Date().toISOString());
+    expect(typeof metrics.initialComplexity).toBe("string");
+  });
+});