npm - @nathapp/nax - Versions diffs - 0.39.2 → 0.40.0 - Mend

@nathapp/nax 0.39.2 → 0.40.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/dist/nax.js +1962 -1531
package/package.json +2 -2
package/src/acceptance/generator.ts +97 -1
package/src/acceptance/index.ts +12 -0
package/src/acceptance/refinement.ts +156 -0
package/src/acceptance/types.ts +44 -0
package/src/analyze/classifier.ts +1 -6
package/src/cli/prompts-tdd.ts +11 -1
package/src/config/defaults.ts +40 -1
package/src/config/runtime-types.ts +9 -1
package/src/config/schemas.ts +36 -1
package/src/execution/index.ts +0 -1
package/src/execution/runner.ts +0 -1
package/src/execution/sequential-executor.ts +15 -1
package/src/pipeline/stages/acceptance-setup.ts +135 -0
package/src/pipeline/stages/index.ts +7 -0
package/src/pipeline/stages/prompt.ts +11 -4
package/src/pipeline/types.ts +6 -0
package/src/prompts/builder.ts +25 -6
package/src/prompts/sections/conventions.ts +7 -1
package/src/prompts/sections/isolation.ts +21 -10
package/src/prompts/sections/role-task.ts +88 -15
package/src/prompts/sections/story.ts +43 -1
package/src/prompts/types.ts +1 -1
package/src/routing/strategies/llm-prompts.ts +26 -28
package/src/tdd/session-runner.ts +5 -0
package/src/execution/prompts.ts +0 -127

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@nathapp/nax",
-  "version": "0.39.2",
-  "description": "AI Coding Agent Orchestrator \u2014 loops until done",
+  "version": "0.40.0",
+  "description": "AI Coding Agent Orchestrator — loops until done",
   "type": "module",
   "bin": {
     "nax": "./dist/nax.js"

package/src/acceptance/generator.ts CHANGED Viewed

@@ -5,9 +5,18 @@
  * via LLM call to the agent adapter.
  */
+import { join } from "node:path";
+import { ClaudeCodeAdapter } from "../agents/claude";
 import type { AgentAdapter } from "../agents/types";
 import { getLogger } from "../logger";
-import type { AcceptanceCriterion, AcceptanceTestResult, GenerateAcceptanceTestsOptions } from "./types";
+import type { UserStory } from "../prd/types";
+import type {
+  AcceptanceCriterion,
+  AcceptanceTestResult,
+  GenerateAcceptanceTestsOptions,
+  GenerateFromPRDOptions,
+  RefinedCriterion,
+} from "./types";
 /**
  * Parse acceptance criteria from spec.md content.
@@ -31,6 +40,93 @@ import type { AcceptanceCriterion, AcceptanceTestResult, GenerateAcceptanceTests
  * // ]
  * ```
  */
+/**
+ * Injectable dependencies for generateFromPRD — allows tests to mock
+ * adapter.complete() and file writes without real binaries or disk I/O.
+ *
+ * - `adapter`: a `ClaudeCodeAdapter` instance, typed as `AgentAdapter`, created
+ *   once when this module is evaluated.
+ * - `writeFile`: writes `content` to `path` through `Bun.write`.
+ *
+ * generateFromPRD looks both properties up on this object at call time, so
+ * reassigning them here changes what it uses.
+ *
+ * @internal
+ */
+export const _generatorPRDDeps = {
+  adapter: new ClaudeCodeAdapter() as AgentAdapter,
+  writeFile: async (path: string, content: string): Promise<void> => {
+    await Bun.write(path, content);
+  },
+};
+/**
+ * Generate acceptance tests from PRD UserStory[] and RefinedCriterion[].
+ *
+ * Numbers the refined criteria AC-1..AC-N in input order, builds a bun:test
+ * generation prompt from them plus `options.featureName` and
+ * `options.codebaseContext`, and sends it to
+ * `_generatorPRDDeps.adapter.complete()`. The same numbered criteria
+ * (acId, original, refined, testable, storyId) are then written as
+ * 2-space-indented JSON to `acceptance-refined.json` inside `options.workdir`.
+ *
+ * When `refinedCriteria` is empty the function returns
+ * `{ testCode: "", criteria: [] }` right away: no adapter call, no file write.
+ *
+ * NOTE(review): only `featureName`, `codebaseContext` and `workdir` are read
+ * from `options` here; `modelTier`, `modelDef` and `config` are not referenced
+ * and `complete()` is called with the prompt alone — confirm this is intended.
+ * NOTE(review): the adapter's reply is returned as `testCode` unchanged; nothing
+ * in this function strips markdown fences if the model adds them despite the
+ * prompt's instruction.
+ *
+ * @param _stories - User stories from the PRD (not used by this function body)
+ * @param refinedCriteria - Refined criteria produced by the refinement module
+ * @param options - Generation options
+ * @returns The adapter's response as `testCode`, plus one criterion per refined
+ *   entry with `id` "AC-N", `text` set to the refined text and `lineNumber` N
+ */
+export async function generateFromPRD(
+  _stories: UserStory[],
+  refinedCriteria: RefinedCriterion[],
+  options: GenerateFromPRDOptions,
+): Promise<AcceptanceTestResult> {
+  const logger = getLogger();
+  const criteria: AcceptanceCriterion[] = refinedCriteria.map((c, i) => ({
+    id: `AC-${i + 1}`,
+    text: c.refined,
+    lineNumber: i + 1,
+  }));
+  if (refinedCriteria.length === 0) {
+    return { testCode: "", criteria: [] };
+  }
+  const criteriaList = refinedCriteria.map((c, i) => `AC-${i + 1}: ${c.refined}`).join("\n");
+  const prompt = `You are a test engineer. Generate acceptance tests for the "${options.featureName}" feature based on the refined acceptance criteria below.
+CODEBASE CONTEXT:
+${options.codebaseContext}
+ACCEPTANCE CRITERIA (refined):
+${criteriaList}
+Generate a complete acceptance.test.ts file using bun:test framework. Each AC maps to exactly one test named "AC-N: <description>".
+Use this structure:
+\`\`\`typescript
+import { describe, test, expect } from "bun:test";
+describe("${options.featureName} - Acceptance Tests", () => {
+  test("AC-1: <description>", async () => {
+    // Test implementation
+  });
+});
+\`\`\`
+Respond with ONLY the TypeScript test code (no markdown code fences, no explanation).`;
+  logger.info("acceptance", "Generating tests from PRD refined criteria", { count: refinedCriteria.length });
+  const testCode = await _generatorPRDDeps.adapter.complete(prompt);
+  const refinedJsonContent = JSON.stringify(
+    refinedCriteria.map((c, i) => ({
+      acId: `AC-${i + 1}`,
+      original: c.original,
+      refined: c.refined,
+      testable: c.testable,
+      storyId: c.storyId,
+    })),
+    null,
+    2,
+  );
+  await _generatorPRDDeps.writeFile(join(options.workdir, "acceptance-refined.json"), refinedJsonContent);
+  return { testCode, criteria };
+}
 export function parseAcceptanceCriteria(specContent: string): AcceptanceCriterion[] {
   const criteria: AcceptanceCriterion[] = [];
   const lines = specContent.split("\n");

package/src/acceptance/index.ts CHANGED Viewed

@@ -7,14 +7,26 @@
 export type {
   AcceptanceCriterion,
   GenerateAcceptanceTestsOptions,
+  GenerateFromPRDOptions,
   AcceptanceTestResult,
+  RefinedCriterion,
+  RefinementContext,
 } from "./types";
+export {
+  buildRefinementPrompt,
+  parseRefinementResponse,
+  refineAcceptanceCriteria,
+  _refineDeps,
+} from "./refinement";
 export {
   parseAcceptanceCriteria,
   buildAcceptanceTestPrompt,
   generateAcceptanceTests,
+  generateFromPRD,
   generateSkeletonTests,
+  _generatorPRDDeps,
 } from "./generator";
 export type {

package/src/acceptance/refinement.ts ADDED Viewed

@@ -0,0 +1,156 @@
+/**
+ * AC Refinement Module
+ *
+ * Takes raw PRD acceptanceCriteria strings and refines them into concrete,
+ * testable assertions using an LLM call via adapter.complete().
+ */
+import type { AgentAdapter } from "../agents";
+import { ClaudeCodeAdapter } from "../agents/claude";
+import { resolveModel } from "../config/schema";
+import { getLogger } from "../logger";
+import { errorMessage } from "../utils/errors";
+import type { RefinedCriterion, RefinementContext } from "./types";
+/**
+ * Injectable dependencies — allows tests to mock adapter.complete()
+ * without needing the claude binary.
+ *
+ * `adapter` is a `ClaudeCodeAdapter` instance, typed as `AgentAdapter`, created
+ * once when this module is evaluated. refineAcceptanceCriteria reads it from
+ * this object on every call, so reassigning `_refineDeps.adapter` swaps the
+ * adapter it uses.
+ *
+ * @internal
+ */
+export const _refineDeps = {
+  adapter: new ClaudeCodeAdapter() as AgentAdapter,
+};
+/**
+ * Build the LLM prompt for refining acceptance criteria.
+ *
+ * The criteria are rendered as a 1-based numbered list ("1. ...", "2. ..."),
+ * one per line, and `codebaseContext` is inserted verbatim under the
+ * "CODEBASE CONTEXT:" heading. The prompt asks for a bare JSON array of
+ * objects with `original`, `refined`, `testable` and `storyId` keys, with
+ * `storyId` left as an empty string for the caller to fill in.
+ *
+ * Pure string construction: no I/O and no validation of either argument.
+ *
+ * @param criteria - Raw AC strings from PRD
+ * @param codebaseContext - File tree / dependency context
+ * @returns Formatted prompt string
+ */
+export function buildRefinementPrompt(criteria: string[], codebaseContext: string): string {
+  const criteriaList = criteria.map((c, i) => `${i + 1}. ${c}`).join("\n");
+  return `You are an acceptance criteria refinement assistant. Your task is to convert raw acceptance criteria into concrete, machine-verifiable assertions.
+CODEBASE CONTEXT:
+${codebaseContext}
+ACCEPTANCE CRITERIA TO REFINE:
+${criteriaList}
+For each criterion, produce a refined version that is concrete and automatically testable where possible.
+Respond with ONLY a JSON array (no markdown code fences):
+[{
+  "original": "<exact original criterion text>",
+  "refined": "<concrete, machine-verifiable description>",
+  "testable": true,
+  "storyId": ""
+}]
+Rules:
+- "original" must match the input criterion text exactly
+- "refined" must be a concrete assertion (e.g., "Function returns array of length N", "HTTP status 200 returned")
+- "testable" is false only if the criterion cannot be automatically verified (e.g., "UX feels responsive", "design looks good")
+- "storyId" leave as empty string — it will be assigned by the caller
+- Respond with ONLY the JSON array`;
+}
+/**
+ * Parse the LLM JSON response into RefinedCriterion[].
+ *
+ * Whole-response fallback (every criterion returned unrefined, testable: true,
+ * storyId ""): the response is empty or whitespace-only, the parsed JSON is not
+ * an array, or anything inside the try block throws (for example JSON.parse on
+ * malformed text).
+ *
+ * Per-item fallback, applied to each element of a parsed array by position:
+ * - `original` / `refined`: kept when a non-empty string, otherwise the
+ *   criterion at the same index (or "" when there is none)
+ * - `testable`: kept when a boolean, otherwise true
+ * - `storyId`: kept when a string, otherwise ""
+ *
+ * NOTE(review): the result has one entry per element of the parsed array, not
+ * one per input criterion. If the model returns fewer items than `criteria`,
+ * the extra criteria are not filled in from the originals; if it returns more,
+ * the surplus items are kept. Confirm callers tolerate a length mismatch.
+ * NOTE(review): no markdown-fence stripping happens before JSON.parse, so a
+ * fenced reply takes the whole-response fallback.
+ *
+ * @param response - Raw LLM response text
+ * @param criteria - Original criteria strings (used as fallback)
+ * @returns Array of refined criteria
+ */
+export function parseRefinementResponse(response: string, criteria: string[]): RefinedCriterion[] {
+  if (!response || !response.trim()) {
+    return fallbackCriteria(criteria);
+  }
+  try {
+    const parsed: unknown = JSON.parse(response);
+    if (!Array.isArray(parsed)) {
+      return fallbackCriteria(criteria);
+    }
+    return (parsed as RefinedCriterion[]).map((item, i) => ({
+      original: typeof item.original === "string" && item.original.length > 0 ? item.original : (criteria[i] ?? ""),
+      refined: typeof item.refined === "string" && item.refined.length > 0 ? item.refined : (criteria[i] ?? ""),
+      testable: typeof item.testable === "boolean" ? item.testable : true,
+      storyId: typeof item.storyId === "string" ? item.storyId : "",
+    }));
+  } catch {
+    return fallbackCriteria(criteria);
+  }
+}
+/**
+ * Refine raw acceptance criteria strings into concrete, testable assertions.
+ *
+ * Picks the model tier from `config.acceptance?.model` ("fast" when unset) and
+ * uses `config.models.fast` when the chosen tier has no entry. The refinement
+ * prompt is sent through `_refineDeps.adapter.complete()` with `jsonMode: true`
+ * and `maxTokens: 4096`, and the reply is parsed by parseRefinementResponse.
+ * Items that come back with an empty `storyId` receive `context.storyId`.
+ *
+ * If the adapter call throws, a warning is logged and the original criteria are
+ * returned unrefined (testable: true, storyId from the context) instead of
+ * propagating the error. An empty `criteria` array returns `[]` before config
+ * or the adapter is touched.
+ *
+ * @param criteria - Raw AC strings from PRD
+ * @param context - Refinement context (storyId, codebase context, config)
+ * @returns Promise resolving to array of refined criteria
+ * @throws Error when neither the selected tier nor `fast` has an entry in
+ *   `config.models`
+ */
+export async function refineAcceptanceCriteria(
+  criteria: string[],
+  context: RefinementContext,
+): Promise<RefinedCriterion[]> {
+  if (criteria.length === 0) {
+    return [];
+  }
+  const { storyId, codebaseContext, config } = context;
+  const logger = getLogger();
+  const modelTier = config.acceptance?.model ?? "fast";
+  const modelEntry = config.models[modelTier] ?? config.models.fast;
+  if (!modelEntry) {
+    throw new Error(`[refinement] config.models.${modelTier} not configured`);
+  }
+  const modelDef = resolveModel(modelEntry);
+  const prompt = buildRefinementPrompt(criteria, codebaseContext);
+  let response: string;
+  try {
+    response = await _refineDeps.adapter.complete(prompt, {
+      jsonMode: true,
+      maxTokens: 4096,
+      model: modelDef.model,
+    });
+  } catch (error) {
+    const reason = errorMessage(error);
+    logger.warn("refinement", "adapter.complete() failed, falling back to original criteria", {
+      storyId,
+      error: reason,
+    });
+    return fallbackCriteria(criteria, storyId);
+  }
+  const parsed = parseRefinementResponse(response, criteria);
+  return parsed.map((item) => ({
+    ...item,
+    storyId: item.storyId || storyId,
+  }));
+}
+/**
+ * Build fallback RefinedCriterion[] using original criterion text.
+ *
+ * Each input string becomes one entry whose `original` and `refined` are both
+ * that string, with `testable` fixed to true. Order and length match the input.
+ *
+ * @param criteria - Original criterion strings
+ * @param storyId - Story id stamped on every entry; defaults to ""
+ * @returns One unrefined RefinedCriterion per input string
+ */
+function fallbackCriteria(criteria: string[], storyId = ""): RefinedCriterion[] {
+  return criteria.map((c) => ({
+    original: c,
+    refined: c,
+    testable: true,
+    storyId,
+  }));
+}

package/src/acceptance/types.ts CHANGED Viewed

@@ -6,6 +6,32 @@
 import type { ModelDef, ModelTier, NaxConfig } from "../config/schema";
+/**
+ * A single refined acceptance criterion produced by the refinement module.
+ *
+ * When refinement falls back (adapter failure or unparseable response),
+ * `refined` equals `original` and `testable` is true.
+ */
+export interface RefinedCriterion {
+  /** The original criterion text from the PRD */
+  original: string;
+  /** Concrete, machine-verifiable description produced by LLM */
+  refined: string;
+  /** False if the LLM determines the criterion cannot be automatically tested; true when the LLM gives no boolean */
+  testable: boolean;
+  /** The story ID this criterion belongs to; may be "" straight out of parseRefinementResponse, filled in by refineAcceptanceCriteria */
+  storyId: string;
+}
+/**
+ * Context passed to refineAcceptanceCriteria.
+ */
+export interface RefinementContext {
+  /** Story ID for attribution on each RefinedCriterion that has no storyId of its own */
+  storyId: string;
+  /** Codebase context string (file tree, dependencies, etc.); inserted verbatim into the refinement prompt */
+  codebaseContext: string;
+  /** Global config — model tier resolved from config.acceptance.model, "fast" when unset */
+  config: NaxConfig;
+}
 /**
  * A single acceptance criterion extracted from spec.md.
  *
@@ -42,6 +68,24 @@ export interface AcceptanceCriterion {
  * };
  * ```
  */
+/**
+ * Options for generating acceptance tests from PRD stories and refined criteria.
+ *
+ * NOTE(review): the generateFromPRD implementation visible alongside this type
+ * reads only `featureName`, `workdir` and `codebaseContext`; `modelTier`,
+ * `modelDef` and `config` are accepted but not referenced there — confirm
+ * whether they are reserved for later use.
+ */
+export interface GenerateFromPRDOptions {
+  /** Feature name for context; interpolated into the generation prompt */
+  featureName: string;
+  /** Working directory; generateFromPRD writes acceptance-refined.json into it */
+  workdir: string;
+  /** Codebase context (file tree, dependencies, test patterns) */
+  codebaseContext: string;
+  /** Model tier to use for test generation */
+  modelTier: ModelTier;
+  /** Resolved model definition */
+  modelDef: ModelDef;
+  /** Global config for quality settings */
+  config: NaxConfig;
+}
 export interface GenerateAcceptanceTestsOptions {
   /** Full spec.md content */
   specContent: string;

package/src/analyze/classifier.ts CHANGED Viewed

@@ -105,11 +105,6 @@ async function classifyWithLLM(
   scan: CodebaseScan,
   config: NaxConfig,
 ): Promise<StoryClassification[]> {
-  // Check for required environment variables
-  if (!process.env.ANTHROPIC_API_KEY) {
-    throw new Error("ANTHROPIC_API_KEY environment variable not configured — cannot use LLM classification");
-  }
   // Build prompt
   const prompt = buildClassificationPrompt(stories, scan);
@@ -120,7 +115,7 @@ async function classifyWithLLM(
   }
   const modelDef = resolveModel(fastModelEntry);
-  // Make API call via adapter (use haiku for cheap classification)
+  // Make API call via adapter (uses config.models.fast tier)
   const jsonText = await _classifyDeps.adapter.complete(prompt, {
     jsonMode: true,
     maxTokens: 4096,

package/src/cli/prompts-tdd.ts CHANGED Viewed

@@ -31,13 +31,23 @@ export async function handleThreeSessionTddPrompts(
       .withLoader(ctx.workdir, ctx.config)
       .story(story)
       .context(ctx.contextMarkdown)
+      .constitution(ctx.constitution?.content)
+      .testCommand(ctx.config.quality?.commands?.test)
       .build(),
     PromptBuilder.for("implementer", { variant: "standard" })
       .withLoader(ctx.workdir, ctx.config)
       .story(story)
       .context(ctx.contextMarkdown)
+      .constitution(ctx.constitution?.content)
+      .testCommand(ctx.config.quality?.commands?.test)
+      .build(),
+    PromptBuilder.for("verifier")
+      .withLoader(ctx.workdir, ctx.config)
+      .story(story)
+      .context(ctx.contextMarkdown)
+      .constitution(ctx.constitution?.content)
+      .testCommand(ctx.config.quality?.commands?.test)
       .build(),
-    PromptBuilder.for("verifier").withLoader(ctx.workdir, ctx.config).story(story).context(ctx.contextMarkdown).build(),
   ]);
   const sessions = [

package/src/config/defaults.ts CHANGED Viewed

@@ -84,7 +84,43 @@ export const DEFAULT_CONFIG: NaxConfig = {
     dangerouslySkipPermissions: true,
     drainTimeoutMs: 2000,
     shell: "/bin/sh",
-    stripEnvVars: ["CLAUDECODE", "REPL_ID", "AGENT"],
+    stripEnvVars: [
+      // Agent detection markers
+      "CLAUDECODE",
+      "REPL_ID",
+      "AGENT",
+      // Source control tokens
+      "GITLAB_ACCESS_TOKEN",
+      "GITHUB_TOKEN",
+      "GITHUB_ACCESS_TOKEN",
+      "GH_TOKEN",
+      "CI_GIT_TOKEN",
+      "CI_JOB_TOKEN",
+      "BITBUCKET_ACCESS_TOKEN",
+      // Package registry tokens
+      "NPM_TOKEN",
+      "NPM_AUTH_TOKEN",
+      "YARN_NPM_AUTH_TOKEN",
+      // LLM API keys (agent gets these via allowlist in buildAllowedEnv; test runners don't need them)
+      "ANTHROPIC_API_KEY",
+      "OPENAI_API_KEY",
+      "GEMINI_API_KEY",
+      "COHERE_API_KEY",
+      // Cloud / infra credentials
+      "AWS_ACCESS_KEY_ID",
+      "AWS_SECRET_ACCESS_KEY",
+      "AWS_SESSION_TOKEN",
+      "GOOGLE_APPLICATION_CREDENTIALS",
+      "GCLOUD_SERVICE_KEY",
+      "AZURE_CLIENT_SECRET",
+      "AZURE_TENANT_ID",
+      // CI secrets
+      "TELEGRAM_BOT_TOKEN",
+      "SLACK_TOKEN",
+      "SLACK_WEBHOOK_URL",
+      "SENTRY_AUTH_TOKEN",
+      "DATADOG_API_KEY",
+    ],
     environmentalEscalationDivisor: 2,
   },
   tdd: {
@@ -127,6 +163,9 @@ export const DEFAULT_CONFIG: NaxConfig = {
     maxRetries: 2,
     generateTests: true,
     testPath: "acceptance.test.ts",
+    model: "fast" as const,
+    refinement: true,
+    redGate: true,
   },
   context: {
     fileInjection: "disabled",

package/src/config/runtime-types.ts CHANGED Viewed

@@ -238,6 +238,12 @@ export interface AcceptanceConfig {
   generateTests: boolean;
   /** Path to acceptance test file (relative to feature directory) */
   testPath: string;
+  /** Model tier for AC refinement LLM calls (default: "fast") */
+  model: ModelTier;
+  /** Whether to LLM-refine acceptance criteria before generating tests (default: true) */
+  refinement: boolean;
+  /** Whether to run RED gate check after generating acceptance tests (default: true) */
+  redGate: boolean;
 }
 /** Optimizer config (v0.10) */
@@ -387,7 +393,9 @@ export interface RoutingConfig {
 /** Prompt overrides config (PB-003) */
 export interface PromptsConfig {
-  overrides?: Partial<Record<"test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple", string>>;
+  overrides?: Partial<
+    Record<"test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple" | "batch", string>
+  >;
 }
 /** Decompose config (SD-003) */

package/src/config/schemas.ts CHANGED Viewed

@@ -127,7 +127,39 @@ const QualityConfigSchema = z.object({
   gracePeriodMs: z.number().int().min(500).max(30000).default(5000),
   drainTimeoutMs: z.number().int().min(0).max(10000).default(2000),
   shell: z.string().default("/bin/sh"),
-  stripEnvVars: z.array(z.string()).default(["CLAUDECODE", "REPL_ID", "AGENT"]),
+  stripEnvVars: z
+    .array(z.string())
+    .default([
+      "CLAUDECODE",
+      "REPL_ID",
+      "AGENT",
+      "GITLAB_ACCESS_TOKEN",
+      "GITHUB_TOKEN",
+      "GITHUB_ACCESS_TOKEN",
+      "GH_TOKEN",
+      "CI_GIT_TOKEN",
+      "CI_JOB_TOKEN",
+      "BITBUCKET_ACCESS_TOKEN",
+      "NPM_TOKEN",
+      "NPM_AUTH_TOKEN",
+      "YARN_NPM_AUTH_TOKEN",
+      "ANTHROPIC_API_KEY",
+      "OPENAI_API_KEY",
+      "GEMINI_API_KEY",
+      "COHERE_API_KEY",
+      "AWS_ACCESS_KEY_ID",
+      "AWS_SECRET_ACCESS_KEY",
+      "AWS_SESSION_TOKEN",
+      "GOOGLE_APPLICATION_CREDENTIALS",
+      "GCLOUD_SERVICE_KEY",
+      "AZURE_CLIENT_SECRET",
+      "AZURE_TENANT_ID",
+      "TELEGRAM_BOT_TOKEN",
+      "SLACK_TOKEN",
+      "SLACK_WEBHOOK_URL",
+      "SENTRY_AUTH_TOKEN",
+      "DATADOG_API_KEY",
+    ]),
   environmentalEscalationDivisor: z.number().min(1).max(10).default(2),
 });
@@ -183,6 +215,9 @@ const AcceptanceConfigSchema = z.object({
   maxRetries: z.number().int().nonnegative(),
   generateTests: z.boolean(),
   testPath: z.string().min(1, "acceptance.testPath must be non-empty"),
+  model: z.enum(["fast", "balanced", "powerful"]).default("fast"),
+  refinement: z.boolean().default(true),
+  redGate: z.boolean().default(true),
 });
 const TestCoverageConfigSchema = z.object({

package/src/execution/index.ts CHANGED Viewed

@@ -2,7 +2,6 @@ export type { RunOptions, RunResult } from "./runner";
 export { run } from "./runner";
 export type { FailureCategory } from "../tdd/types";
 export { appendProgress } from "./progress";
-export { buildSingleSessionPrompt, buildBatchPrompt } from "./prompts";
 export { groupStoriesIntoBatches, type StoryBatch } from "./batching";
 export { escalateTier, getTierConfig, calculateMaxIterations } from "./escalation";
 export { readQueueFile, clearQueueFile } from "./queue-handler";

package/src/execution/runner.ts CHANGED Viewed

@@ -247,6 +247,5 @@ export async function run(options: RunOptions): Promise<RunResult> {
 }
 // Re-exports for backward compatibility with existing test imports
-export { buildSingleSessionPrompt, buildBatchPrompt } from "./prompts";
 export { groupStoriesIntoBatches, type StoryBatch } from "./batching";
 export { escalateTier } from "./escalation";

package/src/execution/sequential-executor.ts CHANGED Viewed

@@ -5,7 +5,7 @@ import { getSafeLogger } from "../logger";
 import type { StoryMetrics } from "../metrics";
 import { pipelineEventBus } from "../pipeline/event-bus";
 import { runPipeline } from "../pipeline/runner";
-import { postRunPipeline } from "../pipeline/stages";
+import { postRunPipeline, preRunPipeline } from "../pipeline/stages";
 import { wireEventsWriter } from "../pipeline/subscribers/events-writer";
 import { wireHooks } from "../pipeline/subscribers/hooks";
 import { wireInteraction } from "../pipeline/subscribers/interaction";
@@ -68,6 +68,20 @@ export async function executeSequential(
   );
   try {
+    // Pre-run pipeline (acceptance test setup with RED gate)
+    logger?.info("execution", "Running pre-run pipeline (acceptance test setup)");
+    const preRunCtx: PipelineContext = {
+      config: ctx.config,
+      prd,
+      workdir: ctx.workdir,
+      featureDir: ctx.featureDir,
+      story: prd.userStories[0],
+      stories: prd.userStories,
+      routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "" },
+      hooks: ctx.hooks,
+    };
+    await runPipeline(preRunPipeline, preRunCtx, ctx.eventEmitter);
     while (iterations < ctx.config.execution.maxIterations) {
       iterations++;
       if (Math.round(process.memoryUsage().heapUsed / 1024 / 1024) > 1024)