@nathapp/nax 0.39.2 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +1962 -1531
- package/package.json +2 -2
- package/src/acceptance/generator.ts +97 -1
- package/src/acceptance/index.ts +12 -0
- package/src/acceptance/refinement.ts +156 -0
- package/src/acceptance/types.ts +44 -0
- package/src/analyze/classifier.ts +1 -6
- package/src/cli/prompts-tdd.ts +11 -1
- package/src/config/defaults.ts +40 -1
- package/src/config/runtime-types.ts +9 -1
- package/src/config/schemas.ts +36 -1
- package/src/execution/index.ts +0 -1
- package/src/execution/runner.ts +0 -1
- package/src/execution/sequential-executor.ts +15 -1
- package/src/pipeline/stages/acceptance-setup.ts +135 -0
- package/src/pipeline/stages/index.ts +7 -0
- package/src/pipeline/stages/prompt.ts +11 -4
- package/src/pipeline/types.ts +6 -0
- package/src/prompts/builder.ts +25 -6
- package/src/prompts/sections/conventions.ts +7 -1
- package/src/prompts/sections/isolation.ts +21 -10
- package/src/prompts/sections/role-task.ts +88 -15
- package/src/prompts/sections/story.ts +43 -1
- package/src/prompts/types.ts +1 -1
- package/src/routing/strategies/llm-prompts.ts +26 -28
- package/src/tdd/session-runner.ts +5 -0
- package/src/execution/prompts.ts +0 -127
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nathapp/nax",
|
|
3
|
-
"version": "0.39.2",
|
|
4
|
-
"description": "AI Coding Agent Orchestrator
|
|
3
|
+
"version": "0.40.0",
|
|
4
|
+
"description": "AI Coding Agent Orchestrator — loops until done",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"nax": "./dist/nax.js"
|
package/src/acceptance/generator.ts
CHANGED
|
@@ -5,9 +5,18 @@
|
|
|
5
5
|
* via LLM call to the agent adapter.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
import { ClaudeCodeAdapter } from "../agents/claude";
|
|
8
10
|
import type { AgentAdapter } from "../agents/types";
|
|
9
11
|
import { getLogger } from "../logger";
|
|
10
|
-
import type { AcceptanceCriterion, AcceptanceTestResult, GenerateAcceptanceTestsOptions } from "./types";
|
|
12
|
+
import type { UserStory } from "../prd/types";
|
|
13
|
+
import type {
|
|
14
|
+
AcceptanceCriterion,
|
|
15
|
+
AcceptanceTestResult,
|
|
16
|
+
GenerateAcceptanceTestsOptions,
|
|
17
|
+
GenerateFromPRDOptions,
|
|
18
|
+
RefinedCriterion,
|
|
19
|
+
} from "./types";
|
|
11
20
|
|
|
12
21
|
/**
|
|
13
22
|
* Parse acceptance criteria from spec.md content.
|
|
@@ -31,6 +40,93 @@ import type { AcceptanceCriterion, AcceptanceTestResult, GenerateAcceptanceTests
|
|
|
31
40
|
* // ]
|
|
32
41
|
* ```
|
|
33
42
|
*/
|
|
43
|
+
/**
|
|
44
|
+
* Injectable dependencies for generateFromPRD — allows tests to mock
|
|
45
|
+
* adapter.complete() and file writes without real binaries or disk I/O.
|
|
46
|
+
*
|
|
47
|
+
* @internal
|
|
48
|
+
*/
|
|
49
|
+
export const _generatorPRDDeps = {
|
|
50
|
+
adapter: new ClaudeCodeAdapter() as AgentAdapter,
|
|
51
|
+
writeFile: async (path: string, content: string): Promise<void> => {
|
|
52
|
+
await Bun.write(path, content);
|
|
53
|
+
},
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Generate acceptance tests from PRD UserStory[] and RefinedCriterion[].
|
|
58
|
+
*
|
|
59
|
+
* This is a stub — implementation is provided by the implementer session.
|
|
60
|
+
*
|
|
61
|
+
* @param stories - User stories from the PRD
|
|
62
|
+
* @param refinedCriteria - Refined criteria produced by the refinement module
|
|
63
|
+
* @param options - Generation options
|
|
64
|
+
* @returns Generated test code and processed criteria
|
|
65
|
+
*/
|
|
66
|
+
export async function generateFromPRD(
|
|
67
|
+
_stories: UserStory[],
|
|
68
|
+
refinedCriteria: RefinedCriterion[],
|
|
69
|
+
options: GenerateFromPRDOptions,
|
|
70
|
+
): Promise<AcceptanceTestResult> {
|
|
71
|
+
const logger = getLogger();
|
|
72
|
+
|
|
73
|
+
const criteria: AcceptanceCriterion[] = refinedCriteria.map((c, i) => ({
|
|
74
|
+
id: `AC-${i + 1}`,
|
|
75
|
+
text: c.refined,
|
|
76
|
+
lineNumber: i + 1,
|
|
77
|
+
}));
|
|
78
|
+
|
|
79
|
+
if (refinedCriteria.length === 0) {
|
|
80
|
+
return { testCode: "", criteria: [] };
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const criteriaList = refinedCriteria.map((c, i) => `AC-${i + 1}: ${c.refined}`).join("\n");
|
|
84
|
+
|
|
85
|
+
const prompt = `You are a test engineer. Generate acceptance tests for the "${options.featureName}" feature based on the refined acceptance criteria below.
|
|
86
|
+
|
|
87
|
+
CODEBASE CONTEXT:
|
|
88
|
+
${options.codebaseContext}
|
|
89
|
+
|
|
90
|
+
ACCEPTANCE CRITERIA (refined):
|
|
91
|
+
${criteriaList}
|
|
92
|
+
|
|
93
|
+
Generate a complete acceptance.test.ts file using bun:test framework. Each AC maps to exactly one test named "AC-N: <description>".
|
|
94
|
+
|
|
95
|
+
Use this structure:
|
|
96
|
+
|
|
97
|
+
\`\`\`typescript
|
|
98
|
+
import { describe, test, expect } from "bun:test";
|
|
99
|
+
|
|
100
|
+
describe("${options.featureName} - Acceptance Tests", () => {
|
|
101
|
+
test("AC-1: <description>", async () => {
|
|
102
|
+
// Test implementation
|
|
103
|
+
});
|
|
104
|
+
});
|
|
105
|
+
\`\`\`
|
|
106
|
+
|
|
107
|
+
Respond with ONLY the TypeScript test code (no markdown code fences, no explanation).`;
|
|
108
|
+
|
|
109
|
+
logger.info("acceptance", "Generating tests from PRD refined criteria", { count: refinedCriteria.length });
|
|
110
|
+
|
|
111
|
+
const testCode = await _generatorPRDDeps.adapter.complete(prompt);
|
|
112
|
+
|
|
113
|
+
const refinedJsonContent = JSON.stringify(
|
|
114
|
+
refinedCriteria.map((c, i) => ({
|
|
115
|
+
acId: `AC-${i + 1}`,
|
|
116
|
+
original: c.original,
|
|
117
|
+
refined: c.refined,
|
|
118
|
+
testable: c.testable,
|
|
119
|
+
storyId: c.storyId,
|
|
120
|
+
})),
|
|
121
|
+
null,
|
|
122
|
+
2,
|
|
123
|
+
);
|
|
124
|
+
|
|
125
|
+
await _generatorPRDDeps.writeFile(join(options.workdir, "acceptance-refined.json"), refinedJsonContent);
|
|
126
|
+
|
|
127
|
+
return { testCode, criteria };
|
|
128
|
+
}
|
|
129
|
+
|
|
34
130
|
export function parseAcceptanceCriteria(specContent: string): AcceptanceCriterion[] {
|
|
35
131
|
const criteria: AcceptanceCriterion[] = [];
|
|
36
132
|
const lines = specContent.split("\n");
|
package/src/acceptance/index.ts
CHANGED
|
@@ -7,14 +7,26 @@
|
|
|
7
7
|
export type {
|
|
8
8
|
AcceptanceCriterion,
|
|
9
9
|
GenerateAcceptanceTestsOptions,
|
|
10
|
+
GenerateFromPRDOptions,
|
|
10
11
|
AcceptanceTestResult,
|
|
12
|
+
RefinedCriterion,
|
|
13
|
+
RefinementContext,
|
|
11
14
|
} from "./types";
|
|
12
15
|
|
|
16
|
+
export {
|
|
17
|
+
buildRefinementPrompt,
|
|
18
|
+
parseRefinementResponse,
|
|
19
|
+
refineAcceptanceCriteria,
|
|
20
|
+
_refineDeps,
|
|
21
|
+
} from "./refinement";
|
|
22
|
+
|
|
13
23
|
export {
|
|
14
24
|
parseAcceptanceCriteria,
|
|
15
25
|
buildAcceptanceTestPrompt,
|
|
16
26
|
generateAcceptanceTests,
|
|
27
|
+
generateFromPRD,
|
|
17
28
|
generateSkeletonTests,
|
|
29
|
+
_generatorPRDDeps,
|
|
18
30
|
} from "./generator";
|
|
19
31
|
|
|
20
32
|
export type {
|
package/src/acceptance/refinement.ts
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AC Refinement Module
|
|
3
|
+
*
|
|
4
|
+
* Takes raw PRD acceptanceCriteria strings and refines them into concrete,
|
|
5
|
+
* testable assertions using an LLM call via adapter.complete().
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { AgentAdapter } from "../agents";
|
|
9
|
+
import { ClaudeCodeAdapter } from "../agents/claude";
|
|
10
|
+
import { resolveModel } from "../config/schema";
|
|
11
|
+
import { getLogger } from "../logger";
|
|
12
|
+
import { errorMessage } from "../utils/errors";
|
|
13
|
+
import type { RefinedCriterion, RefinementContext } from "./types";
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Injectable dependencies — allows tests to mock adapter.complete()
|
|
17
|
+
* without needing the claude binary.
|
|
18
|
+
*
|
|
19
|
+
* @internal
|
|
20
|
+
*/
|
|
21
|
+
export const _refineDeps = {
|
|
22
|
+
adapter: new ClaudeCodeAdapter() as AgentAdapter,
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Build the LLM prompt for refining acceptance criteria.
|
|
27
|
+
*
|
|
28
|
+
* @param criteria - Raw AC strings from PRD
|
|
29
|
+
* @param codebaseContext - File tree / dependency context
|
|
30
|
+
* @returns Formatted prompt string
|
|
31
|
+
*/
|
|
32
|
+
export function buildRefinementPrompt(criteria: string[], codebaseContext: string): string {
|
|
33
|
+
const criteriaList = criteria.map((c, i) => `${i + 1}. ${c}`).join("\n");
|
|
34
|
+
|
|
35
|
+
return `You are an acceptance criteria refinement assistant. Your task is to convert raw acceptance criteria into concrete, machine-verifiable assertions.
|
|
36
|
+
|
|
37
|
+
CODEBASE CONTEXT:
|
|
38
|
+
${codebaseContext}
|
|
39
|
+
|
|
40
|
+
ACCEPTANCE CRITERIA TO REFINE:
|
|
41
|
+
${criteriaList}
|
|
42
|
+
|
|
43
|
+
For each criterion, produce a refined version that is concrete and automatically testable where possible.
|
|
44
|
+
Respond with ONLY a JSON array (no markdown code fences):
|
|
45
|
+
[{
|
|
46
|
+
"original": "<exact original criterion text>",
|
|
47
|
+
"refined": "<concrete, machine-verifiable description>",
|
|
48
|
+
"testable": true,
|
|
49
|
+
"storyId": ""
|
|
50
|
+
}]
|
|
51
|
+
|
|
52
|
+
Rules:
|
|
53
|
+
- "original" must match the input criterion text exactly
|
|
54
|
+
- "refined" must be a concrete assertion (e.g., "Function returns array of length N", "HTTP status 200 returned")
|
|
55
|
+
- "testable" is false only if the criterion cannot be automatically verified (e.g., "UX feels responsive", "design looks good")
|
|
56
|
+
- "storyId" leave as empty string — it will be assigned by the caller
|
|
57
|
+
- Respond with ONLY the JSON array`;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Parse the LLM JSON response into RefinedCriterion[].
|
|
62
|
+
*
|
|
63
|
+
* Falls back gracefully: if JSON is malformed or a criterion is missing,
|
|
64
|
+
* uses the original text with testable: true.
|
|
65
|
+
*
|
|
66
|
+
* @param response - Raw LLM response text
|
|
67
|
+
* @param criteria - Original criteria strings (used as fallback)
|
|
68
|
+
* @returns Array of refined criteria
|
|
69
|
+
*/
|
|
70
|
+
export function parseRefinementResponse(response: string, criteria: string[]): RefinedCriterion[] {
|
|
71
|
+
if (!response || !response.trim()) {
|
|
72
|
+
return fallbackCriteria(criteria);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
try {
|
|
76
|
+
const parsed: unknown = JSON.parse(response);
|
|
77
|
+
|
|
78
|
+
if (!Array.isArray(parsed)) {
|
|
79
|
+
return fallbackCriteria(criteria);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return (parsed as RefinedCriterion[]).map((item, i) => ({
|
|
83
|
+
original: typeof item.original === "string" && item.original.length > 0 ? item.original : (criteria[i] ?? ""),
|
|
84
|
+
refined: typeof item.refined === "string" && item.refined.length > 0 ? item.refined : (criteria[i] ?? ""),
|
|
85
|
+
testable: typeof item.testable === "boolean" ? item.testable : true,
|
|
86
|
+
storyId: typeof item.storyId === "string" ? item.storyId : "",
|
|
87
|
+
}));
|
|
88
|
+
} catch {
|
|
89
|
+
return fallbackCriteria(criteria);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/**
|
|
94
|
+
* Refine raw acceptance criteria strings into concrete, testable assertions.
|
|
95
|
+
*
|
|
96
|
+
* @param criteria - Raw AC strings from PRD
|
|
97
|
+
* @param context - Refinement context (storyId, codebase context, config)
|
|
98
|
+
* @returns Promise resolving to array of refined criteria
|
|
99
|
+
*/
|
|
100
|
+
export async function refineAcceptanceCriteria(
|
|
101
|
+
criteria: string[],
|
|
102
|
+
context: RefinementContext,
|
|
103
|
+
): Promise<RefinedCriterion[]> {
|
|
104
|
+
if (criteria.length === 0) {
|
|
105
|
+
return [];
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
const { storyId, codebaseContext, config } = context;
|
|
109
|
+
const logger = getLogger();
|
|
110
|
+
|
|
111
|
+
const modelTier = config.acceptance?.model ?? "fast";
|
|
112
|
+
const modelEntry = config.models[modelTier] ?? config.models.fast;
|
|
113
|
+
|
|
114
|
+
if (!modelEntry) {
|
|
115
|
+
throw new Error(`[refinement] config.models.${modelTier} not configured`);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
const modelDef = resolveModel(modelEntry);
|
|
119
|
+
const prompt = buildRefinementPrompt(criteria, codebaseContext);
|
|
120
|
+
|
|
121
|
+
let response: string;
|
|
122
|
+
|
|
123
|
+
try {
|
|
124
|
+
response = await _refineDeps.adapter.complete(prompt, {
|
|
125
|
+
jsonMode: true,
|
|
126
|
+
maxTokens: 4096,
|
|
127
|
+
model: modelDef.model,
|
|
128
|
+
});
|
|
129
|
+
} catch (error) {
|
|
130
|
+
const reason = errorMessage(error);
|
|
131
|
+
logger.warn("refinement", "adapter.complete() failed, falling back to original criteria", {
|
|
132
|
+
storyId,
|
|
133
|
+
error: reason,
|
|
134
|
+
});
|
|
135
|
+
return fallbackCriteria(criteria, storyId);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const parsed = parseRefinementResponse(response, criteria);
|
|
139
|
+
|
|
140
|
+
return parsed.map((item) => ({
|
|
141
|
+
...item,
|
|
142
|
+
storyId: item.storyId || storyId,
|
|
143
|
+
}));
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Build fallback RefinedCriterion[] using original criterion text.
|
|
148
|
+
*/
|
|
149
|
+
function fallbackCriteria(criteria: string[], storyId = ""): RefinedCriterion[] {
|
|
150
|
+
return criteria.map((c) => ({
|
|
151
|
+
original: c,
|
|
152
|
+
refined: c,
|
|
153
|
+
testable: true,
|
|
154
|
+
storyId,
|
|
155
|
+
}));
|
|
156
|
+
}
|
package/src/acceptance/types.ts
CHANGED
|
@@ -6,6 +6,32 @@
|
|
|
6
6
|
|
|
7
7
|
import type { ModelDef, ModelTier, NaxConfig } from "../config/schema";
|
|
8
8
|
|
|
9
|
+
/**
|
|
10
|
+
* A single refined acceptance criterion produced by the refinement module.
|
|
11
|
+
*/
|
|
12
|
+
export interface RefinedCriterion {
|
|
13
|
+
/** The original criterion text from the PRD */
|
|
14
|
+
original: string;
|
|
15
|
+
/** Concrete, machine-verifiable description produced by LLM */
|
|
16
|
+
refined: string;
|
|
17
|
+
/** False if the LLM determines the criterion cannot be automatically tested */
|
|
18
|
+
testable: boolean;
|
|
19
|
+
/** The story ID this criterion belongs to */
|
|
20
|
+
storyId: string;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Context passed to refineAcceptanceCriteria.
|
|
25
|
+
*/
|
|
26
|
+
export interface RefinementContext {
|
|
27
|
+
/** Story ID for attribution on each RefinedCriterion */
|
|
28
|
+
storyId: string;
|
|
29
|
+
/** Codebase context string (file tree, dependencies, etc.) */
|
|
30
|
+
codebaseContext: string;
|
|
31
|
+
/** Global config — model tier resolved from config.acceptance.model */
|
|
32
|
+
config: NaxConfig;
|
|
33
|
+
}
|
|
34
|
+
|
|
9
35
|
/**
|
|
10
36
|
* A single acceptance criterion extracted from spec.md.
|
|
11
37
|
*
|
|
@@ -42,6 +68,24 @@ export interface AcceptanceCriterion {
|
|
|
42
68
|
* };
|
|
43
69
|
* ```
|
|
44
70
|
*/
|
|
71
|
+
/**
|
|
72
|
+
* Options for generating acceptance tests from PRD stories and refined criteria.
|
|
73
|
+
*/
|
|
74
|
+
export interface GenerateFromPRDOptions {
|
|
75
|
+
/** Feature name for context */
|
|
76
|
+
featureName: string;
|
|
77
|
+
/** Working directory for context scanning */
|
|
78
|
+
workdir: string;
|
|
79
|
+
/** Codebase context (file tree, dependencies, test patterns) */
|
|
80
|
+
codebaseContext: string;
|
|
81
|
+
/** Model tier to use for test generation */
|
|
82
|
+
modelTier: ModelTier;
|
|
83
|
+
/** Resolved model definition */
|
|
84
|
+
modelDef: ModelDef;
|
|
85
|
+
/** Global config for quality settings */
|
|
86
|
+
config: NaxConfig;
|
|
87
|
+
}
|
|
88
|
+
|
|
45
89
|
export interface GenerateAcceptanceTestsOptions {
|
|
46
90
|
/** Full spec.md content */
|
|
47
91
|
specContent: string;
|
package/src/analyze/classifier.ts
CHANGED
|
@@ -105,11 +105,6 @@ async function classifyWithLLM(
|
|
|
105
105
|
scan: CodebaseScan,
|
|
106
106
|
config: NaxConfig,
|
|
107
107
|
): Promise<StoryClassification[]> {
|
|
108
|
-
// Check for required environment variables
|
|
109
|
-
if (!process.env.ANTHROPIC_API_KEY) {
|
|
110
|
-
throw new Error("ANTHROPIC_API_KEY environment variable not configured — cannot use LLM classification");
|
|
111
|
-
}
|
|
112
|
-
|
|
113
108
|
// Build prompt
|
|
114
109
|
const prompt = buildClassificationPrompt(stories, scan);
|
|
115
110
|
|
|
@@ -120,7 +115,7 @@ async function classifyWithLLM(
|
|
|
120
115
|
}
|
|
121
116
|
const modelDef = resolveModel(fastModelEntry);
|
|
122
117
|
|
|
123
|
-
// Make API call via adapter (
|
|
118
|
+
// Make API call via adapter (uses config.models.fast tier)
|
|
124
119
|
const jsonText = await _classifyDeps.adapter.complete(prompt, {
|
|
125
120
|
jsonMode: true,
|
|
126
121
|
maxTokens: 4096,
|
package/src/cli/prompts-tdd.ts
CHANGED
|
@@ -31,13 +31,23 @@ export async function handleThreeSessionTddPrompts(
|
|
|
31
31
|
.withLoader(ctx.workdir, ctx.config)
|
|
32
32
|
.story(story)
|
|
33
33
|
.context(ctx.contextMarkdown)
|
|
34
|
+
.constitution(ctx.constitution?.content)
|
|
35
|
+
.testCommand(ctx.config.quality?.commands?.test)
|
|
34
36
|
.build(),
|
|
35
37
|
PromptBuilder.for("implementer", { variant: "standard" })
|
|
36
38
|
.withLoader(ctx.workdir, ctx.config)
|
|
37
39
|
.story(story)
|
|
38
40
|
.context(ctx.contextMarkdown)
|
|
41
|
+
.constitution(ctx.constitution?.content)
|
|
42
|
+
.testCommand(ctx.config.quality?.commands?.test)
|
|
43
|
+
.build(),
|
|
44
|
+
PromptBuilder.for("verifier")
|
|
45
|
+
.withLoader(ctx.workdir, ctx.config)
|
|
46
|
+
.story(story)
|
|
47
|
+
.context(ctx.contextMarkdown)
|
|
48
|
+
.constitution(ctx.constitution?.content)
|
|
49
|
+
.testCommand(ctx.config.quality?.commands?.test)
|
|
39
50
|
.build(),
|
|
40
|
-
PromptBuilder.for("verifier").withLoader(ctx.workdir, ctx.config).story(story).context(ctx.contextMarkdown).build(),
|
|
41
51
|
]);
|
|
42
52
|
|
|
43
53
|
const sessions = [
|
package/src/config/defaults.ts
CHANGED
|
@@ -84,7 +84,43 @@ export const DEFAULT_CONFIG: NaxConfig = {
|
|
|
84
84
|
dangerouslySkipPermissions: true,
|
|
85
85
|
drainTimeoutMs: 2000,
|
|
86
86
|
shell: "/bin/sh",
|
|
87
|
-
stripEnvVars: [
|
|
87
|
+
stripEnvVars: [
|
|
88
|
+
// Agent detection markers
|
|
89
|
+
"CLAUDECODE",
|
|
90
|
+
"REPL_ID",
|
|
91
|
+
"AGENT",
|
|
92
|
+
// Source control tokens
|
|
93
|
+
"GITLAB_ACCESS_TOKEN",
|
|
94
|
+
"GITHUB_TOKEN",
|
|
95
|
+
"GITHUB_ACCESS_TOKEN",
|
|
96
|
+
"GH_TOKEN",
|
|
97
|
+
"CI_GIT_TOKEN",
|
|
98
|
+
"CI_JOB_TOKEN",
|
|
99
|
+
"BITBUCKET_ACCESS_TOKEN",
|
|
100
|
+
// Package registry tokens
|
|
101
|
+
"NPM_TOKEN",
|
|
102
|
+
"NPM_AUTH_TOKEN",
|
|
103
|
+
"YARN_NPM_AUTH_TOKEN",
|
|
104
|
+
// LLM API keys (agent gets these via allowlist in buildAllowedEnv; test runners don't need them)
|
|
105
|
+
"ANTHROPIC_API_KEY",
|
|
106
|
+
"OPENAI_API_KEY",
|
|
107
|
+
"GEMINI_API_KEY",
|
|
108
|
+
"COHERE_API_KEY",
|
|
109
|
+
// Cloud / infra credentials
|
|
110
|
+
"AWS_ACCESS_KEY_ID",
|
|
111
|
+
"AWS_SECRET_ACCESS_KEY",
|
|
112
|
+
"AWS_SESSION_TOKEN",
|
|
113
|
+
"GOOGLE_APPLICATION_CREDENTIALS",
|
|
114
|
+
"GCLOUD_SERVICE_KEY",
|
|
115
|
+
"AZURE_CLIENT_SECRET",
|
|
116
|
+
"AZURE_TENANT_ID",
|
|
117
|
+
// CI secrets
|
|
118
|
+
"TELEGRAM_BOT_TOKEN",
|
|
119
|
+
"SLACK_TOKEN",
|
|
120
|
+
"SLACK_WEBHOOK_URL",
|
|
121
|
+
"SENTRY_AUTH_TOKEN",
|
|
122
|
+
"DATADOG_API_KEY",
|
|
123
|
+
],
|
|
88
124
|
environmentalEscalationDivisor: 2,
|
|
89
125
|
},
|
|
90
126
|
tdd: {
|
|
@@ -127,6 +163,9 @@ export const DEFAULT_CONFIG: NaxConfig = {
|
|
|
127
163
|
maxRetries: 2,
|
|
128
164
|
generateTests: true,
|
|
129
165
|
testPath: "acceptance.test.ts",
|
|
166
|
+
model: "fast" as const,
|
|
167
|
+
refinement: true,
|
|
168
|
+
redGate: true,
|
|
130
169
|
},
|
|
131
170
|
context: {
|
|
132
171
|
fileInjection: "disabled",
|
package/src/config/runtime-types.ts
CHANGED
|
@@ -238,6 +238,12 @@ export interface AcceptanceConfig {
|
|
|
238
238
|
generateTests: boolean;
|
|
239
239
|
/** Path to acceptance test file (relative to feature directory) */
|
|
240
240
|
testPath: string;
|
|
241
|
+
/** Model tier for AC refinement LLM calls (default: "fast") */
|
|
242
|
+
model: ModelTier;
|
|
243
|
+
/** Whether to LLM-refine acceptance criteria before generating tests (default: true) */
|
|
244
|
+
refinement: boolean;
|
|
245
|
+
/** Whether to run RED gate check after generating acceptance tests (default: true) */
|
|
246
|
+
redGate: boolean;
|
|
241
247
|
}
|
|
242
248
|
|
|
243
249
|
/** Optimizer config (v0.10) */
|
|
@@ -387,7 +393,9 @@ export interface RoutingConfig {
|
|
|
387
393
|
|
|
388
394
|
/** Prompt overrides config (PB-003) */
|
|
389
395
|
export interface PromptsConfig {
|
|
390
|
-
overrides?: Partial<
|
|
396
|
+
overrides?: Partial<
|
|
397
|
+
Record<"test-writer" | "implementer" | "verifier" | "single-session" | "tdd-simple" | "batch", string>
|
|
398
|
+
>;
|
|
391
399
|
}
|
|
392
400
|
|
|
393
401
|
/** Decompose config (SD-003) */
|
package/src/config/schemas.ts
CHANGED
|
@@ -127,7 +127,39 @@ const QualityConfigSchema = z.object({
|
|
|
127
127
|
gracePeriodMs: z.number().int().min(500).max(30000).default(5000),
|
|
128
128
|
drainTimeoutMs: z.number().int().min(0).max(10000).default(2000),
|
|
129
129
|
shell: z.string().default("/bin/sh"),
|
|
130
|
-
stripEnvVars: z
|
|
130
|
+
stripEnvVars: z
|
|
131
|
+
.array(z.string())
|
|
132
|
+
.default([
|
|
133
|
+
"CLAUDECODE",
|
|
134
|
+
"REPL_ID",
|
|
135
|
+
"AGENT",
|
|
136
|
+
"GITLAB_ACCESS_TOKEN",
|
|
137
|
+
"GITHUB_TOKEN",
|
|
138
|
+
"GITHUB_ACCESS_TOKEN",
|
|
139
|
+
"GH_TOKEN",
|
|
140
|
+
"CI_GIT_TOKEN",
|
|
141
|
+
"CI_JOB_TOKEN",
|
|
142
|
+
"BITBUCKET_ACCESS_TOKEN",
|
|
143
|
+
"NPM_TOKEN",
|
|
144
|
+
"NPM_AUTH_TOKEN",
|
|
145
|
+
"YARN_NPM_AUTH_TOKEN",
|
|
146
|
+
"ANTHROPIC_API_KEY",
|
|
147
|
+
"OPENAI_API_KEY",
|
|
148
|
+
"GEMINI_API_KEY",
|
|
149
|
+
"COHERE_API_KEY",
|
|
150
|
+
"AWS_ACCESS_KEY_ID",
|
|
151
|
+
"AWS_SECRET_ACCESS_KEY",
|
|
152
|
+
"AWS_SESSION_TOKEN",
|
|
153
|
+
"GOOGLE_APPLICATION_CREDENTIALS",
|
|
154
|
+
"GCLOUD_SERVICE_KEY",
|
|
155
|
+
"AZURE_CLIENT_SECRET",
|
|
156
|
+
"AZURE_TENANT_ID",
|
|
157
|
+
"TELEGRAM_BOT_TOKEN",
|
|
158
|
+
"SLACK_TOKEN",
|
|
159
|
+
"SLACK_WEBHOOK_URL",
|
|
160
|
+
"SENTRY_AUTH_TOKEN",
|
|
161
|
+
"DATADOG_API_KEY",
|
|
162
|
+
]),
|
|
131
163
|
environmentalEscalationDivisor: z.number().min(1).max(10).default(2),
|
|
132
164
|
});
|
|
133
165
|
|
|
@@ -183,6 +215,9 @@ const AcceptanceConfigSchema = z.object({
|
|
|
183
215
|
maxRetries: z.number().int().nonnegative(),
|
|
184
216
|
generateTests: z.boolean(),
|
|
185
217
|
testPath: z.string().min(1, "acceptance.testPath must be non-empty"),
|
|
218
|
+
model: z.enum(["fast", "balanced", "powerful"]).default("fast"),
|
|
219
|
+
refinement: z.boolean().default(true),
|
|
220
|
+
redGate: z.boolean().default(true),
|
|
186
221
|
});
|
|
187
222
|
|
|
188
223
|
const TestCoverageConfigSchema = z.object({
|
package/src/execution/index.ts
CHANGED
|
@@ -2,7 +2,6 @@ export type { RunOptions, RunResult } from "./runner";
|
|
|
2
2
|
export { run } from "./runner";
|
|
3
3
|
export type { FailureCategory } from "../tdd/types";
|
|
4
4
|
export { appendProgress } from "./progress";
|
|
5
|
-
export { buildSingleSessionPrompt, buildBatchPrompt } from "./prompts";
|
|
6
5
|
export { groupStoriesIntoBatches, type StoryBatch } from "./batching";
|
|
7
6
|
export { escalateTier, getTierConfig, calculateMaxIterations } from "./escalation";
|
|
8
7
|
export { readQueueFile, clearQueueFile } from "./queue-handler";
|
package/src/execution/runner.ts
CHANGED
|
@@ -247,6 +247,5 @@ export async function run(options: RunOptions): Promise<RunResult> {
|
|
|
247
247
|
}
|
|
248
248
|
|
|
249
249
|
// Re-exports for backward compatibility with existing test imports
|
|
250
|
-
export { buildSingleSessionPrompt, buildBatchPrompt } from "./prompts";
|
|
251
250
|
export { groupStoriesIntoBatches, type StoryBatch } from "./batching";
|
|
252
251
|
export { escalateTier } from "./escalation";
|
package/src/execution/sequential-executor.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { getSafeLogger } from "../logger";
|
|
|
5
5
|
import type { StoryMetrics } from "../metrics";
|
|
6
6
|
import { pipelineEventBus } from "../pipeline/event-bus";
|
|
7
7
|
import { runPipeline } from "../pipeline/runner";
|
|
8
|
-
import { postRunPipeline } from "../pipeline/stages";
|
|
8
|
+
import { postRunPipeline, preRunPipeline } from "../pipeline/stages";
|
|
9
9
|
import { wireEventsWriter } from "../pipeline/subscribers/events-writer";
|
|
10
10
|
import { wireHooks } from "../pipeline/subscribers/hooks";
|
|
11
11
|
import { wireInteraction } from "../pipeline/subscribers/interaction";
|
|
@@ -68,6 +68,20 @@ export async function executeSequential(
|
|
|
68
68
|
);
|
|
69
69
|
|
|
70
70
|
try {
|
|
71
|
+
// Pre-run pipeline (acceptance test setup with RED gate)
|
|
72
|
+
logger?.info("execution", "Running pre-run pipeline (acceptance test setup)");
|
|
73
|
+
const preRunCtx: PipelineContext = {
|
|
74
|
+
config: ctx.config,
|
|
75
|
+
prd,
|
|
76
|
+
workdir: ctx.workdir,
|
|
77
|
+
featureDir: ctx.featureDir,
|
|
78
|
+
story: prd.userStories[0],
|
|
79
|
+
stories: prd.userStories,
|
|
80
|
+
routing: { complexity: "simple", modelTier: "fast", testStrategy: "test-after", reasoning: "" },
|
|
81
|
+
hooks: ctx.hooks,
|
|
82
|
+
};
|
|
83
|
+
await runPipeline(preRunPipeline, preRunCtx, ctx.eventEmitter);
|
|
84
|
+
|
|
71
85
|
while (iterations < ctx.config.execution.maxIterations) {
|
|
72
86
|
iterations++;
|
|
73
87
|
if (Math.round(process.memoryUsage().heapUsed / 1024 / 1024) > 1024)
|