@nyxa/nyx-agent 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/dist/cli.js +3 -1
- package/dist/commands/init.js +53 -29
- package/dist/config/schema.js +45 -4
- package/dist/runtime/prompts.js +50 -17
- package/dist/runtime/runPipeline.js +501 -102
- package/dist/runtime/schemas.js +108 -22
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,7 +11,8 @@ For every run NyxAgent:
|
|
|
11
11
|
confirm the proposed checklist.
|
|
12
12
|
2. For each selected issue, in an isolated git **worktree**:
|
|
13
13
|
- **implements** it (the agent — the only customizable prompt),
|
|
14
|
-
- optionally **reviews**
|
|
14
|
+
- optionally **reviews** it in bounded discovery rounds, then revises only
|
|
15
|
+
verified blockers with locked validation,
|
|
15
16
|
- **commits** the change (the engine, deterministically).
|
|
16
17
|
3. Optionally runs a **global review** across the whole run.
|
|
17
18
|
4. **Pushes** the run branch and **opens one pull request** (the engine).
|
|
@@ -45,7 +46,7 @@ nyxagent update # self-update to the latest published version
|
|
|
45
46
|
"model": "gpt-5.5",
|
|
46
47
|
"reasoning_effort": "medium",
|
|
47
48
|
"review": "each",
|
|
48
|
-
"
|
|
49
|
+
"review_rounds": { "each": 1, "global": 1 },
|
|
49
50
|
"tracker": { "type": "github", "repo": "owner/repo" },
|
|
50
51
|
"base_branch": "main",
|
|
51
52
|
"max_iterations": 5
|
|
@@ -54,12 +55,19 @@ nyxagent update # self-update to the latest published version
|
|
|
54
55
|
|
|
55
56
|
- `harness`: `codex` or `claude` (override per run with `--harness`).
|
|
56
57
|
- `review`: `each` (per task), `all` (global only), `both`, or `none`.
|
|
57
|
-
- `
|
|
58
|
+
- `review_rounds.each`: fresh per-task discovery rounds (default 1).
|
|
59
|
+
- `review_rounds.global`: fresh global discovery rounds (default 1).
|
|
60
|
+
- `review_max_attempts`: deprecated; accepted for old configs with a warning, but
|
|
61
|
+
ignored by the review loop.
|
|
62
|
+
- `agents.execution`, `agents.review`, `agents.global_review`, and
|
|
63
|
+
`agents.global_review.roles.<role>` can override `harness`, `model`, and
|
|
64
|
+
`reasoning_effort` for specialized phases. Global review roles are
|
|
65
|
+
`diff-contract`, `integration`, `domain-invariants`, and `tests-validation`.
|
|
58
66
|
- `base_branch`: optional; defaults to the current branch at run time.
|
|
59
67
|
|
|
60
|
-
If a run fails review
|
|
68
|
+
If a run fails review validation but has already produced
|
|
61
69
|
commits, NyxAgent pushes the branch and opens a **draft** pull request with the
|
|
62
|
-
unresolved
|
|
70
|
+
unresolved blockers, so the work is never stranded on an orphaned branch.
|
|
63
71
|
|
|
64
72
|
## Requirements
|
|
65
73
|
|
package/dist/cli.js
CHANGED
|
@@ -20,7 +20,9 @@ program
|
|
|
20
20
|
.option("--model <name>", "model name")
|
|
21
21
|
.option("--reasoning-effort <level>", "reasoning effort (default: medium)")
|
|
22
22
|
.option("--review <mode>", "review strategy: each, all, both, or none")
|
|
23
|
-
.option("--review-
|
|
23
|
+
.option("--review-rounds-each <count>", "per-work-item review discovery rounds (default: 1)")
|
|
24
|
+
.option("--review-rounds-global <count>", "global review discovery rounds (default: 1)")
|
|
25
|
+
.option("--review-attempts <count>", "deprecated alias for both review round counts")
|
|
24
26
|
.option("--repo <owner/repo>", "GitHub repository")
|
|
25
27
|
.option("--base-branch <branch>", "base branch (default: current branch)")
|
|
26
28
|
.option("--max-iterations <count>", "maximum work items per run")
|
package/dist/commands/init.js
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { input, number as numberPrompt, select } from "@inquirer/prompts";
|
|
4
4
|
import pc from "picocolors";
|
|
5
|
-
import { harnessNames, reviewModes } from "../config/schema.js";
|
|
6
|
-
import { ensureDir, pathExists, readText, writeText } from "../runtime/files.js";
|
|
5
|
+
import { harnessNames, reviewModes, } from "../config/schema.js";
|
|
6
|
+
import { ensureDir, pathExists, readText, writeText, } from "../runtime/files.js";
|
|
7
7
|
import { getNyxDir, relativeToProject } from "../runtime/paths.js";
|
|
8
8
|
import { EXECUTION_PROMPT_FILE } from "../runtime/prompts.js";
|
|
9
9
|
const DEFAULT_CODEX_MODEL = "gpt-5.5";
|
|
@@ -17,7 +17,7 @@ const GITIGNORE_ENTRIES = [
|
|
|
17
17
|
".nyxagent/state.json",
|
|
18
18
|
".nyxagent/config.json",
|
|
19
19
|
".nyxagent/config.toml",
|
|
20
|
-
".nyxagent/prompts/"
|
|
20
|
+
".nyxagent/prompts/",
|
|
21
21
|
];
|
|
22
22
|
export async function initCommand(options, projectRoot = process.cwd()) {
|
|
23
23
|
const root = path.resolve(projectRoot);
|
|
@@ -46,14 +46,14 @@ async function resolveInitOptions(options) {
|
|
|
46
46
|
message: "Default harness",
|
|
47
47
|
choices: [
|
|
48
48
|
{ name: "codex", value: "codex" },
|
|
49
|
-
{ name: "claude", value: "claude" }
|
|
50
|
-
]
|
|
49
|
+
{ name: "claude", value: "claude" },
|
|
50
|
+
],
|
|
51
51
|
});
|
|
52
52
|
const model = options.model ??
|
|
53
53
|
(await input({
|
|
54
54
|
message: "Model",
|
|
55
55
|
default: harness === "codex" ? DEFAULT_CODEX_MODEL : "",
|
|
56
|
-
validate: (value) => value.trim().length > 0 || "Model is required"
|
|
56
|
+
validate: (value) => value.trim().length > 0 || "Model is required",
|
|
57
57
|
}));
|
|
58
58
|
const reasoning_effort = options.reasoningEffort ??
|
|
59
59
|
(await input({ message: "Reasoning effort", default: "medium" }));
|
|
@@ -65,34 +65,27 @@ async function resolveInitOptions(options) {
|
|
|
65
65
|
{ name: "After each task", value: "each" },
|
|
66
66
|
{ name: "After all tasks (global review)", value: "all" },
|
|
67
67
|
{ name: "Both per-task and global", value: "both" },
|
|
68
|
-
{ name: "No review", value: "none" }
|
|
68
|
+
{ name: "No review", value: "none" },
|
|
69
69
|
],
|
|
70
|
-
default: "each"
|
|
70
|
+
default: "each",
|
|
71
71
|
});
|
|
72
|
-
const
|
|
73
|
-
|
|
74
|
-
:
|
|
75
|
-
(await numberPrompt({
|
|
76
|
-
message: "Max review attempts per stage",
|
|
77
|
-
default: 4,
|
|
78
|
-
required: true
|
|
79
|
-
}));
|
|
80
|
-
if (!Number.isInteger(review_max_attempts) || review_max_attempts <= 0) {
|
|
81
|
-
throw new Error("review attempts must be a positive integer");
|
|
82
|
-
}
|
|
83
|
-
const repo = options.repo ?? (await input({ message: "GitHub repository (owner/repo)" }));
|
|
72
|
+
const review_rounds = await resolveReviewRounds(options, review);
|
|
73
|
+
const repo = options.repo ??
|
|
74
|
+
(await input({ message: "GitHub repository (owner/repo)" }));
|
|
84
75
|
validateRepository(repo);
|
|
85
76
|
const baseBranchInput = options.baseBranch ??
|
|
86
77
|
(await input({
|
|
87
78
|
message: "Base branch (blank = current branch at run time)",
|
|
88
|
-
default: ""
|
|
79
|
+
default: "",
|
|
89
80
|
}));
|
|
90
|
-
const base_branch = baseBranchInput.trim()
|
|
81
|
+
const base_branch = baseBranchInput.trim()
|
|
82
|
+
? baseBranchInput.trim()
|
|
83
|
+
: undefined;
|
|
91
84
|
const max_iterations = parseMaxIterations(options.maxIterations) ??
|
|
92
85
|
(await numberPrompt({
|
|
93
86
|
message: "Max work items per run",
|
|
94
87
|
default: 5,
|
|
95
|
-
required: true
|
|
88
|
+
required: true,
|
|
96
89
|
}));
|
|
97
90
|
if (!Number.isInteger(max_iterations) || max_iterations <= 0) {
|
|
98
91
|
throw new Error("max iterations must be a positive integer");
|
|
@@ -102,10 +95,10 @@ async function resolveInitOptions(options) {
|
|
|
102
95
|
model: model.trim(),
|
|
103
96
|
reasoning_effort: reasoning_effort.trim() || "medium",
|
|
104
97
|
review,
|
|
105
|
-
|
|
98
|
+
review_rounds,
|
|
106
99
|
repo,
|
|
107
100
|
base_branch,
|
|
108
|
-
max_iterations
|
|
101
|
+
max_iterations,
|
|
109
102
|
};
|
|
110
103
|
}
|
|
111
104
|
function buildConfig(options) {
|
|
@@ -115,11 +108,11 @@ function buildConfig(options) {
|
|
|
115
108
|
reasoning_effort: options.reasoning_effort,
|
|
116
109
|
review: options.review,
|
|
117
110
|
tracker: { type: "github", repo: options.repo },
|
|
118
|
-
max_iterations: options.max_iterations
|
|
111
|
+
max_iterations: options.max_iterations,
|
|
119
112
|
};
|
|
120
|
-
// No point persisting
|
|
113
|
+
// No point persisting review rounds when reviews are disabled.
|
|
121
114
|
if (options.review !== "none") {
|
|
122
|
-
config.
|
|
115
|
+
config.review_rounds = options.review_rounds;
|
|
123
116
|
}
|
|
124
117
|
if (options.base_branch) {
|
|
125
118
|
config.base_branch = options.base_branch;
|
|
@@ -149,7 +142,38 @@ function parseMaxIterations(value) {
|
|
|
149
142
|
}
|
|
150
143
|
return Number.parseInt(value, 10);
|
|
151
144
|
}
|
|
152
|
-
function
|
|
145
|
+
async function resolveReviewRounds(options, review) {
|
|
146
|
+
if (review === "none") {
|
|
147
|
+
return { each: 1, global: 1 };
|
|
148
|
+
}
|
|
149
|
+
const deprecatedAttempts = parsePositiveInteger(options.reviewAttempts);
|
|
150
|
+
const each = parsePositiveInteger(options.reviewRoundsEach) ??
|
|
151
|
+
deprecatedAttempts ??
|
|
152
|
+
(review === "each" || review === "both"
|
|
153
|
+
? await numberPrompt({
|
|
154
|
+
message: "Review rounds per work item",
|
|
155
|
+
default: 1,
|
|
156
|
+
required: true,
|
|
157
|
+
})
|
|
158
|
+
: 1);
|
|
159
|
+
const global = parsePositiveInteger(options.reviewRoundsGlobal) ??
|
|
160
|
+
deprecatedAttempts ??
|
|
161
|
+
(review === "all" || review === "both"
|
|
162
|
+
? await numberPrompt({
|
|
163
|
+
message: "Global review rounds",
|
|
164
|
+
default: 1,
|
|
165
|
+
required: true,
|
|
166
|
+
})
|
|
167
|
+
: 1);
|
|
168
|
+
if (!Number.isInteger(each) || each <= 0) {
|
|
169
|
+
throw new Error("review_rounds.each must be a positive integer");
|
|
170
|
+
}
|
|
171
|
+
if (!Number.isInteger(global) || global <= 0) {
|
|
172
|
+
throw new Error("review_rounds.global must be a positive integer");
|
|
173
|
+
}
|
|
174
|
+
return { each, global };
|
|
175
|
+
}
|
|
176
|
+
function parsePositiveInteger(value) {
|
|
153
177
|
if (value === undefined) {
|
|
154
178
|
return undefined;
|
|
155
179
|
}
|
package/dist/config/schema.js
CHANGED
|
@@ -7,7 +7,37 @@ import { z } from "zod";
|
|
|
7
7
|
*/
|
|
8
8
|
export const harnessNames = ["codex", "claude"];
|
|
9
9
|
export const reviewModes = ["each", "all", "both", "none"];
|
|
10
|
+
export const globalReviewRoles = [
|
|
11
|
+
"diff-contract",
|
|
12
|
+
"integration",
|
|
13
|
+
"domain-invariants",
|
|
14
|
+
"tests-validation",
|
|
15
|
+
];
|
|
10
16
|
const githubRepositoryPattern = /^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/;
|
|
17
|
+
const reviewRoundsSchema = z
|
|
18
|
+
.object({
|
|
19
|
+
each: z.number().int().positive().default(1),
|
|
20
|
+
global: z.number().int().positive().default(1),
|
|
21
|
+
})
|
|
22
|
+
.default({ each: 1, global: 1 });
|
|
23
|
+
const agentOverrideSchema = z
|
|
24
|
+
.object({
|
|
25
|
+
harness: z.enum(harnessNames).optional(),
|
|
26
|
+
model: z.string().min(1).optional(),
|
|
27
|
+
reasoning_effort: z.string().min(1).optional(),
|
|
28
|
+
})
|
|
29
|
+
.strict();
|
|
30
|
+
const globalReviewAgentOverrideSchema = agentOverrideSchema.extend({
|
|
31
|
+
roles: z
|
|
32
|
+
.object({
|
|
33
|
+
"diff-contract": agentOverrideSchema.optional(),
|
|
34
|
+
integration: agentOverrideSchema.optional(),
|
|
35
|
+
"domain-invariants": agentOverrideSchema.optional(),
|
|
36
|
+
"tests-validation": agentOverrideSchema.optional(),
|
|
37
|
+
})
|
|
38
|
+
.strict()
|
|
39
|
+
.optional(),
|
|
40
|
+
});
|
|
11
41
|
export const nyxConfigSchema = z
|
|
12
42
|
.object({
|
|
13
43
|
/** Which agent CLI runs each phase. Overridable per run via `run --harness`. */
|
|
@@ -18,18 +48,29 @@ export const nyxConfigSchema = z
|
|
|
18
48
|
reasoning_effort: z.string().min(1).default("medium"),
|
|
19
49
|
/** When the agent reviews its own work. */
|
|
20
50
|
review: z.enum(reviewModes).default("each"),
|
|
21
|
-
/** How many
|
|
22
|
-
|
|
51
|
+
/** How many fresh discovery rounds each review stage may run. */
|
|
52
|
+
review_rounds: reviewRoundsSchema,
|
|
53
|
+
/** Deprecated: accepted for existing configs, but no longer drives reviews. */
|
|
54
|
+
review_max_attempts: z.number().int().positive().optional(),
|
|
55
|
+
/** Optional agent overrides by phase and global-review role. */
|
|
56
|
+
agents: z
|
|
57
|
+
.object({
|
|
58
|
+
execution: agentOverrideSchema.optional(),
|
|
59
|
+
review: agentOverrideSchema.optional(),
|
|
60
|
+
global_review: globalReviewAgentOverrideSchema.optional(),
|
|
61
|
+
})
|
|
62
|
+
.strict()
|
|
63
|
+
.optional(),
|
|
23
64
|
/** Work item tracker. GitHub issues only in this version. */
|
|
24
65
|
tracker: z.object({
|
|
25
66
|
type: z.literal("github"),
|
|
26
67
|
repo: z
|
|
27
68
|
.string()
|
|
28
|
-
.regex(githubRepositoryPattern, 'tracker.repo must be "owner/repo"')
|
|
69
|
+
.regex(githubRepositoryPattern, 'tracker.repo must be "owner/repo"'),
|
|
29
70
|
}),
|
|
30
71
|
/** Base branch the run branch is cut from. Defaults to the current branch. */
|
|
31
72
|
base_branch: z.string().min(1).optional(),
|
|
32
73
|
/** Maximum work items processed in a single run. */
|
|
33
|
-
max_iterations: z.number().int().positive().default(5)
|
|
74
|
+
max_iterations: z.number().int().positive().default(5),
|
|
34
75
|
})
|
|
35
76
|
.strict();
|
package/dist/runtime/prompts.js
CHANGED
|
@@ -23,39 +23,72 @@ test, implement the smallest change that satisfies it, then tidy the result.
|
|
|
23
23
|
|
|
24
24
|
Do not commit and do not touch git — NyxAgent commits your changes for you. Leave
|
|
25
25
|
clear validation evidence (commands run and their results) in your final response.`;
|
|
26
|
-
export const REVIEW_PROMPT = `
|
|
26
|
+
export const REVIEW_PROMPT = `Discover findings in the implementation of the selected work item.
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
anything.
|
|
28
|
+
Use the review-context artifact paths in the context above. Inspect the patch file,
|
|
29
|
+
diffstat, changed-files list, and the working directory as needed. Stay read-only
|
|
30
|
+
and do not modify anything.
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
validation evidence, design fit, and
|
|
32
|
+
This is discovery for the current review round only. Assess alignment with the work
|
|
33
|
+
item, correctness and regression risk, test or validation evidence, design fit, and
|
|
34
|
+
security or data-safety concerns.
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
Put only must-fix issues in blockers. Put missing or weak validation in test_gaps,
|
|
37
|
+
non-blocking concerns in advisory_findings, uncertain suspicions in
|
|
38
|
+
uncertain_findings, and explicitly refuted candidates in rejected_findings.`;
|
|
39
|
+
export const REVIEW_CHALLENGE_PROMPT = `Challenge the proposed blockers for the selected work item.
|
|
40
|
+
|
|
41
|
+
Stay read-only. Try to refute each proposed blocker using the current code,
|
|
42
|
+
review-context artifacts, and concrete evidence. Return only blockers that remain
|
|
43
|
+
valid and actionable. Move false positives or already-satisfied findings to
|
|
44
|
+
rejected_findings with evidence. Do not introduce new findings in this phase.`;
|
|
37
45
|
export const REVISION_PROMPT = `Apply the changes requested by the review for the selected work item.
|
|
38
46
|
|
|
39
|
-
The
|
|
40
|
-
the work focused. Do not commit — NyxAgent commits your changes for you.`;
|
|
47
|
+
The verified blockers are listed in the context above. Address exactly those,
|
|
48
|
+
keeping the work focused. Do not commit — NyxAgent commits your changes for you.`;
|
|
49
|
+
export const REVIEW_VALIDATION_PROMPT = `Validate the correction for the previously verified blockers.
|
|
50
|
+
|
|
51
|
+
Stay read-only. Validate only the blockers listed in the context above. Do not run a
|
|
52
|
+
new review and do not introduce unrelated new findings. For each blocker, return one
|
|
53
|
+
status: resolved, unresolved, false_positive, or regression_from_correction.
|
|
54
|
+
|
|
55
|
+
Use regression_from_correction only when the correction itself directly created a
|
|
56
|
+
new blocker and the evidence proves that causal link.`;
|
|
41
57
|
export const GLOBAL_REVIEW_PROMPT = `Review the entire run as a whole, now that every selected work item is implemented
|
|
42
58
|
and committed.
|
|
43
59
|
|
|
44
|
-
|
|
45
|
-
|
|
60
|
+
Use the review-context artifact paths in the context above. Inspect the patch file,
|
|
61
|
+
diffstat, changed-files list, commit list, and the working directory as needed. Stay
|
|
62
|
+
read-only and do not modify anything.
|
|
46
63
|
|
|
47
64
|
Focus on cross-cutting concerns a per-item review cannot see: integration between
|
|
48
65
|
items, regressions one item introduced in another, overall design coherence,
|
|
49
66
|
duplication, and gaps versus the issues' intent.
|
|
50
67
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
68
|
+
Return typed findings. Put only must-fix issues in blockers. Put missing or weak
|
|
69
|
+
validation in test_gaps, non-blocking concerns in advisory_findings, uncertain
|
|
70
|
+
suspicions in uncertain_findings, and explicitly refuted candidates in
|
|
71
|
+
rejected_findings.`;
|
|
72
|
+
export const GLOBAL_REVIEW_CHALLENGE_PROMPT = `Challenge the aggregated global-review blockers.
|
|
73
|
+
|
|
74
|
+
Stay read-only. Try to refute each proposed blocker using the current code,
|
|
75
|
+
review-context artifacts, and concrete evidence. Return only blockers that remain
|
|
76
|
+
valid and actionable. Move false positives or already-satisfied findings to
|
|
77
|
+
rejected_findings with evidence. Do not introduce new findings in this phase.`;
|
|
54
78
|
export const GLOBAL_REVISION_PROMPT = `Apply the changes requested by the global review of the whole run.
|
|
55
79
|
|
|
56
|
-
The
|
|
80
|
+
The verified blockers are listed in the context above. Address exactly those, across
|
|
57
81
|
whichever work items are affected. Do not commit — NyxAgent commits your corrections
|
|
58
82
|
for you.`;
|
|
83
|
+
export const GLOBAL_REVIEW_VALIDATION_PROMPT = `Validate the global review correction for the previously verified blockers.
|
|
84
|
+
|
|
85
|
+
Stay read-only. Validate only the blockers listed in the context above. Do not run a
|
|
86
|
+
new global review and do not introduce unrelated new findings. For each blocker,
|
|
87
|
+
return one status: resolved, unresolved, false_positive, or
|
|
88
|
+
regression_from_correction.
|
|
89
|
+
|
|
90
|
+
Use regression_from_correction only when the correction itself directly created a
|
|
91
|
+
new blocker and the evidence proves that causal link.`;
|
|
59
92
|
/** Rendered into .nyxagent/prompts/execution.md at init; the only editable prompt. */
|
|
60
93
|
export const EXECUTION_PROMPT_FILE = `${EXECUTION_PROMPT}
|
|
61
94
|
`;
|
|
@@ -88,7 +121,7 @@ export function buildPhasePrompt(input) {
|
|
|
88
121
|
"",
|
|
89
122
|
"## Instructions",
|
|
90
123
|
"",
|
|
91
|
-
input.guidance.trim()
|
|
124
|
+
input.guidance.trim(),
|
|
92
125
|
];
|
|
93
126
|
if (input.schema) {
|
|
94
127
|
parts.push("", "## Required result", "", "End your response with a single <nyxagent_result> block containing JSON that", "matches this schema. NyxAgent parses the last such block, validates it, and", "ignores everything else for control flow.", "", "```json", JSON.stringify(input.schema, null, 2), "```", "", "<nyxagent_result>", "{ ... }", "</nyxagent_result>");
|
package/dist/runtime/runPipeline.js
CHANGED
|
@@ -1,19 +1,22 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
|
+
import { execa } from "execa";
|
|
2
3
|
import { loadConfig } from "../config/loadConfig.js";
|
|
3
|
-
import {
|
|
4
|
+
import { globalReviewRoles, } from "../config/schema.js";
|
|
5
|
+
import { ensureDir, pathExists, readText, writeText } from "./files.js";
|
|
4
6
|
import { deleteBranch, removeRunWorktree, setUpRunWorktree, } from "./gitLifecycle.js";
|
|
5
7
|
import { markWorkItemCompleted, readWorkItemLedger, writeWorkItemLedger, } from "./ledger.js";
|
|
6
8
|
import { getNyxDir, relativeToProject } from "./paths.js";
|
|
7
|
-
import { buildContextBlock, buildPhasePrompt, EXECUTION_PROMPT, GLOBAL_REVIEW_PROMPT, GLOBAL_REVISION_PROMPT, REVIEW_PROMPT, REVISION_PROMPT, SELECTION_PROMPT,
|
|
9
|
+
import { buildContextBlock, buildPhasePrompt, EXECUTION_PROMPT, GLOBAL_REVIEW_CHALLENGE_PROMPT, GLOBAL_REVIEW_PROMPT, GLOBAL_REVIEW_VALIDATION_PROMPT, GLOBAL_REVISION_PROMPT, REVIEW_CHALLENGE_PROMPT, REVIEW_PROMPT, REVIEW_VALIDATION_PROMPT, REVISION_PROMPT, SELECTION_PROMPT, } from "./prompts.js";
|
|
8
10
|
import { createRunReporter } from "./reporter.js";
|
|
9
11
|
import { runAgentPhase, } from "./runPhase.js";
|
|
10
|
-
import { GLOBAL_REVIEW_SCHEMA,
|
|
12
|
+
import { REVIEW_CHALLENGE_SCHEMA, REVIEW_DISCOVERY_SCHEMA, GLOBAL_REVIEW_SCHEMA, REVIEW_VALIDATION_SCHEMA, SELECTION_SCHEMA, } from "./schemas.js";
|
|
11
13
|
import { commitAll, commitsAhead, createPullRequest, pushBranch, rangeDiff, stageAllAndDiff, } from "./scm.js";
|
|
12
14
|
import { confirmWorkItemSelection, } from "./selectionConfirmation.js";
|
|
13
15
|
import { createRunId } from "./time.js";
|
|
14
16
|
import { filterAvailable, listGitHubIssues, resolveSelectedQueue, } from "./workItems.js";
|
|
15
17
|
const MAX_CANDIDATES = 50;
|
|
16
18
|
const EXCERPT_CHARS = 800;
|
|
19
|
+
const CORRECTION_VALIDATION_MAX_ATTEMPTS = 3;
|
|
17
20
|
export function defaultPipelineDependencies() {
|
|
18
21
|
return {
|
|
19
22
|
listIssues: listGitHubIssues,
|
|
@@ -37,16 +40,22 @@ export async function runPipeline(input = {}, deps = defaultPipelineDependencies
|
|
|
37
40
|
const nyxDir = getNyxDir(projectRoot);
|
|
38
41
|
const configPath = input.configPath ?? path.join(nyxDir, "config.json");
|
|
39
42
|
const config = await loadConfig(configPath);
|
|
40
|
-
const
|
|
43
|
+
const baseAgent = resolveAgentProfile({
|
|
44
|
+
config,
|
|
45
|
+
cliHarness: input.harness,
|
|
46
|
+
});
|
|
41
47
|
const runId = createRunId();
|
|
42
48
|
const runDir = path.join(nyxDir, "runs", runId);
|
|
43
49
|
await ensureDir(runDir);
|
|
44
50
|
const reporter = input.reporter ?? createRunReporter({ verbose: input.verbose ?? false });
|
|
45
51
|
reporter.heading(`NyxAgent run ${runId}`);
|
|
46
|
-
reporter.info(`Harness: ${harness} · model: ${
|
|
52
|
+
reporter.info(`Harness: ${baseAgent.harness} · model: ${baseAgent.model} · review: ${config.review}`);
|
|
47
53
|
reporter.detail(`Config: ${relativeToProject(projectRoot, configPath)}`);
|
|
48
54
|
reporter.detail(`Artifacts: ${relativeToProject(projectRoot, runDir)}`);
|
|
49
55
|
reporter.detail(`Tracker: ${config.tracker.repo}`);
|
|
56
|
+
if (config.review_max_attempts !== undefined) {
|
|
57
|
+
reporter.warn("review_max_attempts is deprecated and ignored; use review_rounds.each/global instead.");
|
|
58
|
+
}
|
|
50
59
|
const ledger = await readWorkItemLedger(nyxDir);
|
|
51
60
|
reporter.detail(`Completed work items already in ledger: ${ledger.completed_work_item_keys.length}`);
|
|
52
61
|
// 1. Selection runs read-only in the main checkout, before any branch exists.
|
|
@@ -66,7 +75,7 @@ export async function runPipeline(input = {}, deps = defaultPipelineDependencies
|
|
|
66
75
|
const proposed = await runSelection({
|
|
67
76
|
projectRoot,
|
|
68
77
|
runDir,
|
|
69
|
-
harness,
|
|
78
|
+
cliHarness: input.harness,
|
|
70
79
|
config,
|
|
71
80
|
candidates,
|
|
72
81
|
runPhase: deps.runPhase,
|
|
@@ -114,7 +123,7 @@ export async function runPipeline(input = {}, deps = defaultPipelineDependencies
|
|
|
114
123
|
item,
|
|
115
124
|
guidance: executionGuidance,
|
|
116
125
|
git,
|
|
117
|
-
harness,
|
|
126
|
+
cliHarness: input.harness,
|
|
118
127
|
config,
|
|
119
128
|
runPhase: deps.runPhase,
|
|
120
129
|
reporter,
|
|
@@ -124,9 +133,9 @@ export async function runPipeline(input = {}, deps = defaultPipelineDependencies
|
|
|
124
133
|
iterationDir,
|
|
125
134
|
item,
|
|
126
135
|
git,
|
|
127
|
-
harness,
|
|
136
|
+
cliHarness: input.harness,
|
|
128
137
|
config,
|
|
129
|
-
|
|
138
|
+
rounds: config.review_rounds.each,
|
|
130
139
|
runPhase: deps.runPhase,
|
|
131
140
|
reporter,
|
|
132
141
|
});
|
|
@@ -153,9 +162,10 @@ export async function runPipeline(input = {}, deps = defaultPipelineDependencies
|
|
|
153
162
|
const corrections = await runGlobalReviewLoop({
|
|
154
163
|
runDir,
|
|
155
164
|
git,
|
|
156
|
-
|
|
165
|
+
completed,
|
|
166
|
+
cliHarness: input.harness,
|
|
157
167
|
config,
|
|
158
|
-
|
|
168
|
+
rounds: config.review_rounds.global,
|
|
159
169
|
runPhase: deps.runPhase,
|
|
160
170
|
reporter,
|
|
161
171
|
});
|
|
@@ -257,6 +267,10 @@ async function salvageFailedRun(input) {
|
|
|
257
267
|
}
|
|
258
268
|
}
|
|
259
269
|
async function runSelection(input) {
|
|
270
|
+
const agent = resolveAgentProfile({
|
|
271
|
+
config: input.config,
|
|
272
|
+
cliHarness: input.cliHarness,
|
|
273
|
+
});
|
|
260
274
|
const context = buildContextBlock([
|
|
261
275
|
["Repository", input.config.tracker.repo],
|
|
262
276
|
["Max work items this run", input.config.max_iterations],
|
|
@@ -275,9 +289,9 @@ async function runSelection(input) {
|
|
|
275
289
|
phaseId: "selection",
|
|
276
290
|
phaseDir: path.join(input.runDir, "selection"),
|
|
277
291
|
workdir: input.projectRoot,
|
|
278
|
-
harness:
|
|
279
|
-
model:
|
|
280
|
-
reasoning:
|
|
292
|
+
harness: agent.harness,
|
|
293
|
+
model: agent.model,
|
|
294
|
+
reasoning: agent.reasoning_effort,
|
|
281
295
|
capability: "readonly",
|
|
282
296
|
prompt: buildPhasePrompt({
|
|
283
297
|
guidance: SELECTION_PROMPT,
|
|
@@ -304,6 +318,11 @@ async function runSelection(input) {
|
|
|
304
318
|
return resolved.queue;
|
|
305
319
|
}
|
|
306
320
|
async function runExecution(input) {
|
|
321
|
+
const agent = resolveAgentProfile({
|
|
322
|
+
config: input.config,
|
|
323
|
+
cliHarness: input.cliHarness,
|
|
324
|
+
phase: "execution",
|
|
325
|
+
});
|
|
307
326
|
const context = buildContextBlock([
|
|
308
327
|
["Work item", workItemSummary(input.item)],
|
|
309
328
|
["Issue description", input.item.excerpt ?? "(no description provided)"],
|
|
@@ -314,9 +333,9 @@ async function runExecution(input) {
|
|
|
314
333
|
phaseId: "execution",
|
|
315
334
|
phaseDir: path.join(input.iterationDir, "execution"),
|
|
316
335
|
workdir: input.git.worktree,
|
|
317
|
-
harness:
|
|
318
|
-
model:
|
|
319
|
-
reasoning:
|
|
336
|
+
harness: agent.harness,
|
|
337
|
+
model: agent.model,
|
|
338
|
+
reasoning: agent.reasoning_effort,
|
|
320
339
|
capability: "write",
|
|
321
340
|
prompt: buildPhasePrompt({ guidance: input.guidance, context }),
|
|
322
341
|
reporter: input.reporter,
|
|
@@ -326,54 +345,275 @@ async function runExecution(input) {
|
|
|
326
345
|
}
|
|
327
346
|
}
|
|
328
347
|
async function runReviewLoop(input) {
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
348
|
+
const agent = resolveAgentProfile({
|
|
349
|
+
config: input.config,
|
|
350
|
+
cliHarness: input.cliHarness,
|
|
351
|
+
phase: "review",
|
|
352
|
+
});
|
|
353
|
+
const validationHistory = [];
|
|
354
|
+
for (let round = 1; round <= input.rounds; round += 1) {
|
|
355
|
+
const roundDir = path.join(input.iterationDir, `review-round-${round}`);
|
|
356
|
+
const discoveryPack = await createReviewContextPack({
|
|
357
|
+
dir: path.join(roundDir, "discovery", "review-context"),
|
|
358
|
+
git: input.git,
|
|
359
|
+
scope: "item",
|
|
360
|
+
workItems: [input.item],
|
|
361
|
+
validations: validationHistory,
|
|
362
|
+
});
|
|
363
|
+
const discoveryResult = await input.runPhase({
|
|
332
364
|
phaseId: "review",
|
|
333
|
-
phaseDir: path.join(
|
|
365
|
+
phaseDir: path.join(roundDir, "discovery"),
|
|
334
366
|
workdir: input.git.worktree,
|
|
335
|
-
harness:
|
|
336
|
-
model:
|
|
337
|
-
reasoning:
|
|
367
|
+
harness: agent.harness,
|
|
368
|
+
model: agent.model,
|
|
369
|
+
reasoning: agent.reasoning_effort,
|
|
338
370
|
capability: "readonly",
|
|
339
371
|
prompt: buildPhasePrompt({
|
|
340
372
|
guidance: REVIEW_PROMPT,
|
|
341
373
|
context: buildContextBlock([
|
|
342
374
|
["Work item", workItemSummary(input.item)],
|
|
343
|
-
[
|
|
344
|
-
|
|
345
|
-
truncateForPrompt(diff || "(no changes)"),
|
|
346
|
-
],
|
|
375
|
+
["Review round", round],
|
|
376
|
+
["Review context", reviewContextSummary(discoveryPack)],
|
|
347
377
|
]),
|
|
348
|
-
schema:
|
|
378
|
+
schema: REVIEW_DISCOVERY_SCHEMA,
|
|
349
379
|
}),
|
|
350
|
-
schema:
|
|
380
|
+
schema: REVIEW_DISCOVERY_SCHEMA,
|
|
351
381
|
reporter: input.reporter,
|
|
352
382
|
});
|
|
353
|
-
if (!
|
|
354
|
-
throw new Error(
|
|
383
|
+
if (!discoveryResult.ok) {
|
|
384
|
+
throw new Error(discoveryResult.error);
|
|
355
385
|
}
|
|
356
|
-
const
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
386
|
+
const discovery = discoveryResult.result;
|
|
387
|
+
const proposedBlockers = normalizeFindings(discovery.blockers);
|
|
388
|
+
input.reporter.info(` review round ${round}: ${proposedBlockers.length} proposed blocker(s)`);
|
|
389
|
+
const challenge = await runReviewChallenge({
|
|
390
|
+
phaseId: "review_challenge",
|
|
391
|
+
phaseDir: path.join(roundDir, "challenge"),
|
|
392
|
+
workdir: input.git.worktree,
|
|
393
|
+
agent,
|
|
394
|
+
runPhase: input.runPhase,
|
|
395
|
+
reporter: input.reporter,
|
|
396
|
+
guidance: REVIEW_CHALLENGE_PROMPT,
|
|
397
|
+
contextEntries: [
|
|
398
|
+
["Work item", workItemSummary(input.item)],
|
|
399
|
+
["Review round", round],
|
|
400
|
+
["Review context", reviewContextSummary(discoveryPack)],
|
|
401
|
+
["Proposed blockers", proposedBlockers],
|
|
402
|
+
["Rejected findings from discovery", discovery.rejected_findings ?? []],
|
|
403
|
+
],
|
|
404
|
+
});
|
|
405
|
+
input.reporter.info(` review round ${round}: ${challenge.blockers.length} verified blocker(s)`);
|
|
406
|
+
await runCorrectionValidationLoop({
|
|
407
|
+
scope: "item",
|
|
408
|
+
roundDir,
|
|
409
|
+
git: input.git,
|
|
410
|
+
workItems: [input.item],
|
|
411
|
+
validationHistory,
|
|
412
|
+
blockers: challenge.blockers,
|
|
413
|
+
agent,
|
|
414
|
+
runPhase: input.runPhase,
|
|
415
|
+
reporter: input.reporter,
|
|
416
|
+
revisionPhaseId: "revision",
|
|
417
|
+
validationPhaseId: "review_validation",
|
|
418
|
+
revisionGuidance: REVISION_PROMPT,
|
|
419
|
+
validationGuidance: REVIEW_VALIDATION_PROMPT,
|
|
420
|
+
failureMessage: (blockers) => `Review for #${input.item.number} has unresolved blocker(s) after ${CORRECTION_VALIDATION_MAX_ATTEMPTS} correction validation attempts:${formatBlockers(blockers)}`,
|
|
421
|
+
});
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
/**
 * Run the configured number of global review rounds over the whole run branch.
 *
 * Each round builds a review context pack, runs one read-only discovery phase
 * per entry in `globalReviewRoles`, aggregates the findings, has them
 * challenged, and then hands the verified blockers to the
 * correction/validation loop.
 *
 * @param {object} input - Reads `rounds`, `runDir`, `git`, `completed`,
 *   `config`, `cliHarness`, `runPhase` and `reporter`.
 * @returns {Promise<boolean>} `true` when at least one round reported that it
 *   committed corrections.
 * @throws {Error} When a discovery phase reports `ok: false`, or when a helper
 *   called from here throws.
 */
async function runGlobalReviewLoop(input) {
    const { config, cliHarness, git, reporter } = input;
    // One history array for every round, so later context packs and
    // validation attempts see the outcomes recorded by earlier ones.
    const validationHistory = [];
    let committedCorrections = false;
    for (let round = 1; round <= input.rounds; round += 1) {
        const roundDir = path.join(input.runDir, "final", `global-round-${round}`);
        const discoveryDir = path.join(roundDir, "discovery");
        const branchLabel = `${git.branch} (base ${git.base})`;
        const discoveryPack = await createReviewContextPack({
            dir: path.join(discoveryDir, "review-context"),
            git,
            scope: "global",
            workItems: input.completed,
            validations: validationHistory,
        });
        const contextSummary = reviewContextSummary(discoveryPack);

        // Discovery: roles are run one after another, each with its own agent profile.
        const discoveries = [];
        for (const role of globalReviewRoles) {
            const roleAgent = resolveAgentProfile({
                config,
                cliHarness,
                phase: "global_review",
                role,
            });
            const prompt = buildPhasePrompt({
                guidance: buildGlobalReviewGuidance(role),
                context: buildContextBlock([
                    ["Run branch", branchLabel],
                    ["Review round", round],
                    ["Reviewer role", role],
                    ["Review context", contextSummary],
                ]),
                schema: GLOBAL_REVIEW_SCHEMA,
            });
            const reviewResult = await input.runPhase({
                phaseId: "global_review",
                phaseDir: path.join(discoveryDir, role),
                workdir: git.worktree,
                harness: roleAgent.harness,
                model: roleAgent.model,
                reasoning: roleAgent.reasoning_effort,
                capability: "readonly",
                prompt,
                schema: GLOBAL_REVIEW_SCHEMA,
                reporter,
            });
            if (!reviewResult.ok) {
                throw new Error(reviewResult.error);
            }
            discoveries.push(reviewResult.result);
        }

        const aggregated = aggregateDiscoveries(discoveries);
        reporter.info(`global review round ${round}: ${aggregated.blockers.length} proposed blocker(s)`);

        // Challenge: the phase-level profile (no role) is used here and for
        // the correction/validation phases below.
        const challengeAgent = resolveAgentProfile({
            config,
            cliHarness,
            phase: "global_review",
        });
        const challenge = await runReviewChallenge({
            phaseId: "global_review_challenge",
            phaseDir: path.join(roundDir, "challenge"),
            workdir: git.worktree,
            agent: challengeAgent,
            runPhase: input.runPhase,
            reporter,
            guidance: GLOBAL_REVIEW_CHALLENGE_PROMPT,
            contextEntries: [
                ["Run branch", branchLabel],
                ["Review round", round],
                ["Review context", contextSummary],
                ["Aggregated proposed blockers", aggregated.blockers],
                ["Aggregated rejected findings", aggregated.rejected_findings],
            ],
        });
        reporter.info(`global review round ${round}: ${challenge.blockers.length} verified blocker(s)`);

        const roundCommittedCorrections = await runCorrectionValidationLoop({
            scope: "global",
            roundDir,
            git,
            workItems: input.completed,
            validationHistory,
            blockers: challenge.blockers,
            agent: challengeAgent,
            runPhase: input.runPhase,
            reporter,
            revisionPhaseId: "global_revision",
            validationPhaseId: "global_review_validation",
            revisionGuidance: GLOBAL_REVISION_PROMPT,
            validationGuidance: GLOBAL_REVIEW_VALIDATION_PROMPT,
            commitMessage: "Apply global review corrections",
            failureMessage: (blockers) => `Global review has unresolved blocker(s) after ${CORRECTION_VALIDATION_MAX_ATTEMPTS} correction validation attempts:${formatBlockers(blockers)}`,
        });
        if (roundCommittedCorrections) {
            committedCorrections = true;
        }
    }
    return committedCorrections;
}
|
|
517
|
+
/**
 * Resolve the harness/model/reasoning settings for one phase run.
 *
 * Starts from the top-level config (with `cliHarness` taking precedence over
 * `config.harness`), then layers the phase override and finally the
 * per-role override from `config.agents.global_review.roles`.
 *
 * @param {object} input - Reads `config`, `cliHarness`, and the optional
 *   `phase` and `role`.
 * @returns {{harness: *, model: *, reasoning_effort: *}} The resolved profile.
 */
function resolveAgentProfile(input) {
    const { config, cliHarness, phase, role } = input;
    let resolved = {
        harness: cliHarness ?? config.harness,
        model: config.model,
        reasoning_effort: config.reasoning_effort,
    };
    if (phase) {
        resolved = applyAgentOverride(resolved, phaseAgentOverride(config, phase));
    }
    if (role) {
        // Role overrides are applied last, so they win over the phase override.
        resolved = applyAgentOverride(resolved, config.agents?.global_review?.roles?.[role]);
    }
    return resolved;
}
|
|
531
|
+
/**
 * Look up the agent override configured for a phase.
 *
 * @param {object} config - Config object; `config.agents` may be absent.
 * @param {string} phase - `"execution"`, `"review"`, or anything else; every
 *   other value is treated as the global review phase.
 * @returns {object|undefined} The configured override, if any.
 */
function phaseAgentOverride(config, phase) {
    switch (phase) {
        case "execution":
            return config.agents?.execution;
        case "review":
            return config.agents?.review;
        default:
            return config.agents?.global_review;
    }
}
|
|
540
|
+
/**
 * Layer an optional override on top of an agent profile.
 *
 * @param {{harness: *, model: *, reasoning_effort: *}} profile - Base profile.
 * @param {object} [override] - Partial profile; nullish fields fall back to
 *   the base value.
 * @returns {object} `profile` itself when there is no override, otherwise a
 *   new object containing only the three profile fields.
 */
function applyAgentOverride(profile, override) {
    if (override) {
        const pick = (field) => override[field] ?? profile[field];
        return {
            harness: pick("harness"),
            model: pick("model"),
            reasoning_effort: pick("reasoning_effort"),
        };
    }
    return profile;
}
|
|
550
|
+
/**
 * Build the discovery guidance for one global reviewer role: the shared
 * global review prompt followed by a role-specific focus paragraph.
 *
 * @param {string} role - Reviewer role name.
 * @returns {string} Prompt text for that role.
 */
function buildGlobalReviewGuidance(role) {
    // A Map (rather than a plain object) so that a role named like an
    // inherited property ("toString", "constructor", ...) cannot resolve to
    // something that is not a focus sentence.
    const focusByRole = new Map([
        ["diff-contract", "Focus on the public contract of the diff: APIs, CLI behavior, schemas, config compatibility, and generated artifacts."],
        ["integration", "Focus on integration across touched modules, phase sequencing, artifact paths, and cross-item behavior."],
        ["domain-invariants", "Focus on NyxAgent workflow invariants: engine-owned git side effects, read-only review phases, closed pipeline control flow, and review semantics."],
        ["tests-validation", "Focus on test coverage, validation evidence, failure modes, and whether the committed changes are demonstrably safe."],
    ]);
    // An unrecognized role gets a generic focus line instead of the literal
    // text "undefined" inside the prompt.
    const focus = focusByRole.get(role) ?? "Review the run from the perspective implied by this role name.";
    return `${GLOBAL_REVIEW_PROMPT}\n\nRole focus (${role}): ${focus}`;
}
|
|
559
|
+
/**
 * Run a read-only challenge phase over proposed findings and return its
 * result with `blockers` and `rejected_findings` normalized.
 *
 * @param {object} input - Reads `phaseId`, `phaseDir`, `workdir`, `agent`,
 *   `runPhase`, `reporter`, `guidance` and `contextEntries`.
 * @returns {Promise<object>} The phase result with normalized finding lists.
 * @throws {Error} When the phase reports `ok: false`.
 */
async function runReviewChallenge(input) {
    const { agent } = input;
    const prompt = buildPhasePrompt({
        guidance: input.guidance,
        context: buildContextBlock(input.contextEntries),
        schema: REVIEW_CHALLENGE_SCHEMA,
    });
    const outcome = await input.runPhase({
        phaseId: input.phaseId,
        phaseDir: input.phaseDir,
        workdir: input.workdir,
        harness: agent.harness,
        model: agent.model,
        reasoning: agent.reasoning_effort,
        capability: "readonly",
        prompt,
        schema: REVIEW_CHALLENGE_SCHEMA,
        reporter: input.reporter,
    });
    if (!outcome.ok) {
        throw new Error(outcome.error);
    }
    const verdict = outcome.result;
    return {
        ...verdict,
        blockers: normalizeFindings(verdict.blockers),
        rejected_findings: normalizeFindings(verdict.rejected_findings),
    };
}
|
|
586
|
+
/**
 * Alternate write-capable revision phases and read-only validation phases
 * until no blocker remains or the attempt budget is used up.
 *
 * When `input.commitMessage` is set, the worktree is committed after each
 * revision. Every validation outcome is appended to `input.validationHistory`.
 *
 * @param {object} input - Reads `blockers`, `roundDir`, `git`, `scope`,
 *   `workItems`, `validationHistory`, `agent`, `runPhase`, `reporter`,
 *   `revisionPhaseId`, `validationPhaseId`, `revisionGuidance`,
 *   `validationGuidance`, `failureMessage` and the optional `commitMessage`.
 * @returns {Promise<boolean>} `true` when at least one commit was created.
 * @throws {Error} When a phase reports `ok: false`, or with
 *   `input.failureMessage(pending)` when blockers remain after
 *   `CORRECTION_VALIDATION_MAX_ATTEMPTS` attempts.
 */
async function runCorrectionValidationLoop(input) {
    const { agent, git, reporter } = input;
    let committedCorrections = false;
    let pending = normalizeFindings(input.blockers);
    if (pending.length === 0) {
        return committedCorrections;
    }
    for (let attempt = 1; attempt <= CORRECTION_VALIDATION_MAX_ATTEMPTS; attempt += 1) {
        const attemptLabel = `${attempt}/${CORRECTION_VALIDATION_MAX_ATTEMPTS}`;
        const revisionDir = path.join(input.roundDir, `${input.revisionPhaseId}-${attempt}`);
        const validationDir = path.join(input.roundDir, `${input.validationPhaseId}-${attempt}`);

        // Revision: the agent may write to the worktree.
        const revisionPack = await createReviewContextPack({
            dir: path.join(revisionDir, "review-context"),
            git,
            scope: input.scope,
            workItems: input.workItems,
            validations: input.validationHistory,
        });
        const revision = await input.runPhase({
            phaseId: input.revisionPhaseId,
            phaseDir: revisionDir,
            workdir: git.worktree,
            harness: agent.harness,
            model: agent.model,
            reasoning: agent.reasoning_effort,
            capability: "write",
            prompt: buildPhasePrompt({
                guidance: input.revisionGuidance,
                context: buildContextBlock([
                    ["Review context", reviewContextSummary(revisionPack)],
                    ["Correction attempt", attemptLabel],
                    ["Verified blockers", pending],
                ]),
            }),
            reporter,
        });
        if (!revision.ok) {
            throw new Error(revision.error);
        }

        if (input.commitMessage) {
            const { committed } = await commitAll({
                cwd: git.worktree,
                message: input.commitMessage,
            });
            if (committed) {
                committedCorrections = true;
                reporter.detail("Committed global review corrections.");
            }
        }

        // Validation: a fresh context pack is built after the revision
        // (and commit, if any) so it reflects the corrected state.
        const validationPack = await createReviewContextPack({
            dir: path.join(validationDir, "review-context"),
            git,
            scope: input.scope,
            workItems: input.workItems,
            validations: input.validationHistory,
        });
        const validationResult = await input.runPhase({
            phaseId: input.validationPhaseId,
            phaseDir: validationDir,
            workdir: git.worktree,
            harness: agent.harness,
            model: agent.model,
            reasoning: agent.reasoning_effort,
            capability: "readonly",
            prompt: buildPhasePrompt({
                guidance: input.validationGuidance,
                context: buildContextBlock([
                    ["Review context", reviewContextSummary(validationPack)],
                    ["Correction attempt", attemptLabel],
                    ["Validated blockers", pending],
                ]),
                schema: REVIEW_VALIDATION_SCHEMA,
            }),
            schema: REVIEW_VALIDATION_SCHEMA,
            reporter,
        });
        if (!validationResult.ok) {
            throw new Error(validationResult.error);
        }

        const validations = normalizeValidations(validationResult.result.validations);
        input.validationHistory.push(...validations);
        pending = blockersNeedingCorrection(pending, validations);
        reporter.info(`${input.scope} validation attempt ${attempt}: ${pending.length} unresolved blocker(s)`);
        if (pending.length === 0) {
            return committedCorrections;
        }
    }
    throw new Error(input.failureMessage(pending));
}
|
|
678
|
+
/**
 * Write a directory of review artifacts (patch, diffstat, modified files,
 * commit list, issue summaries, validation history, and a summary.md index)
 * and return the paths of everything written.
 *
 * For scope `"item"` the patch comes from `stageAllAndDiff` and the
 * diffstat/file list from `git diff --cached`; for any other scope all three
 * use the `<base>..HEAD` range. The commit list always uses `<base>..HEAD`.
 *
 * @param {object} input - Reads `dir`, `git` (`worktree`, `base`, `branch`),
 *   `scope`, and the optional `workItems` and `validations`.
 * @returns {Promise<object>} The pack: `dir` plus one `*Path` per artifact.
 * @throws {Error} When a git command or a file-system helper fails.
 */
async function createReviewContextPack(input) {
    await ensureDir(input.dir);
    const patch = input.scope === "item"
        ? await stageAllAndDiff(input.git.worktree)
        : await rangeDiff(input.git.worktree, input.git.base);
    const diffstat = input.scope === "item"
        ? await gitOutput(input.git.worktree, ["diff", "--cached", "--stat"], "diff --cached --stat")
        : await gitOutput(input.git.worktree, ["diff", "--stat", `${input.git.base}..HEAD`], "diff --stat range");
    const files = input.scope === "item"
        ? await gitOutput(input.git.worktree, ["diff", "--cached", "--name-only"], "diff --cached --name-only")
        : await gitOutput(input.git.worktree, ["diff", "--name-only", `${input.git.base}..HEAD`], "diff --name-only range");
    const commits = await gitOutput(input.git.worktree, ["log", "--oneline", `${input.git.base}..HEAD`], "log range");
    const pack = {
        dir: input.dir,
        summaryPath: path.join(input.dir, "summary.md"),
        patchPath: path.join(input.dir, "combined.patch"),
        diffstatPath: path.join(input.dir, "diffstat.txt"),
        commitsPath: path.join(input.dir, "commits.txt"),
        filesPath: path.join(input.dir, "modified-files.txt"),
        issuesPath: path.join(input.dir, "issues.json"),
        validationsPath: path.join(input.dir, "validations.json"),
    };
    await writeText(pack.patchPath, textOrPlaceholder(patch, "(no changes)"));
    await writeText(pack.diffstatPath, textOrPlaceholder(diffstat, "(no diffstat)"));
    await writeText(pack.commitsPath, textOrPlaceholder(commits, "(no commits yet)"));
    await writeText(pack.filesPath, textOrPlaceholder(files, "(no modified files)"));
    await writeText(pack.issuesPath, `${JSON.stringify((input.workItems ?? []).map(workItemSummary), null, 2)}\n`);
    // JSON.stringify(undefined) yields undefined, which would be written as the
    // literal text "undefined"; default to an empty list like workItems above.
    await writeText(pack.validationsPath, `${JSON.stringify(input.validations ?? [], null, 2)}\n`);
    await writeText(pack.summaryPath, [
        "# NyxAgent review context",
        "",
        `Scope: ${input.scope}`,
        `Branch: ${input.git.branch}`,
        `Base: ${input.git.base}`,
        "",
        "Artifacts:",
        `- combined patch: ${pack.patchPath}`,
        `- diffstat: ${pack.diffstatPath}`,
        `- modified files: ${pack.filesPath}`,
        `- commits: ${pack.commitsPath}`,
        `- issues: ${pack.issuesPath}`,
        `- validations: ${pack.validationsPath}`,
        "",
        "Inspect these files, or run the corresponding git commands in the working directory.",
    ].join("\n"));
    return pack;
}
|
|
725
|
+
/**
 * Run a git command and return its stdout.
 *
 * @param {string} cwd - Directory to run git in.
 * @param {string[]} args - Arguments passed to `git`.
 * @param {string} label - Short command description used in the error message.
 * @returns {Promise<string>} The command's stdout.
 * @throws {Error} `git <label> failed: <detail>` when the exit code is not 0;
 *   the detail is stderr, else stdout, else "unknown error".
 */
async function gitOutput(cwd, args, label) {
    // reject: false — the exit code is inspected here instead of letting execa throw.
    const { exitCode, stdout, stderr } = await execa("git", args, { cwd, reject: false });
    if (exitCode === 0) {
        return stdout;
    }
    const detail = (stderr || stdout || "unknown error").trim();
    throw new Error(`git ${label} failed: ${detail}`);
}
|
|
733
|
+
/**
 * Project a review context pack onto the key names used in prompt context.
 *
 * @param {object} pack - Object with `dir` and the `*Path` fields of a pack.
 * @returns {object} The same paths under snake_case keys.
 */
function reviewContextSummary(pack) {
    const {
        dir,
        summaryPath,
        patchPath,
        diffstatPath,
        filesPath,
        commitsPath,
        issuesPath,
        validationsPath,
    } = pack;
    return {
        directory: dir,
        summary: summaryPath,
        combined_patch: patchPath,
        diffstat: diffstatPath,
        modified_files: filesPath,
        commits: commitsPath,
        issues: issuesPath,
        validations: validationsPath,
    };
}
|
|
745
|
+
/**
 * Merge several discovery results into one.
 *
 * Non-empty summaries are joined with newlines; each finding list is
 * normalized per discovery, concatenated, and de-duplicated.
 *
 * @param {object[]} discoveries - Discovery results.
 * @returns {object} Combined `summary` plus the five finding lists.
 */
function aggregateDiscoveries(discoveries) {
    const merged = (field) => dedupeFindings(
        discoveries.flatMap((discovery) => normalizeFindings(discovery[field])),
    );
    const summaries = [];
    for (const { summary } of discoveries) {
        if (summary) {
            summaries.push(summary);
        }
    }
    return {
        summary: summaries.join("\n"),
        blockers: merged("blockers"),
        test_gaps: merged("test_gaps"),
        advisory_findings: merged("advisory_findings"),
        uncertain_findings: merged("uncertain_findings"),
        rejected_findings: merged("rejected_findings"),
    };
}
|
|
758
|
+
/**
 * Work out which blockers still need a correction after a validation pass.
 *
 * - `unresolved` validations keep the matching pending blocker, updated with
 *   the validator's required change and evidence when it supplied them.
 * - `regression_from_correction` validations become new high-confidence blockers.
 * - Pending blockers the validator did not mention at all stay pending.
 * - Any other status for a mentioned blocker drops it.
 *
 * Titles are matched via `normalizeTitle`.
 *
 * @param {object[]} pending - Blockers sent to the validator.
 * @param {object[]} validations - Normalized validation entries.
 * @returns {object[]} De-duplicated blockers for the next correction attempt.
 */
function blockersNeedingCorrection(pending, validations) {
    // `??` alone would let an empty or whitespace-only string from the
    // validator replace a usable instruction, so blank (and non-string)
    // values are treated as "not provided".
    const providedChange = (value) => (typeof value === "string" && value.trim().length > 0 ? value : undefined);
    const byTitle = new Map(pending.map((blocker) => [normalizeTitle(blocker.title), blocker]));
    const seen = new Set();
    const unresolved = [];
    for (const validation of validations) {
        const key = normalizeTitle(validation.blocker_title);
        seen.add(key);
        if (validation.status === "unresolved") {
            const original = byTitle.get(key);
            if (original) {
                unresolved.push({
                    ...original,
                    required_change: providedChange(validation.required_change) ?? original.required_change,
                    evidence: normalizeEvidence(validation.evidence, original.evidence),
                });
            }
            continue;
        }
        if (validation.status === "regression_from_correction") {
            unresolved.push({
                title: validation.blocker_title,
                required_change: providedChange(validation.required_change) ??
                    `Fix regression from correction: ${validation.blocker_title}`,
                confidence: "high",
                evidence: normalizeEvidence(validation.evidence),
            });
        }
    }
    // Blockers the validator never mentioned are kept rather than assumed resolved.
    for (const blocker of pending) {
        if (!seen.has(normalizeTitle(blocker.title))) {
            unresolved.push(blocker);
        }
    }
    return dedupeFindings(unresolved);
}
|
|
793
|
+
/**
 * Reduce raw findings to the four canonical fields and de-duplicate them.
 *
 * Entries with a falsy `title` or `required_change` are dropped; any extra
 * properties on a finding are not carried over.
 *
 * @param {object[]} [findings] - Raw findings; nullish is treated as empty.
 * @returns {object[]} Normalized, de-duplicated findings.
 */
function normalizeFindings(findings) {
    const normalized = [];
    for (const finding of findings ?? []) {
        if (!finding.title || !finding.required_change) {
            continue;
        }
        normalized.push({
            title: finding.title,
            required_change: finding.required_change,
            confidence: normalizeConfidence(finding.confidence),
            evidence: normalizeEvidence(finding.evidence),
        });
    }
    return dedupeFindings(normalized);
}
|
|
803
|
+
/**
 * Keep only validation entries that name a blocker and carry a known status,
 * and make sure each has a non-empty evidence list.
 *
 * @param {object[]} [validations] - Raw entries; nullish is treated as empty.
 * @returns {object[]} Copies of the accepted entries with normalized evidence.
 */
function normalizeValidations(validations) {
    const knownStatuses = [
        "resolved",
        "unresolved",
        "false_positive",
        "regression_from_correction",
    ];
    const accepted = [];
    for (const entry of validations ?? []) {
        if (!entry.blocker_title || !knownStatuses.includes(entry.status)) {
            continue;
        }
        accepted.push({
            ...entry,
            evidence: normalizeEvidence(entry.evidence),
        });
    }
    return accepted;
}
|
|
817
|
+
/**
 * Drop findings that repeat an earlier one, keeping first occurrences in order.
 *
 * Two findings are duplicates when their normalized titles and their
 * trimmed, lower-cased `required_change` texts are both equal.
 *
 * @param {object[]} findings - Findings with `title` and `required_change`.
 * @returns {object[]} A new array without duplicates.
 */
function dedupeFindings(findings) {
    // A Map keeps insertion order, so its values are the first occurrences in input order.
    const firstByKey = new Map();
    for (const finding of findings) {
        const fingerprint = `${normalizeTitle(finding.title)}\n${finding.required_change.trim().toLowerCase()}`;
        if (!firstByKey.has(fingerprint)) {
            firstByKey.set(fingerprint, finding);
        }
    }
    return [...firstByKey.values()];
}
|
|
830
|
+
/**
 * Pick a non-empty evidence list.
 *
 * @param {*} evidence - Preferred value; used only if it is a non-empty array.
 * @param {*} [fallback] - Used when it is truthy and has a positive `length`.
 * @returns {*} `evidence`, else `fallback`, else a single placeholder entry.
 */
function normalizeEvidence(evidence, fallback) {
    const hasEvidence = Array.isArray(evidence) && evidence.length > 0;
    if (hasEvidence) {
        return evidence;
    }
    return fallback?.length > 0 ? fallback : [{ detail: "No evidence provided." }];
}
|
|
839
|
+
/**
 * Coerce a confidence value to one of the three known levels.
 *
 * @param {*} value - Candidate confidence.
 * @returns {"low"|"medium"|"high"} `value` when recognized, otherwise "medium".
 */
function normalizeConfidence(value) {
    const levels = ["low", "medium", "high"];
    return levels.includes(value) ? value : "medium";
}
|
|
845
|
+
/**
 * Build the comparison key for a finding or blocker title.
 *
 * Trims, collapses every run of whitespace to one space, and lower-cases, so
 * titles that differ only in spacing or letter case compare equal. Nullish
 * input yields an empty key and other non-strings are stringified, instead
 * of throwing.
 *
 * @param {*} value - Title text.
 * @returns {string} Normalized key.
 */
function normalizeTitle(value) {
    return String(value ?? "").trim().replace(/\s+/g, " ").toLowerCase();
}
|
|
848
|
+
/**
 * Prepare command output for writing to an artifact file.
 *
 * Returns the placeholder when the text is missing or whitespace-only.
 * Otherwise the text is kept as-is except that trailing line terminators are
 * replaced by exactly one newline. Leading and in-line whitespace is
 * preserved on purpose: indentation at the start of the output and a final
 * line consisting only of spaces are part of the content, not padding.
 *
 * @param {string} text - Raw output.
 * @param {string} placeholder - Text to use when there is no output.
 * @returns {string} Content ending in a single newline.
 */
function textOrPlaceholder(text, placeholder) {
    const content = typeof text === "string" ? text : "";
    if (content.trim().length === 0) {
        return `${placeholder}\n`;
    }
    // Strip only trailing CR/LF characters; a plain loop avoids regex backtracking.
    let end = content.length;
    while (end > 0 && (content[end - 1] === "\n" || content[end - 1] === "\r")) {
        end -= 1;
    }
    return `${content.slice(0, end)}\n`;
}
|
|
453
852
|
async function loadExecutionGuidance(nyxDir) {
|
|
454
853
|
const override = path.join(nyxDir, "prompts", "execution.md");
|
|
@@ -508,12 +907,12 @@ function buildDraftPrBody(items, reason) {
|
|
|
508
907
|
buildPrBody(items),
|
|
509
908
|
].join("\n");
|
|
510
909
|
}
|
|
511
|
-
/** Render
|
|
512
|
-
function
|
|
513
|
-
if (
|
|
910
|
+
/**
 * Format unresolved blockers as a suffix for a failure message.
 *
 * @param {Array<{title: string, required_change: string}>} blockers
 * @returns {string} An empty string for no blockers; otherwise a blank line,
 *   a heading, and one `- title: required_change` bullet per blocker.
 */
function formatBlockers(blockers) {
    if (blockers.length === 0) {
        return "";
    }
    const bullets = [];
    for (const { title, required_change: requiredChange } of blockers) {
        bullets.push(`- ${title}: ${requiredChange}`);
    }
    return `\n\nUnresolved review blockers:\n${bullets.join("\n")}`;
}
|
package/dist/runtime/schemas.js
CHANGED
|
@@ -12,41 +12,127 @@ export const SELECTION_SCHEMA = {
|
|
|
12
12
|
work_item_keys: {
|
|
13
13
|
type: "array",
|
|
14
14
|
items: { type: "string" },
|
|
15
|
-
description: "Ordered keys of the chosen candidates (prerequisites first)."
|
|
16
|
-
}
|
|
15
|
+
description: "Ordered keys of the chosen candidates (prerequisites first).",
|
|
16
|
+
},
|
|
17
17
|
},
|
|
18
18
|
allOf: [
|
|
19
19
|
{
|
|
20
20
|
if: { properties: { outcome: { const: "selected" } } },
|
|
21
|
-
then: { required: ["work_item_keys"] }
|
|
22
|
-
}
|
|
21
|
+
then: { required: ["work_item_keys"] },
|
|
22
|
+
},
|
|
23
23
|
],
|
|
24
|
-
additionalProperties: true
|
|
24
|
+
additionalProperties: true,
|
|
25
|
+
};
|
|
26
|
+
/**
 * JSON Schema fragment for a single evidence entry.
 * No property is required and extra properties are allowed, so an entry may
 * carry any combination of the fields below.
 */
const evidenceSchema = {
    type: "object",
    properties: {
        file: { type: "string" },
        line: { type: "integer", minimum: 1 },
        command: { type: "string" },
        observation: { type: "string" },
        detail: { type: "string" },
    },
    additionalProperties: true,
};
|
|
37
|
+
/**
 * JSON Schema fragment for one review finding: a title, the change it
 * requires, a confidence level, and at least one evidence entry.
 */
const findingSchema = {
    type: "object",
    required: ["title", "required_change", "confidence", "evidence"],
    properties: {
        title: { type: "string", minLength: 1 },
        required_change: { type: "string", minLength: 1 },
        confidence: {
            type: "string",
            enum: ["low", "medium", "high"],
        },
        evidence: {
            type: "array",
            minItems: 1,
            items: evidenceSchema,
            description: "Concrete evidence: file+line, command+observation, or an equivalent current-code observation.",
        },
    },
    additionalProperties: true,
};
|
|
26
|
-
const
|
|
56
|
+
/** JSON Schema fragment for a list of findings; the same object is reused for every finding list below. */
const findingArraySchema = {
    type: "array",
    items: findingSchema,
    default: [],
};
|
|
61
|
+
/**
 * Output schema for a review discovery phase: a summary plus five finding
 * lists, all of which must be present (they may be empty arrays).
 */
const reviewDiscoverySchema = {
    $schema: "https://json-schema.org/draft/2020-12/schema",
    type: "object",
    required: [
        "summary",
        "blockers",
        "test_gaps",
        "advisory_findings",
        "uncertain_findings",
        "rejected_findings",
    ],
    properties: {
        summary: {
            type: "string",
            minLength: 1,
            description: "A brief assessment of the work.",
        },
        blockers: findingArraySchema,
        test_gaps: findingArraySchema,
        advisory_findings: findingArraySchema,
        uncertain_findings: findingArraySchema,
        rejected_findings: findingArraySchema,
    },
    additionalProperties: true,
};
|
|
86
|
+
/**
 * Output schema for a review challenge phase: a summary, the blockers that
 * were kept, and the findings that were rejected.
 */
const reviewChallengeSchema = {
    $schema: "https://json-schema.org/draft/2020-12/schema",
    type: "object",
    required: ["summary", "blockers", "rejected_findings"],
    properties: {
        summary: { type: "string", minLength: 1 },
        blockers: findingArraySchema,
        rejected_findings: findingArraySchema,
    },
    additionalProperties: true,
};
|
|
97
|
+
/**
 * Output schema for a correction validation phase: a summary plus one
 * validation entry per blocker, each with a status and evidence.
 */
const validationSchema = {
    $schema: "https://json-schema.org/draft/2020-12/schema",
    type: "object",
    required: ["summary", "validations"],
    properties: {
        summary: { type: "string", minLength: 1 },
        validations: {
            type: "array",
            items: {
                type: "object",
                required: ["blocker_title", "status", "evidence"],
                properties: {
                    blocker_title: { type: "string", minLength: 1 },
                    status: {
                        type: "string",
                        enum: [
                            "resolved",
                            "unresolved",
                            "false_positive",
                            "regression_from_correction",
                        ],
                    },
                    // NOTE(review): described as required for some statuses, but it is
                    // not listed in `required` and has no minLength, so it may be
                    // absent or empty in schema-valid output.
                    required_change: {
                        type: "string",
                        description: "Required when the status remains unresolved or is a correction-caused regression.",
                    },
                    evidence: {
                        type: "array",
                        minItems: 1,
                        items: evidenceSchema,
                    },
                },
                additionalProperties: true,
            },
        },
    },
    additionalProperties: true,
};
|
|
51
|
-
export const
|
|
52
|
-
export const GLOBAL_REVIEW_SCHEMA =
|
|
135
|
+
// Public schema names. The per-item discovery schema and the global review
// schema are the same object.
export const REVIEW_DISCOVERY_SCHEMA = reviewDiscoverySchema;
export const GLOBAL_REVIEW_SCHEMA = reviewDiscoverySchema;
export const REVIEW_CHALLENGE_SCHEMA = reviewChallengeSchema;
export const REVIEW_VALIDATION_SCHEMA = validationSchema;
|