@nyxa/nyx-agent 0.6.1 → 0.8.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,8 @@ For every run NyxAgent:
11
11
  confirm the proposed checklist.
12
12
  2. For each selected issue, in an isolated git **worktree**:
13
13
  - **implements** it (the agent — the only customizable prompt),
14
- - optionally **reviews** and **revises** it (bounded loop),
14
+ - optionally **reviews** it in bounded discovery rounds, then revises only
15
+ verified blockers with locked validation,
15
16
  - **commits** the change (the engine, deterministically).
16
17
  3. Optionally runs a **global review** across the whole run.
17
18
  4. **Pushes** the run branch and **opens one pull request** (the engine).
@@ -31,6 +32,7 @@ nyxagent init # create .nyxagent/config.json (interactive)
31
32
  nyxagent run # run the pipeline, confirming selected work items
32
33
  nyxagent run --yes # accept the agent selection without prompting
33
34
  nyxagent run --harness claude # override the configured harness for one run
35
+ nyxagent run --verbose # stream agent output and runtime details
34
36
  nyxagent update # self-update to the latest published version
35
37
  ```
36
38
 
@@ -44,7 +46,7 @@ nyxagent update # self-update to the latest published version
44
46
  "model": "gpt-5.5",
45
47
  "reasoning_effort": "medium",
46
48
  "review": "each",
47
- "review_max_attempts": 4,
49
+ "review_rounds": { "each": 1, "global": 1 },
48
50
  "tracker": { "type": "github", "repo": "owner/repo" },
49
51
  "base_branch": "main",
50
52
  "max_iterations": 5
@@ -53,12 +55,19 @@ nyxagent update # self-update to the latest published version
53
55
 
54
56
  - `harness`: `codex` or `claude` (override per run with `--harness`).
55
57
  - `review`: `each` (per task), `all` (global only), `both`, or `none`.
56
- - `review_max_attempts`: review+revise rounds per stage before the run fails (default 4).
58
+ - `review_rounds.each`: fresh per-task discovery rounds (default 1).
59
+ - `review_rounds.global`: fresh global discovery rounds (default 1).
60
+ - `review_max_attempts`: deprecated; accepted for old configs with a warning, but
61
+ ignored by the review loop.
62
+ - `agents.execution`, `agents.review`, `agents.global_review`, and
63
+ `agents.global_review.roles.<role>` can override `harness`, `model`, and
64
+ `reasoning_effort` for specialized phases. Global review roles are
65
+ `diff-contract`, `integration`, `domain-invariants`, and `tests-validation`.
57
66
  - `base_branch`: optional; defaults to the current branch at run time.
58
67
 
59
- If a run fails review after exhausting its attempts but has already produced
68
+ If a run fails review validation but has already produced
60
69
  commits, NyxAgent pushes the branch and opens a **draft** pull request with the
61
- unresolved feedback, so the work is never stranded on an orphaned branch.
70
+ unresolved blockers, so the work is never stranded on an orphaned branch.
62
71
 
63
72
  ## Requirements
64
73
 
package/dist/cli.js CHANGED
@@ -20,7 +20,9 @@ program
20
20
  .option("--model <name>", "model name")
21
21
  .option("--reasoning-effort <level>", "reasoning effort (default: medium)")
22
22
  .option("--review <mode>", "review strategy: each, all, both, or none")
23
- .option("--review-attempts <count>", "max review attempts per stage (default: 4)")
23
+ .option("--review-rounds-each <count>", "per-work-item review discovery rounds (default: 1)")
24
+ .option("--review-rounds-global <count>", "global review discovery rounds (default: 1)")
25
+ .option("--review-attempts <count>", "deprecated alias for both review round counts")
24
26
  .option("--repo <owner/repo>", "GitHub repository")
25
27
  .option("--base-branch <branch>", "base branch (default: current branch)")
26
28
  .option("--max-iterations <count>", "maximum work items per run")
@@ -34,6 +36,7 @@ program
34
36
  .option("--config <path>", "config path (default: .nyxagent/config.json)")
35
37
  .option("--harness <name>", "override the configured harness: codex or claude")
36
38
  .option("-y, --yes", "accept the agent-selected work items without prompting")
39
+ .option("--verbose", "stream agent output and NyxAgent runtime details")
37
40
  .action(async (options) => {
38
41
  await runCommand(options);
39
42
  });
@@ -2,8 +2,8 @@
2
2
  import path from "node:path";
3
3
  import { input, number as numberPrompt, select } from "@inquirer/prompts";
4
4
  import pc from "picocolors";
5
- import { harnessNames, reviewModes } from "../config/schema.js";
6
- import { ensureDir, pathExists, readText, writeText } from "../runtime/files.js";
5
+ import { harnessNames, reviewModes, } from "../config/schema.js";
6
+ import { ensureDir, pathExists, readText, writeText, } from "../runtime/files.js";
7
7
  import { getNyxDir, relativeToProject } from "../runtime/paths.js";
8
8
  import { EXECUTION_PROMPT_FILE } from "../runtime/prompts.js";
9
9
  const DEFAULT_CODEX_MODEL = "gpt-5.5";
@@ -17,7 +17,7 @@ const GITIGNORE_ENTRIES = [
17
17
  ".nyxagent/state.json",
18
18
  ".nyxagent/config.json",
19
19
  ".nyxagent/config.toml",
20
- ".nyxagent/prompts/"
20
+ ".nyxagent/prompts/",
21
21
  ];
22
22
  export async function initCommand(options, projectRoot = process.cwd()) {
23
23
  const root = path.resolve(projectRoot);
@@ -46,14 +46,14 @@ async function resolveInitOptions(options) {
46
46
  message: "Default harness",
47
47
  choices: [
48
48
  { name: "codex", value: "codex" },
49
- { name: "claude", value: "claude" }
50
- ]
49
+ { name: "claude", value: "claude" },
50
+ ],
51
51
  });
52
52
  const model = options.model ??
53
53
  (await input({
54
54
  message: "Model",
55
55
  default: harness === "codex" ? DEFAULT_CODEX_MODEL : "",
56
- validate: (value) => value.trim().length > 0 || "Model is required"
56
+ validate: (value) => value.trim().length > 0 || "Model is required",
57
57
  }));
58
58
  const reasoning_effort = options.reasoningEffort ??
59
59
  (await input({ message: "Reasoning effort", default: "medium" }));
@@ -65,34 +65,27 @@ async function resolveInitOptions(options) {
65
65
  { name: "After each task", value: "each" },
66
66
  { name: "After all tasks (global review)", value: "all" },
67
67
  { name: "Both per-task and global", value: "both" },
68
- { name: "No review", value: "none" }
68
+ { name: "No review", value: "none" },
69
69
  ],
70
- default: "each"
70
+ default: "each",
71
71
  });
72
- const review_max_attempts = review === "none"
73
- ? 4
74
- : parseReviewAttempts(options.reviewAttempts) ??
75
- (await numberPrompt({
76
- message: "Max review attempts per stage",
77
- default: 4,
78
- required: true
79
- }));
80
- if (!Number.isInteger(review_max_attempts) || review_max_attempts <= 0) {
81
- throw new Error("review attempts must be a positive integer");
82
- }
83
- const repo = options.repo ?? (await input({ message: "GitHub repository (owner/repo)" }));
72
+ const review_rounds = await resolveReviewRounds(options, review);
73
+ const repo = options.repo ??
74
+ (await input({ message: "GitHub repository (owner/repo)" }));
84
75
  validateRepository(repo);
85
76
  const baseBranchInput = options.baseBranch ??
86
77
  (await input({
87
78
  message: "Base branch (blank = current branch at run time)",
88
- default: ""
79
+ default: "",
89
80
  }));
90
- const base_branch = baseBranchInput.trim() ? baseBranchInput.trim() : undefined;
81
+ const base_branch = baseBranchInput.trim()
82
+ ? baseBranchInput.trim()
83
+ : undefined;
91
84
  const max_iterations = parseMaxIterations(options.maxIterations) ??
92
85
  (await numberPrompt({
93
86
  message: "Max work items per run",
94
87
  default: 5,
95
- required: true
88
+ required: true,
96
89
  }));
97
90
  if (!Number.isInteger(max_iterations) || max_iterations <= 0) {
98
91
  throw new Error("max iterations must be a positive integer");
@@ -102,10 +95,10 @@ async function resolveInitOptions(options) {
102
95
  model: model.trim(),
103
96
  reasoning_effort: reasoning_effort.trim() || "medium",
104
97
  review,
105
- review_max_attempts,
98
+ review_rounds,
106
99
  repo,
107
100
  base_branch,
108
- max_iterations
101
+ max_iterations,
109
102
  };
110
103
  }
111
104
  function buildConfig(options) {
@@ -115,11 +108,11 @@ function buildConfig(options) {
115
108
  reasoning_effort: options.reasoning_effort,
116
109
  review: options.review,
117
110
  tracker: { type: "github", repo: options.repo },
118
- max_iterations: options.max_iterations
111
+ max_iterations: options.max_iterations,
119
112
  };
120
- // No point persisting an attempts cap when reviews are disabled.
113
+ // No point persisting review rounds when reviews are disabled.
121
114
  if (options.review !== "none") {
122
- config.review_max_attempts = options.review_max_attempts;
115
+ config.review_rounds = options.review_rounds;
123
116
  }
124
117
  if (options.base_branch) {
125
118
  config.base_branch = options.base_branch;
@@ -149,7 +142,38 @@ function parseMaxIterations(value) {
149
142
  }
150
143
  return Number.parseInt(value, 10);
151
144
  }
152
- function parseReviewAttempts(value) {
145
+ async function resolveReviewRounds(options, review) {
146
+ if (review === "none") {
147
+ return { each: 1, global: 1 };
148
+ }
149
+ const deprecatedAttempts = parsePositiveInteger(options.reviewAttempts);
150
+ const each = parsePositiveInteger(options.reviewRoundsEach) ??
151
+ deprecatedAttempts ??
152
+ (review === "each" || review === "both"
153
+ ? await numberPrompt({
154
+ message: "Review rounds per work item",
155
+ default: 1,
156
+ required: true,
157
+ })
158
+ : 1);
159
+ const global = parsePositiveInteger(options.reviewRoundsGlobal) ??
160
+ deprecatedAttempts ??
161
+ (review === "all" || review === "both"
162
+ ? await numberPrompt({
163
+ message: "Global review rounds",
164
+ default: 1,
165
+ required: true,
166
+ })
167
+ : 1);
168
+ if (!Number.isInteger(each) || each <= 0) {
169
+ throw new Error("review_rounds.each must be a positive integer");
170
+ }
171
+ if (!Number.isInteger(global) || global <= 0) {
172
+ throw new Error("review_rounds.global must be a positive integer");
173
+ }
174
+ return { each, global };
175
+ }
176
+ function parsePositiveInteger(value) {
153
177
  if (value === undefined) {
154
178
  return undefined;
155
179
  }
@@ -9,7 +9,8 @@ export async function runCommand(options, projectRoot = process.cwd()) {
9
9
  ? path.resolve(projectRoot, options.config)
10
10
  : undefined,
11
11
  harness: normalizeHarness(options.harness),
12
- autoAcceptSelection: options.yes ?? false
12
+ autoAcceptSelection: options.yes ?? false,
13
+ verbose: options.verbose ?? false,
13
14
  });
14
15
  }
15
16
  function normalizeHarness(value) {
@@ -7,7 +7,37 @@ import { z } from "zod";
7
7
  */
8
8
  export const harnessNames = ["codex", "claude"];
9
9
  export const reviewModes = ["each", "all", "both", "none"];
10
+ export const globalReviewRoles = [
11
+ "diff-contract",
12
+ "integration",
13
+ "domain-invariants",
14
+ "tests-validation",
15
+ ];
10
16
  const githubRepositoryPattern = /^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/;
17
+ const reviewRoundsSchema = z
18
+ .object({
19
+ each: z.number().int().positive().default(1),
20
+ global: z.number().int().positive().default(1),
21
+ })
22
+ .default({ each: 1, global: 1 });
23
+ const agentOverrideSchema = z
24
+ .object({
25
+ harness: z.enum(harnessNames).optional(),
26
+ model: z.string().min(1).optional(),
27
+ reasoning_effort: z.string().min(1).optional(),
28
+ })
29
+ .strict();
30
+ const globalReviewAgentOverrideSchema = agentOverrideSchema.extend({
31
+ roles: z
32
+ .object({
33
+ "diff-contract": agentOverrideSchema.optional(),
34
+ integration: agentOverrideSchema.optional(),
35
+ "domain-invariants": agentOverrideSchema.optional(),
36
+ "tests-validation": agentOverrideSchema.optional(),
37
+ })
38
+ .strict()
39
+ .optional(),
40
+ });
11
41
  export const nyxConfigSchema = z
12
42
  .object({
13
43
  /** Which agent CLI runs each phase. Overridable per run via `run --harness`. */
@@ -18,18 +48,29 @@ export const nyxConfigSchema = z
18
48
  reasoning_effort: z.string().min(1).default("medium"),
19
49
  /** When the agent reviews its own work. */
20
50
  review: z.enum(reviewModes).default("each"),
21
- /** How many review+revise rounds a review stage gets before the run fails. */
22
- review_max_attempts: z.number().int().positive().default(4),
51
+ /** How many fresh discovery rounds each review stage may run. */
52
+ review_rounds: reviewRoundsSchema,
53
+ /** Deprecated: accepted for existing configs, but no longer drives reviews. */
54
+ review_max_attempts: z.number().int().positive().optional(),
55
+ /** Optional agent overrides by phase and global-review role. */
56
+ agents: z
57
+ .object({
58
+ execution: agentOverrideSchema.optional(),
59
+ review: agentOverrideSchema.optional(),
60
+ global_review: globalReviewAgentOverrideSchema.optional(),
61
+ })
62
+ .strict()
63
+ .optional(),
23
64
  /** Work item tracker. GitHub issues only in this version. */
24
65
  tracker: z.object({
25
66
  type: z.literal("github"),
26
67
  repo: z
27
68
  .string()
28
- .regex(githubRepositoryPattern, 'tracker.repo must be "owner/repo"')
69
+ .regex(githubRepositoryPattern, 'tracker.repo must be "owner/repo"'),
29
70
  }),
30
71
  /** Base branch the run branch is cut from. Defaults to the current branch. */
31
72
  base_branch: z.string().min(1).optional(),
32
73
  /** Maximum work items processed in a single run. */
33
- max_iterations: z.number().int().positive().default(5)
74
+ max_iterations: z.number().int().positive().default(5),
34
75
  })
35
76
  .strict();
@@ -23,39 +23,72 @@ test, implement the smallest change that satisfies it, then tidy the result.
23
23
 
24
24
  Do not commit and do not touch git — NyxAgent commits your changes for you. Leave
25
25
  clear validation evidence (commands run and their results) in your final response.`;
26
- export const REVIEW_PROMPT = `Review the implementation of the selected work item.
26
+ export const REVIEW_PROMPT = `Discover findings in the implementation of the selected work item.
27
27
 
28
- The uncommitted changes for this item are shown as a diff in the context above; you
29
- may also read files in the working directory. Stay read-only and do not modify
30
- anything.
28
+ Use the review-context artifact paths in the context above. Inspect the patch file,
29
+ diffstat, changed-files list, and the working directory as needed. Stay read-only
30
+ and do not modify anything.
31
31
 
32
- Assess: alignment with the work item, correctness and regression risk, test or
33
- validation evidence, design fit, and security or data-safety concerns.
32
+ This is discovery for the current review round only. Assess alignment with the work
33
+ item, correctness and regression risk, test or validation evidence, design fit, and
34
+ security or data-safety concerns.
34
35
 
35
- Set outcome to "approved" when the work is ready, or "changes_requested" with a
36
- concrete, actionable list in required_changes. Always include a short summary.`;
36
+ Put only must-fix issues in blockers. Put missing or weak validation in test_gaps,
37
+ non-blocking concerns in advisory_findings, uncertain suspicions in
38
+ uncertain_findings, and explicitly refuted candidates in rejected_findings.`;
39
+ export const REVIEW_CHALLENGE_PROMPT = `Challenge the proposed blockers for the selected work item.
40
+
41
+ Stay read-only. Try to refute each proposed blocker using the current code,
42
+ review-context artifacts, and concrete evidence. Return only blockers that remain
43
+ valid and actionable. Move false positives or already-satisfied findings to
44
+ rejected_findings with evidence. Do not introduce new findings in this phase.`;
37
45
  export const REVISION_PROMPT = `Apply the changes requested by the review for the selected work item.
38
46
 
39
- The required changes are listed in the context above. Address exactly those, keeping
40
- the work focused. Do not commit — NyxAgent commits your changes for you.`;
47
+ The verified blockers are listed in the context above. Address exactly those,
48
+ keeping the work focused. Do not commit — NyxAgent commits your changes for you.`;
49
+ export const REVIEW_VALIDATION_PROMPT = `Validate the correction for the previously verified blockers.
50
+
51
+ Stay read-only. Validate only the blockers listed in the context above. Do not run a
52
+ new review and do not introduce unrelated new findings. For each blocker, return one
53
+ status: resolved, unresolved, false_positive, or regression_from_correction.
54
+
55
+ Use regression_from_correction only when the correction itself directly created a
56
+ new blocker and the evidence proves that causal link.`;
41
57
  export const GLOBAL_REVIEW_PROMPT = `Review the entire run as a whole, now that every selected work item is implemented
42
58
  and committed.
43
59
 
44
- The combined diff for the run is shown in the context above; you may also read files
45
- in the working directory. Stay read-only and do not modify anything.
60
+ Use the review-context artifact paths in the context above. Inspect the patch file,
61
+ diffstat, changed-files list, commit list, and the working directory as needed. Stay
62
+ read-only and do not modify anything.
46
63
 
47
64
  Focus on cross-cutting concerns a per-item review cannot see: integration between
48
65
  items, regressions one item introduced in another, overall design coherence,
49
66
  duplication, and gaps versus the issues' intent.
50
67
 
51
- Set outcome to "approved" when the run is coherent and ready, or
52
- "changes_requested" with a concrete, actionable list in required_changes. Always
53
- include a short summary.`;
68
+ Return typed findings. Put only must-fix issues in blockers. Put missing or weak
69
+ validation in test_gaps, non-blocking concerns in advisory_findings, uncertain
70
+ suspicions in uncertain_findings, and explicitly refuted candidates in
71
+ rejected_findings.`;
72
+ export const GLOBAL_REVIEW_CHALLENGE_PROMPT = `Challenge the aggregated global-review blockers.
73
+
74
+ Stay read-only. Try to refute each proposed blocker using the current code,
75
+ review-context artifacts, and concrete evidence. Return only blockers that remain
76
+ valid and actionable. Move false positives or already-satisfied findings to
77
+ rejected_findings with evidence. Do not introduce new findings in this phase.`;
54
78
  export const GLOBAL_REVISION_PROMPT = `Apply the changes requested by the global review of the whole run.
55
79
 
56
- The required changes are listed in the context above. Address exactly those, across
80
+ The verified blockers are listed in the context above. Address exactly those, across
57
81
  whichever work items are affected. Do not commit — NyxAgent commits your corrections
58
82
  for you.`;
83
+ export const GLOBAL_REVIEW_VALIDATION_PROMPT = `Validate the global review correction for the previously verified blockers.
84
+
85
+ Stay read-only. Validate only the blockers listed in the context above. Do not run a
86
+ new global review and do not introduce unrelated new findings. For each blocker,
87
+ return one status: resolved, unresolved, false_positive, or
88
+ regression_from_correction.
89
+
90
+ Use regression_from_correction only when the correction itself directly created a
91
+ new blocker and the evidence proves that causal link.`;
59
92
  /** Rendered into .nyxagent/prompts/execution.md at init; the only editable prompt. */
60
93
  export const EXECUTION_PROMPT_FILE = `${EXECUTION_PROMPT}
61
94
  `;
@@ -88,7 +121,7 @@ export function buildPhasePrompt(input) {
88
121
  "",
89
122
  "## Instructions",
90
123
  "",
91
- input.guidance.trim()
124
+ input.guidance.trim(),
92
125
  ];
93
126
  if (input.schema) {
94
127
  parts.push("", "## Required result", "", "End your response with a single <nyxagent_result> block containing JSON that", "matches this schema. NyxAgent parses the last such block, validates it, and", "ignores everything else for control flow.", "", "```json", JSON.stringify(input.schema, null, 2), "```", "", "<nyxagent_result>", "{ ... }", "</nyxagent_result>");
@@ -0,0 +1,65 @@
1
+ import path from "node:path";
2
+ import pc from "picocolors";
3
+ export function createRunReporter(options = {}) {
4
+ const verbose = options.verbose ?? false;
5
+ const writeStdout = options.writeStdout ?? ((line) => console.log(line));
6
+ const writeStderr = options.writeStderr ??
7
+ ((line) => {
8
+ process.stderr.write(`${line}\n`);
9
+ });
10
+ const stdout = (line) => writeStdout(line);
11
+ const stderr = (line) => writeStderr(line);
12
+ return {
13
+ verbose,
14
+ heading: (message) => stdout(pc.bold(message)),
15
+ info: (message) => stdout(message),
16
+ section: (message) => stdout(pc.cyan(message)),
17
+ success: (message) => stdout(pc.green(message)),
18
+ warn: (message) => stdout(pc.yellow(message)),
19
+ error: (message) => stdout(pc.red(message)),
20
+ detail: (message) => {
21
+ if (verbose) {
22
+ stderr(pc.dim(message));
23
+ }
24
+ },
25
+ phaseStarted: (event) => {
26
+ if (!verbose) {
27
+ return;
28
+ }
29
+ const attempt = attemptLabel(event.attemptDir);
30
+ const command = [event.invocation.command, ...event.invocation.args].join(" ");
31
+ stderr(pc.dim(`[${event.phaseId} ${attempt}] start ${command} (cwd ${event.workdir}, capability ${event.capability}, model ${event.model}, reasoning ${event.reasoning})`));
32
+ },
33
+ phaseFinished: (event) => {
34
+ if (!verbose) {
35
+ return;
36
+ }
37
+ stderr(pc.dim(`[${event.phaseId} ${attemptLabel(event.attemptDir)}] exit ${event.exitCode} in ${formatDuration(event.durationMs)}`));
38
+ },
39
+ phaseArtifact: (event) => {
40
+ if (!verbose) {
41
+ return;
42
+ }
43
+ const prefix = event.attemptDir
44
+ ? `${event.phaseId} ${attemptLabel(event.attemptDir)}`
45
+ : event.phaseId;
46
+ stderr(pc.dim(`[${prefix}] artifact ${event.filePath}`));
47
+ },
48
+ agentOutput: (event) => {
49
+ if (!verbose) {
50
+ return;
51
+ }
52
+ const outputType = event.stream ?? event.eventType;
53
+ stderr(`[${event.phaseId} ${attemptLabel(event.attemptDir)} ${outputType}] ${event.message}`);
54
+ },
55
+ };
56
+ }
57
+ function attemptLabel(attemptDirOrPath) {
58
+ return path.basename(attemptDirOrPath);
59
+ }
60
+ function formatDuration(durationMs) {
61
+ if (durationMs < 1000) {
62
+ return `${durationMs}ms`;
63
+ }
64
+ return `${(durationMs / 1000).toFixed(1)}s`;
65
+ }
@@ -15,12 +15,12 @@ export async function runAgentPhase(input) {
15
15
  const attempt = await invokeHarness({
16
16
  attemptDir: path.join(input.phaseDir, "attempt-001"),
17
17
  input,
18
- prompt: input.prompt
18
+ prompt: input.prompt,
19
19
  });
20
20
  if (attempt.exitCode !== 0) {
21
21
  return {
22
22
  ok: false,
23
- error: `Phase "${input.phaseId}" failed with exit code ${attempt.exitCode}`
23
+ error: `Phase "${input.phaseId}" failed with exit code ${attempt.exitCode}`,
24
24
  };
25
25
  }
26
26
  if (!input.schema) {
@@ -28,16 +28,26 @@ export async function runAgentPhase(input) {
28
28
  }
29
29
  const parsed = parseAndValidate(input.schema, attempt.stdout);
30
30
  if (parsed.ok) {
31
- await writeJson(path.join(input.phaseDir, "result.json"), parsed.result);
31
+ const resultPath = path.join(input.phaseDir, "result.json");
32
+ await writeJson(resultPath, parsed.result);
33
+ input.reporter?.phaseArtifact({
34
+ phaseId: input.phaseId,
35
+ filePath: resultPath,
36
+ });
32
37
  return parsed;
33
38
  }
34
39
  const repaired = await repairResult({
35
40
  input,
36
41
  originalStdout: attempt.stdout,
37
- validationError: parsed.error
42
+ validationError: parsed.error,
38
43
  });
39
44
  if (repaired.ok && repaired.result !== undefined) {
40
- await writeJson(path.join(input.phaseDir, "result.json"), repaired.result);
45
+ const resultPath = path.join(input.phaseDir, "result.json");
46
+ await writeJson(resultPath, repaired.result);
47
+ input.reporter?.phaseArtifact({
48
+ phaseId: input.phaseId,
49
+ filePath: resultPath,
50
+ });
41
51
  }
42
52
  return repaired;
43
53
  }
@@ -48,19 +58,53 @@ async function invokeHarness(args) {
48
58
  harness: args.input.harness,
49
59
  capability: args.forceReadonly ? "readonly" : args.input.capability,
50
60
  model: args.input.model,
51
- reasoning: args.input.reasoning
61
+ reasoning: args.input.reasoning,
52
62
  });
53
63
  const startedAt = new Date().toISOString();
54
64
  const started = Date.now();
55
65
  const gitBefore = await getGitSnapshot(args.input.workdir);
66
+ args.input.reporter?.phaseStarted({
67
+ phaseId: args.input.phaseId,
68
+ attemptDir: args.attemptDir,
69
+ workdir: args.input.workdir,
70
+ capability: args.forceReadonly ? "readonly" : args.input.capability,
71
+ model: args.input.model,
72
+ reasoning: args.input.reasoning,
73
+ invocation,
74
+ });
56
75
  let stdout = "";
57
76
  let stderr = "";
58
77
  let exitCode = 0;
59
78
  try {
79
+ const verbose = args.input.reporter?.verbose
80
+ ? {
81
+ stdout: (line, event) => {
82
+ args.input.reporter?.agentOutput({
83
+ phaseId: args.input.phaseId,
84
+ attemptDir: args.attemptDir,
85
+ eventType: event.type,
86
+ stream: event.type === "output" ? "stdout" : undefined,
87
+ message: line,
88
+ });
89
+ return "";
90
+ },
91
+ stderr: (line, event) => {
92
+ args.input.reporter?.agentOutput({
93
+ phaseId: args.input.phaseId,
94
+ attemptDir: args.attemptDir,
95
+ eventType: event.type,
96
+ stream: event.type === "output" ? "stderr" : undefined,
97
+ message: line,
98
+ });
99
+ return "";
100
+ },
101
+ }
102
+ : "none";
60
103
  const result = await execa(invocation.command, invocation.args, {
61
104
  cwd: args.input.workdir,
62
105
  input: args.prompt,
63
- reject: false
106
+ reject: false,
107
+ verbose,
64
108
  });
65
109
  stdout = result.stdout;
66
110
  stderr = result.stderr;
@@ -71,18 +115,35 @@ async function invokeHarness(args) {
71
115
  stderr = error instanceof Error ? error.message : String(error);
72
116
  }
73
117
  const gitAfter = await getGitSnapshot(args.input.workdir);
74
- await writeText(path.join(args.attemptDir, "stdout.log"), stdout);
75
- await writeText(path.join(args.attemptDir, "stderr.log"), stderr);
76
- await writeJson(path.join(args.attemptDir, "meta.json"), {
118
+ const durationMs = Date.now() - started;
119
+ const stdoutPath = path.join(args.attemptDir, "stdout.log");
120
+ const stderrPath = path.join(args.attemptDir, "stderr.log");
121
+ const metaPath = path.join(args.attemptDir, "meta.json");
122
+ await writeText(stdoutPath, stdout);
123
+ await writeText(stderrPath, stderr);
124
+ await writeJson(metaPath, {
77
125
  command: invocation.command,
78
126
  args: invocation.args,
79
127
  started_at: startedAt,
80
128
  ended_at: new Date().toISOString(),
81
- duration_ms: Date.now() - started,
129
+ duration_ms: durationMs,
82
130
  exit_code: exitCode,
83
131
  git_before: gitBefore,
84
- git_after: gitAfter
132
+ git_after: gitAfter,
133
+ });
134
+ args.input.reporter?.phaseFinished({
135
+ phaseId: args.input.phaseId,
136
+ attemptDir: args.attemptDir,
137
+ durationMs,
138
+ exitCode,
85
139
  });
140
+ for (const filePath of [stdoutPath, stderrPath, metaPath]) {
141
+ args.input.reporter?.phaseArtifact({
142
+ phaseId: args.input.phaseId,
143
+ attemptDir: args.attemptDir,
144
+ filePath,
145
+ });
146
+ }
86
147
  return { stdout, stderr, exitCode };
87
148
  }
88
149
  function parseAndValidate(schema, stdout) {
@@ -118,13 +179,13 @@ async function repairResult(args) {
118
179
  "",
119
180
  "Original prompt:",
120
181
  "",
121
- args.input.prompt
182
+ args.input.prompt,
122
183
  ].join("\n");
123
184
  const attempt = await invokeHarness({
124
185
  attemptDir: path.join(args.input.phaseDir, `repair-${String(attemptNumber).padStart(3, "0")}`),
125
186
  input: args.input,
126
187
  prompt: repairPrompt,
127
- forceReadonly: true
188
+ forceReadonly: true,
128
189
  });
129
190
  if (attempt.exitCode !== 0) {
130
191
  lastError = `Repair harness exited with code ${attempt.exitCode}`;
@@ -138,6 +199,6 @@ async function repairResult(args) {
138
199
  }
139
200
  return {
140
201
  ok: false,
141
- error: `Phase "${args.input.phaseId}" produced an invalid result: ${lastError}`
202
+ error: `Phase "${args.input.phaseId}" produced an invalid result: ${lastError}`,
142
203
  };
143
204
  }