@cat-factory/executor-harness 1.31.0 → 1.31.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/dist/pi-workspace.js +15 -1
- package/package.json +12 -7
- package/src/pi-workspace.ts +17 -1
- package/dist/blueprint.js +0 -367
- package/dist/bootstrap.js +0 -99
- package/dist/ci-fixer.js +0 -46
- package/dist/conflict-resolver.js +0 -138
- package/dist/explore.js +0 -74
- package/dist/fixer.js +0 -44
- package/dist/merger.js +0 -135
- package/dist/on-call.js +0 -126
- package/dist/spec.js +0 -754
- package/dist/tester.js +0 -191
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
import { cloneRepo, commitAll, conflictDiff, headCommit, mergeBranch, pushBranch, unmergedPaths, } from './git.js';
|
|
2
|
-
import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
3
|
-
import { log } from './logger.js';
|
|
4
|
-
// Async job execution for the merge-conflict resolver. When a PR cannot be merged
|
|
5
|
-
// because it conflicts with its base, the engine dispatches this: clone the PR head
|
|
6
|
-
// branch (full history), merge the base branch into it to surface the conflicts,
|
|
7
|
-
// run Pi to resolve them, complete the merge commit and push back onto the SAME
|
|
8
|
-
// branch (no new branch / PR) so the PR becomes mergeable and CI re-runs.
|
|
9
|
-
//
|
|
10
|
-
// Shares the thin workspace/Pi base (withWorkspace + runAgentInWorkspace) with the
|
|
11
|
-
// other agents; it diverges only in needing a full clone, a base→branch merge to
|
|
12
|
-
// produce the conflicts, and a guard that refuses to push a half-resolved tree.
|
|
13
|
-
/**
 * Run one conflict-resolver job: clone the PR branch, merge the base into it, let
 * the agent resolve any conflicts, then commit and push back onto the same branch.
 *
 * @param {object} job - Job payload. Reads `jobId`, `repo`, `branch`, `ghToken`,
 *   `userPrompt`, `systemPrompt`, `model`, `harness` and the subscription / proxy /
 *   session credentials.
 * @param {object} [opts] - Run options. `opts.signal` is passed to every git helper
 *   and `opts` as a whole is forwarded to `runAgentInWorkspace`.
 * @returns {Promise<object>} The result of `withWorkspace`. The workspace callback
 *   yields `{ resolved, summary, stats }`, plus `error` when conflicts remain and
 *   `usage` when the agent run reported one.
 */
export async function handleConflictResolver(job, opts = {}) {
    const { signal } = opts;
    const trace = { jobId: job.jobId, repo: `${job.repo.owner}/${job.repo.name}`, branch: job.branch };
    // Outcomes that never invoked the agent report zeroed stats.
    const idleStats = () => ({ toolCalls: 0, assistantChars: 0 });
    return withWorkspace('conflict', async (dir) => {
        const base = job.repo.baseBranch;
        log.info('conflict: cloning PR branch (full history)', trace);
        // The PR branch is what gets checked out; `full: true` asks for full history
        // so the base can be merged into it afterwards.
        await cloneRepo({
            repo: { ...job.repo, baseBranch: job.branch },
            ghToken: job.ghToken,
            dir,
            signal,
            full: true,
        });
        const tipBeforeMerge = await headCommit(dir, signal);
        log.info('conflict: merging base into PR branch', { ...trace, base });
        const mergedCleanly = await mergeBranch(dir, base, signal);
        if (mergedCleanly) {
            // Nothing for the agent to do. Push only when the merge actually moved
            // the branch tip; an unchanged tip means the branch was already current.
            const tipAfterMerge = await headCommit(dir, signal);
            if (tipAfterMerge !== tipBeforeMerge) {
                log.info('conflict: base merged clean — pushing the merge commit', trace);
                await pushBranch(dir, job.branch, job.ghToken, signal);
                return {
                    resolved: true,
                    summary: 'Merged the base in cleanly (no conflicts to resolve).',
                    stats: idleStats(),
                };
            }
            // Logged so a gate that keeps re-dispatching this no-op is diagnosable.
            log.info('conflict: base merged clean and branch already up to date — nothing to push', {
                ...trace,
                base,
            });
            return {
                resolved: true,
                summary: 'No conflicts: the branch is already up to date with its base.',
                stats: idleStats(),
            };
        }
        // Conflicts are in the working tree: hand the agent the exact files and
        // hunks first, with the original task kept only as trailing reference.
        const conflicted = await unmergedPaths(dir, signal);
        log.info('conflict: resolving conflicts with agent', { ...trace, conflicted });
        const diff = await conflictDiff(dir, conflicted, signal);
        const agentRun = await runAgentInWorkspace({
            dir,
            systemPrompt: job.systemPrompt,
            userPrompt: buildConflictPrompt(base, job.branch, conflicted, diff, job.userPrompt),
            model: job.model,
            harness: job.harness,
            subscriptionToken: job.subscriptionToken,
            subscriptionBaseUrl: job.subscriptionBaseUrl,
            proxyBaseUrl: job.proxyBaseUrl,
            sessionToken: job.sessionToken,
        }, opts);
        const { summary, stats, usage } = agentRun;
        const usagePart = usage ? { usage } : {};
        // Refuse to push while any path is still unmerged; report failure instead.
        const stillConflicted = await unmergedPaths(dir, signal);
        if (stillConflicted.length > 0) {
            log.error('conflict: unresolved conflicts remain — refusing to push', {
                ...trace,
                unresolved: stillConflicted.length,
            });
            return {
                resolved: false,
                summary,
                stats,
                error: unresolvedReason(stillConflicted, stats, agentRun.stderrTail),
                ...usagePart,
            };
        }
        // Everything is resolved: finish the merge commit and push the branch.
        await commitAll(dir, `Merge ${base} into ${job.branch}`, signal);
        log.info('conflict: pushing resolved branch', { ...trace, ...stats });
        await pushBranch(dir, job.branch, job.ghToken, signal);
        return { resolved: true, summary, stats, ...usagePart };
    });
}
|
|
97
|
-
/**
 * Build the user prompt for a conflict-resolution run. The conflicted file list and
 * their hunks come first; the original task text, when non-blank, is appended at the
 * end as reference only.
 *
 * @param {string} baseBranch - Name of the base branch that was merged in.
 * @param {string} prBranch - Name of the pull-request branch being repaired.
 * @param {string[]} conflicted - Paths that are still conflicted.
 * @param {string} diff - Diff text of the conflicted files, placed in a ```diff fence.
 * @param {string} taskReference - Original task text; trimmed, omitted when empty.
 * @returns {string} The prompt, with sections separated by newlines.
 */
function buildConflictPrompt(baseBranch, prBranch, conflicted, diff, taskReference) {
    const headline = `The base branch \`${baseBranch}\` was merged into this pull-request branch \`${prBranch}\` and left Git merge conflicts in the following ${conflicted.length} file(s):`;
    // One bullet per conflicted path, kept as a single section of the prompt.
    const bullets = conflicted.map((path) => `- ${path}`).join('\n');
    const instructions = [
        'Resolve EVERY conflict in these files: open each one, understand both sides of each',
        '`<<<<<<<` / `=======` / `>>>>>>>` region, and edit it to a correct result that',
        "preserves the intent of BOTH the base changes and this PR's changes — never just",
        'discard one side. Remove every conflict marker and leave the project building. Do',
        'not create a new branch or PR; the harness completes the merge commit and pushes once',
        'no conflict markers remain.',
    ].join(' ');
    const sections = [
        headline,
        '',
        bullets,
        '',
        instructions,
        '',
        'Conflict hunks (`git diff` of the conflicted files):',
        '',
        '```diff',
        diff,
        '```',
    ];
    const reference = taskReference.trim();
    const trailer = reference
        ? ['', 'For reference, the task this pull request implements:', '', reference]
        : [];
    return [...sections, ...trailer].join('\n');
}
|
|
131
|
-
/**
 * Compose the error text for a run that left merge conflicts behind.
 *
 * @param {string[]} unresolved - Paths still conflicted; at most the first 10 are listed.
 * @param {object} stats - Agent run stats, passed to `agentNeverActed`.
 * @param {string} stderrTail - Agent output tail, passed to `agentOutputTail`.
 * @returns {string} The message: count and sample of files, then `NEVER_ACTED_CAUSE`
 *   when `agentNeverActed(stats)` is truthy, then the formatted output tail.
 */
function unresolvedReason(unresolved, stats, stderrTail) {
    let neverActedNote = '';
    if (agentNeverActed(stats)) {
        neverActedNote = NEVER_ACTED_CAUSE;
    }
    const listed = unresolved.slice(0, 10).join(', ');
    const headline = `The agent did not resolve all merge conflicts (${unresolved.length} file(s) still conflicted: ${listed}).`;
    return headline + neverActedNote + agentOutputTail(stderrTail);
}
|
package/dist/explore.js
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
import { join } from 'node:path';
|
|
2
|
-
import { mkdir } from 'node:fs/promises';
|
|
3
|
-
import { cloneRepo } from './git.js';
|
|
4
|
-
import { agentNeverActed, agentOutputTail, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
5
|
-
import { log } from './logger.js';
|
|
6
|
-
// The shared read-only container agent: clone a branch, run Pi to EXPLORE the
|
|
7
|
-
// checkout (read-only), and return its prose report/proposal. Both the architect
|
|
8
|
-
// (proposes a design after reading the code) and the tech-debt analysis agent use
|
|
9
|
-
// this one path. Unlike the coding agents (`/run`, `/ci-fix`) it pushes nothing and
|
|
10
|
-
// opens no PR, and — like the merger — it makes no edits, so an edit-free run is the
|
|
11
|
-
// expected, correct outcome rather than a "no changes" failure. The only failure
|
|
12
|
-
// mode is producing no text at all (the agent never reached the model).
|
|
13
|
-
/**
 * Run one read-only exploration job: clone the branch, run the agent in the checkout
 * (or in the service subdirectory), and return its prose report.
 *
 * @param {object} job - Job payload. Reads `jobId`, `label`, `repo` (including the
 *   optional `repo.serviceDirectory`), `branch`, `ghToken`, the prompts, `model`,
 *   `harness`, credentials, `webToolsGuidance` and `webSearch`.
 * @param {object} [opts] - Run options. `opts.signal` goes to `cloneRepo`; `opts` as
 *   a whole is forwarded to `runAgentInWorkspace`.
 * @returns {Promise<object>} The result of `withWorkspace`. The workspace callback
 *   yields `{ summary, stats }`, plus `error` when the summary is blank and `usage`
 *   when the agent run reported one.
 */
export async function handleExplore(job, opts = {}) {
    // The label names both the workspace and the `kind` field of every log line.
    const label = job.label ?? 'explore';
    const trace = {
        jobId: job.jobId,
        kind: label,
        repo: `${job.repo.owner}/${job.repo.name}`,
        branch: job.branch,
    };
    return withWorkspace(label, async (dir) => {
        log.info('explore: cloning', trace);
        await cloneRepo({
            repo: { ...job.repo, baseBranch: job.branch },
            ghToken: job.ghToken,
            dir,
            signal: opts.signal,
        });
        // When a service directory is configured the agent runs from inside it; the
        // directory is created first in case the checkout does not contain it.
        const { serviceDirectory } = job.repo;
        let workDir = dir;
        if (serviceDirectory) {
            workDir = join(dir, serviceDirectory);
            await mkdir(workDir, { recursive: true });
        }
        log.info('explore: running agent', { ...trace, serviceDirectory });
        const agentRun = await runAgentInWorkspace({
            dir: workDir,
            systemPrompt: job.systemPrompt,
            userPrompt: job.userPrompt,
            model: job.model,
            harness: job.harness,
            subscriptionToken: job.subscriptionToken,
            subscriptionBaseUrl: job.subscriptionBaseUrl,
            proxyBaseUrl: job.proxyBaseUrl,
            sessionToken: job.sessionToken,
            serviceDirectory,
            // This agent reports and makes no edits, so an edit-free run is expected.
            expectsEdits: false,
            webToolsGuidance: job.webToolsGuidance,
            webSearchProxy: job.webSearch,
        }, opts);
        const { summary, stats, usage } = agentRun;
        const usagePart = usage ? { usage } : {};
        // The report text is the deliverable: any non-blank summary is a success.
        if (summary.trim()) {
            log.info('explore: done', { ...trace, ...stats });
            return { summary, stats, ...usagePart };
        }
        return {
            summary,
            stats,
            error: noOutputReason(stats, agentRun.stderrTail),
            ...usagePart,
        };
    });
}
|
|
68
|
-
/**
 * Compose the error text for a read-only run that returned no report.
 *
 * @param {object} stats - Agent run stats, passed to `agentNeverActed`.
 * @param {string} stderrTail - Agent output tail, passed to `agentOutputTail`.
 * @returns {string} The message, with a "never acted" parenthetical when
 *   `agentNeverActed(stats)` is truthy, followed by the formatted output tail.
 */
function noOutputReason(stats, stderrTail) {
    let neverActedNote = '';
    if (agentNeverActed(stats)) {
        neverActedNote = ' (the agent never acted — it most likely could not reach the model)';
    }
    const tail = agentOutputTail(stderrTail);
    return `Read-only agent produced no report${neverActedNote}.${tail}`;
}
|
package/dist/fixer.js
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import { noChangesReason, runCodingAgent } from './coding-agent.js';
|
|
2
|
-
// Async job execution for the test Fixer. When a Tester withholds its greenlight the
|
|
3
|
-
// engine dispatches this: clone the PR HEAD branch, run Pi to fix the concerns in the
|
|
4
|
-
// Tester's report (folded into the user prompt by the backend), then commit + push
|
|
5
|
-
// back onto the SAME branch (no new branch, no new PR) so the Tester can re-run. The
|
|
6
|
-
// engine re-dispatches the Tester after the push and loops up to the attempt budget.
|
|
7
|
-
//
|
|
8
|
-
// The clone/Pi/push mechanics are shared with implementation + the CI-fixer via
|
|
9
|
-
// runCodingAgent; the Fixer only differs in working ON the existing PR branch.
|
|
10
|
-
/**
 * Run one Fixer job: delegate to `runCodingAgent`, cloning and pushing the same PR
 * branch, and report whether anything was pushed.
 *
 * @param {object} job - Job payload. Reads `jobId`, `repo`, `branch`, `ghToken`, the
 *   prompts, `model`, `harness`, credentials, `webToolsGuidance` and `webSearch`.
 * @param {object} [opts] - Run options, forwarded unchanged to `runCodingAgent`.
 * @returns {Promise<object>} `{ pushed, summary, stats }`, plus `error` when nothing
 *   was pushed and `usage` when the agent run reported one.
 */
export async function handleFixer(job, opts = {}) {
    // The same branch is used for clone and push: no new branch, no new PR.
    const prBranch = job.branch;
    const outcome = await runCodingAgent({
        kind: 'fix-tests',
        jobId: job.jobId,
        repo: job.repo,
        cloneBranch: prBranch,
        pushBranch: prBranch,
        ghToken: job.ghToken,
        systemPrompt: job.systemPrompt,
        userPrompt: job.userPrompt,
        model: job.model,
        harness: job.harness,
        subscriptionToken: job.subscriptionToken,
        subscriptionBaseUrl: job.subscriptionBaseUrl,
        proxyBaseUrl: job.proxyBaseUrl,
        sessionToken: job.sessionToken,
        commitMessage: 'Fix issues found by the tester',
        webToolsGuidance: job.webToolsGuidance,
        webSearchProxy: job.webSearch,
    }, opts);
    const { summary, stats, usage } = outcome;
    const usagePart = usage ? { usage } : {};
    if (outcome.pushed) {
        return { pushed: true, summary, stats, ...usagePart };
    }
    // Nothing was pushed: return `pushed: false` with an explanatory `error` string.
    return {
        pushed: false,
        summary,
        stats,
        error: noChangesReason('No test fix produced', stats, outcome.stderrTail),
        ...usagePart,
    };
}
|
package/dist/merger.js
DELETED
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
import { cloneRepo, hasDiffAgainstBase } from './git.js';
|
|
2
|
-
import { extractJsonObject } from './blueprint.js';
|
|
3
|
-
import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
4
|
-
import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
|
|
5
|
-
import { log } from './logger.js';
|
|
6
|
-
/** One-line description of the merge-assessment JSON shape, passed as `shapeHint`. */
const ASSESSMENT_SHAPE_HINT = [
    'Expected a merge assessment: {"complexity": number 0..1, "risk": number 0..1,',
    '"impact": number 0..1, "rationale": string}.',
].join(' ');
|
|
9
|
-
/**
 * Coerce `value` to a number and clamp it into the range 0..1.
 *
 * @param {*} value - A number, or anything `Number()` can convert.
 * @param {number} fallback - Returned as-is when the coerced value is not finite.
 * @returns {number} The clamped number, or `fallback`.
 */
function clamp01(value, fallback) {
    const numeric = typeof value === 'number' ? value : Number(value);
    return Number.isFinite(numeric) ? Math.max(0, Math.min(1, numeric)) : fallback;
}
|
|
16
|
-
/**
 * Coerce the agent's JSON into a well-formed merge assessment.
 *
 * Returns null when `raw` is not an object, i.e. no JSON object could be extracted
 * from the reply. The previous implementation substituted `{}` in that case and
 * returned an all-1 assessment, so callers checking for a falsy result never saw one.
 *
 * When `raw` is an object, missing or non-numeric scores default to a CONSERVATIVE 1
 * (the highest score on that axis), and a missing or empty rationale falls back to
 * the first 2000 characters of `summary`.
 *
 * @param {*} raw - The extracted JSON value, or a non-object when extraction failed.
 * @param {string} summary - The raw reply text, used as the rationale fallback.
 * @returns {{complexity: number, risk: number, impact: number, rationale: string} | null}
 */
function coerceAssessment(raw, summary) {
    if (typeof raw !== 'object' || raw === null) {
        return null;
    }
    const hasRationale = typeof raw.rationale === 'string' && raw.rationale !== '';
    return {
        complexity: clamp01(raw.complexity, 1),
        risk: clamp01(raw.risk, 1),
        impact: clamp01(raw.impact, 1),
        rationale: hasRationale ? raw.rationale : summary.slice(0, 2000),
    };
}
|
|
30
|
-
/**
 * Build the merger task prompt: the job's instructions, a paragraph naming the PR
 * branch and base branch with suggested git commands, and the required JSON reply
 * shape.
 *
 * The PR number is mentioned only when it is present. Both `undefined` and `null`
 * count as absent, so a null `prNumber` no longer renders as "(PR #null)".
 *
 * @param {object} job - Reads `instructions`, `prNumber`, `branch` and `repo.baseBranch`.
 * @returns {string} The prompt, with sections separated by blank lines.
 */
function buildUserPrompt(job) {
    // `!= null` is deliberate: it matches both null and undefined.
    const pr = job.prNumber != null ? ` (PR #${job.prNumber})` : '';
    const base = job.repo.baseBranch;
    return [
        job.instructions,
        '',
        `The pull request${pr} is on branch \`${job.branch}\`; the base branch is ` +
            `\`${base}\`. Inspect the change (e.g. \`git fetch origin ${base}\` ` +
            `then \`git diff origin/${base}...HEAD\`) and score complexity, risk and impact.`,
        '',
        'Respond with ONLY a JSON object {"complexity":0.0,"risk":0.0,"impact":0.0,"rationale":"…"}.',
    ].join('\n');
}
|
|
43
|
-
/**
 * Run one merger job: clone the PR branch with full history, have the agent score
 * the change against the base, and return the assessment. Nothing is committed.
 *
 * @param {object} job - Job payload. Reads `jobId`, `repo`, `branch`, `ghToken`,
 *   `systemPrompt`, `model`, `harness`, credentials, and whatever `buildUserPrompt`
 *   reads.
 * @param {object} [opts] - Run options. `opts.signal` goes to the git helpers and to
 *   `resolveStructuredOutput`; `opts` as a whole is forwarded to `runAgentInWorkspace`.
 * @returns {Promise<object>} The result of `withWorkspace`. The workspace callback
 *   yields `{ assessment, summary, stats }` on success or `{ summary, stats, error }`
 *   when no assessment was obtained, plus `usage` when the agent run reported one.
 */
export async function handleMerger(job, opts = {}) {
    const trace = { jobId: job.jobId, repo: `${job.repo.owner}/${job.repo.name}`, branch: job.branch };
    return withWorkspace('merge', async (dir) => {
        const base = job.repo.baseBranch;
        log.info('merge: cloning PR branch', trace);
        await cloneRepo({
            repo: { ...job.repo, baseBranch: job.branch },
            ghToken: job.ghToken,
            dir,
            // Full clone: the diff against the base needs the base's remote-tracking
            // ref and the merge base, which a shallow single-branch clone lacks.
            full: true,
            signal: opts.signal,
        });
        // Check up front that a real diff against the base exists. When it does not,
        // the agent's scores are replaced by a conservative assessment further down.
        const diffExaminable = await hasDiffAgainstBase(dir, base, opts.signal);
        if (!diffExaminable) {
            log.warn('merge: no examinable diff against base; will assess conservatively', trace);
        }
        log.info('merge: running agent', trace);
        // Credentials shared by the agent run and the structured-output call.
        const access = {
            harness: job.harness,
            subscriptionToken: job.subscriptionToken,
            subscriptionBaseUrl: job.subscriptionBaseUrl,
            proxyBaseUrl: job.proxyBaseUrl,
            sessionToken: job.sessionToken,
        };
        const agentRun = await runAgentInWorkspace({
            dir,
            systemPrompt: job.systemPrompt,
            userPrompt: buildUserPrompt(job),
            model: job.model,
            ...access,
            // The merger only assesses, so an edit-free run is expected.
            expectsEdits: false,
        }, opts);
        const { summary, stats, usage } = agentRun;
        const usagePart = usage ? { usage } : {};
        // Parse the agent's reply into an assessment via `resolveStructuredOutput`;
        // a falsy `value` means no usable assessment was obtained.
        const parsed = await resolveStructuredOutput({
            label: 'merger',
            shapeHint: ASSESSMENT_SHAPE_HINT,
            parse: (text) => coerceAssessment(extractJsonObject(text), text),
        }, summary, {
            ...access,
            model: job.model,
            jobId: job.jobId,
            signal: opts.signal,
        });
        const assessment = parsed.value;
        if (!assessment) {
            return {
                summary,
                stats,
                error: noAssessmentReason(stats, agentRun.stderrTail, parsed.diagnostics),
                ...usagePart,
            };
        }
        if (diffExaminable) {
            log.info('merge: assessed', { ...trace, ...assessment });
            return { assessment, summary, stats, ...usagePart };
        }
        // No real diff was examinable, so the agent's scores cannot be trusted.
        // Report the maximum on every axis with a rationale asking for human review.
        const conservative = {
            complexity: 1,
            risk: 1,
            impact: 1,
            rationale: `Could not examine a real diff of \`${job.branch}\` against \`${base}\` ` +
                `(the base ref was missing or the diff was empty), so this PR was NOT auto-assessed ` +
                `and needs a human merge review.`,
        };
        log.info('merge: assessed conservatively (no examinable diff)', { ...trace, ...conservative });
        return { assessment: conservative, summary, stats, ...usagePart };
    });
}
|
|
129
|
-
/**
 * Compose the error text for a merger run that yielded no assessment.
 *
 * @param {object} stats - Agent run stats, passed to `agentNeverActed`.
 * @param {string} stderrTail - Agent output tail, passed to `agentOutputTail`.
 * @param {*} diagnostics - Structured-output diagnostics; formatted with
 *   `diagnosticsSuffix` when truthy, omitted otherwise.
 * @returns {string} The message: cause, optional diagnostics, then the output tail.
 */
function noAssessmentReason(stats, stderrTail, diagnostics) {
    let cause = ' The agent did not return a parseable JSON assessment.';
    if (agentNeverActed(stats)) {
        cause = NEVER_ACTED_CAUSE;
    }
    const diagnosticsNote = diagnostics ? diagnosticsSuffix(diagnostics) : '';
    const tail = agentOutputTail(stderrTail);
    return `Merger produced no assessment.${cause}${diagnosticsNote}${tail}`;
}
|
package/dist/on-call.js
DELETED
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
import { cloneRepo } from './git.js';
|
|
2
|
-
import { extractJsonObject } from './blueprint.js';
|
|
3
|
-
import { agentNeverActed, agentOutputTail, NEVER_ACTED_CAUSE, runAgentInWorkspace, withWorkspace, } from './pi-workspace.js';
|
|
4
|
-
import { diagnosticsSuffix, resolveStructuredOutput, } from './structured-output.js';
|
|
5
|
-
import { log } from './logger.js';
|
|
6
|
-
// Async job execution for the on-call agent. The engine dispatches this when the
|
|
7
|
-
// post-release-health gate detects a Datadog regression. The released PR has already
|
|
8
|
-
// merged and its work branch was deleted, so we clone the BASE branch (which contains
|
|
9
|
-
// the merged change), have Pi locate the merged commit (via the PR number / the
|
|
10
|
-
// now-historical head branch) and correlate its diff with the regression evidence
|
|
11
|
-
// (handed in via the user prompt), then return ONLY a JSON assessment of whether THIS
|
|
12
|
-
// change is the likely culprit. The on-call agent makes NO commits and reverts nothing —
|
|
13
|
-
// the engine raises a `release_regression` notification carrying this assessment.
|
|
14
|
-
// One-line description of the on-call assessment JSON shape, passed as `shapeHint`.
const ASSESSMENT_SHAPE_HINT = [
    'Expected an on-call assessment: {"culpritConfidence": number 0..1, "recommendation":',
    '"revert"|"hold"|"monitor", "rationale": string, "evidence": string[]}.',
].join(' ');
|
|
16
|
-
/**
 * Coerce `value` to a number and clamp it into the range 0..1.
 *
 * @param {*} value - A number, or anything `Number()` can convert.
 * @param {number} fallback - Returned as-is when the coerced value is not finite.
 * @returns {number} The clamped number, or `fallback`.
 */
function clamp01(value, fallback) {
    const numeric = typeof value === 'number' ? value : Number(value);
    return Number.isFinite(numeric) ? Math.max(0, Math.min(1, numeric)) : fallback;
}
|
|
22
|
-
/**
 * Normalise a recommendation value.
 *
 * @param {*} value - Candidate recommendation from the agent's JSON.
 * @returns {'revert'|'monitor'|'hold'} `value` when it is exactly `'revert'` or
 *   `'monitor'`; `'hold'` for anything else.
 */
function coerceRecommendation(value) {
    switch (value) {
        case 'revert':
        case 'monitor':
            return value;
        default:
            return 'hold';
    }
}
|
|
25
|
-
/**
 * Coerce the agent's JSON into a well-formed on-call assessment.
 *
 * Defaults: a missing or non-numeric `culpritConfidence` becomes 0, an unrecognised
 * `recommendation` becomes `'hold'`, a missing or empty `rationale` becomes the first
 * 2000 characters of `summary`, and `evidence` keeps only its string entries (or is
 * empty when it is not an array).
 *
 * @param {*} raw - The extracted JSON value.
 * @param {string} summary - The raw reply text, used as the rationale fallback.
 * @returns {object|null} The assessment, or null when `raw` is not an object.
 */
function coerceAssessment(raw, summary) {
    const isObject = raw !== null && typeof raw === 'object';
    if (!isObject) {
        return null;
    }
    let evidence = [];
    if (Array.isArray(raw.evidence)) {
        evidence = raw.evidence.filter((entry) => typeof entry === 'string');
    }
    const culpritConfidence = clamp01(raw.culpritConfidence, 0);
    const recommendation = coerceRecommendation(raw.recommendation);
    let rationale;
    if (typeof raw.rationale === 'string' && raw.rationale) {
        rationale = raw.rationale;
    }
    else {
        rationale = summary.slice(0, 2000);
    }
    return { culpritConfidence, recommendation, rationale, evidence };
}
|
|
45
|
-
/**
 * Build the on-call task prompt: the job's own prompt (the regression evidence), a
 * paragraph telling the agent it is on the base branch and how to locate the merged
 * change, and the required JSON reply shape.
 *
 * Two wording defects are fixed relative to the previous version:
 *  - with no PR number the sentence read "released pull request ." (stray space
 *    before the period); the PR reference is now appended only when there is one;
 *  - a null `prNumber` rendered as "#null"; null and undefined now both count as absent.
 *
 * @param {object} job - Reads `userPrompt`, `prNumber`, `headBranch` and
 *   `repo.baseBranch`.
 * @returns {string} The prompt, with sections separated by blank lines.
 */
function buildUserPrompt(job) {
    // `!= null` is deliberate: it matches both null and undefined.
    const pr = job.prNumber != null ? `#${job.prNumber}` : '';
    // How to find the merged commit: prefer the PR number, then the head branch
    // name, and otherwise fall back to the most recent merge/feature commit.
    let locate;
    if (job.prNumber) {
        locate =
            `It merged as a commit referencing ${pr} — find it with ` +
            `\`git log --oneline -n 50\` (squash/merge commits include \`(${pr})\`; a merge commit ` +
            `mentions \`#${job.prNumber}\`), then inspect it with \`git show <sha>\`.`;
    }
    else if (job.headBranch) {
        locate =
            `Its work branch was \`${job.headBranch}\` (now deleted) — find the merged commit in ` +
            `\`git log --oneline -n 50\` and inspect it with \`git show <sha>\`.`;
    }
    else {
        locate =
            `Find the most recent merge/feature commit with \`git log --oneline -n 50\` and inspect ` +
            `it with \`git show <sha>\`.`;
    }
    const released = pr ? `pull request ${pr}` : 'pull request';
    return [
        job.userPrompt,
        '',
        `You are on the base branch \`${job.repo.baseBranch}\`, which already contains the released ` +
            `${released}. ${locate} Correlate that change with the regression evidence above. ` +
            `Beware correlation vs causation.`,
        '',
        'Respond with ONLY a JSON object {"culpritConfidence":0.0,"recommendation":"revert"|"hold"|"monitor","rationale":"…","evidence":["…"]}.',
    ].join('\n');
}
|
|
68
|
-
/**
 * Run one on-call job: clone `job.branch` with full history, have the agent
 * investigate, and return its assessment. Nothing is committed.
 *
 * @param {object} job - Job payload. Reads `jobId`, `repo`, `branch`, `ghToken`,
 *   `systemPrompt`, `model`, `harness`, credentials, and whatever `buildUserPrompt`
 *   reads.
 * @param {object} [opts] - Run options. `opts.signal` goes to `cloneRepo` and to
 *   `resolveStructuredOutput`; `opts` as a whole is forwarded to `runAgentInWorkspace`.
 * @returns {Promise<object>} The result of `withWorkspace`. The workspace callback
 *   yields `{ onCallAssessment, summary, stats }` on success or
 *   `{ summary, stats, error }` when no assessment was obtained, plus `usage` when
 *   the agent run reported one.
 */
export async function handleOnCall(job, opts = {}) {
    const trace = { jobId: job.jobId, repo: `${job.repo.owner}/${job.repo.name}`, branch: job.branch };
    return withWorkspace('on-call', async (dir) => {
        log.info('on-call: cloning base branch', trace);
        await cloneRepo({
            repo: { ...job.repo, baseBranch: job.branch },
            ghToken: job.ghToken,
            dir,
            // Full history, so the agent can locate and diff the merged commit.
            full: true,
            signal: opts.signal,
        });
        log.info('on-call: running agent', trace);
        // Credentials shared by the agent run and the structured-output call.
        const access = {
            harness: job.harness,
            subscriptionToken: job.subscriptionToken,
            subscriptionBaseUrl: job.subscriptionBaseUrl,
            proxyBaseUrl: job.proxyBaseUrl,
            sessionToken: job.sessionToken,
        };
        const agentRun = await runAgentInWorkspace({
            dir,
            systemPrompt: job.systemPrompt,
            userPrompt: buildUserPrompt(job),
            model: job.model,
            ...access,
            // Investigation only, so an edit-free run is expected.
            expectsEdits: false,
        }, opts);
        const { summary, stats, usage } = agentRun;
        const usagePart = usage ? { usage } : {};
        // Parse the agent's reply into an assessment; a falsy `value` means none
        // could be obtained.
        const parsed = await resolveStructuredOutput({
            label: 'on-call',
            shapeHint: ASSESSMENT_SHAPE_HINT,
            parse: (text) => coerceAssessment(extractJsonObject(text), text),
        }, summary, {
            ...access,
            model: job.model,
            jobId: job.jobId,
            signal: opts.signal,
        });
        const assessment = parsed.value;
        if (assessment) {
            log.info('on-call: assessed', { ...trace, ...assessment });
            return { onCallAssessment: assessment, summary, stats, ...usagePart };
        }
        return {
            summary,
            stats,
            error: noAssessmentReason(stats, agentRun.stderrTail, parsed.diagnostics),
            ...usagePart,
        };
    });
}
|
|
121
|
-
/**
 * Compose the error text for an on-call run that yielded no assessment.
 *
 * @param {object} stats - Agent run stats, passed to `agentNeverActed`.
 * @param {string} stderrTail - Agent output tail, passed to `agentOutputTail`.
 * @param {*} diagnostics - Structured-output diagnostics; formatted with
 *   `diagnosticsSuffix` when truthy, omitted otherwise.
 * @returns {string} The message: cause, optional diagnostics, then the output tail.
 */
function noAssessmentReason(stats, stderrTail, diagnostics) {
    let cause = ' The agent did not return a parseable JSON assessment.';
    if (agentNeverActed(stats)) {
        cause = NEVER_ACTED_CAUSE;
    }
    const diagnosticsNote = diagnostics ? diagnosticsSuffix(diagnostics) : '';
    const tail = agentOutputTail(stderrTail);
    return `On-call produced no assessment.${cause}${diagnosticsNote}${tail}`;
}
|