@growthub/cli 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/api/workspace/helper/apply/route.js +307 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/api/workspace/helper/query/route.js +372 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/api/workspace/helper/receipts/route.js +47 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/data-model/components/DataModelShell.jsx +664 -82
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/data-model/components/HelperSidecar.jsx +1371 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/globals.css +1383 -24
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/settings/integrations/page.jsx +7 -21
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/settings/ownership/ownership-panel.jsx +222 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/settings/ownership/page.jsx +19 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/settings/settings-shell.jsx +2 -1
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/workspace-builder.jsx +116 -24
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/app/workspace-rail.jsx +497 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/growthub.config.json +20 -4
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/lib/adapters/sandboxes/default-local-intelligence.js +19 -4
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/lib/workspace-data-model.js +23 -5
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/lib/workspace-helper-apply.js +473 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/lib/workspace-helper.js +583 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/package-lock.json +34 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/apps/workspace/package.json +3 -1
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/helpers/export-training-traces.mjs +144 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/helpers/grade-raw-pairs.mjs +279 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/helpers/harvest-cursor-traces.mjs +288 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/helpers/upload-graded-traces.mjs +128 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/kit.json +19 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/templates/seeded-configs/alignment-loop.config.json +264 -0
- package/assets/worker-kits/growthub-custom-workspace-starter-v1/workers/custom-workspace-operator/CLAUDE.md +38 -0
- package/dist/index.js +1416 -2627
- package/package.json +1 -1
package/assets/worker-kits/growthub-custom-workspace-starter-v1/helpers/export-training-traces.mjs
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* helpers/export-training-traces.mjs — Distillation Pipeline V1, Phase 3
|
|
4
|
+
*
|
|
5
|
+
* Reads `training-traces.rows` from the live workspace, filters rows where
|
|
6
|
+
* qualityScore >= --min-score AND exported == "false", emits an Unsloth-ready
|
|
7
|
+
* JSONL of {instruction, input, output} on disk, then PATCHes the same rows
|
|
8
|
+
* with exported = "true" so they are not re-exported on the next run.
|
|
9
|
+
*
|
|
10
|
+
* Output format (one JSON object per line):
|
|
11
|
+
* {"instruction": "<system + task>", "input": "<user prompt>", "output": "<agent output>"}
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* node helpers/export-training-traces.mjs \
|
|
15
|
+
* --workspace http://localhost:3000 \
|
|
16
|
+
* --traces-object training-traces \
|
|
17
|
+
* --min-score 4 \
|
|
18
|
+
* --out ./antonio/distillation/unsloth-batch-001.jsonl \
|
|
19
|
+
* --instruction "You are growthub-local-expert. Respect AWaC V2 invariants and the PATCH allowlist." \
|
|
20
|
+
* [--dry-run]
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import fs from "node:fs";
|
|
24
|
+
import path from "node:path";
|
|
25
|
+
|
|
26
|
+
/**
 * Parse command-line flags for the training-trace exporter.
 *
 * Unrecognised tokens are ignored. `--help` / `-h` prints the usage line and
 * exits with code 0; a missing `--out` prints an error on stderr and exits
 * with code 2.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{workspace: string, tracesObject: string, minScore: number, out: string, instruction: string, dryRun: boolean}}
 *   Parsed options with defaults applied.
 */
function parseArgs(argv) {
  const a = {
    workspace: "http://localhost:3000",
    tracesObject: "training-traces",
    minScore: 4,
    out: "",
    instruction: "You are growthub-local-expert. Respect AWaC V2 invariants and the PATCH allowlist.",
    dryRun: false,
  };
  for (let i = 0; i < argv.length; i += 1) {
    const t = argv[i];
    // Consumes the token following the current flag ("" when there is none).
    const next = () => String(argv[++i] || "").trim();
    if (t === "--workspace") a.workspace = next().replace(/\/+$/, "");
    else if (t === "--traces-object") a.tracesObject = next();
    else if (t === "--min-score") {
      const raw = next();
      const parsed = Number(raw);
      // An explicit 0 is a legitimate threshold ("export everything"), so only
      // fall back to 4 when the value is missing or not a finite number.
      a.minScore = raw !== "" && Number.isFinite(parsed) ? parsed : 4;
    } else if (t === "--out") a.out = next();
    else if (t === "--instruction") a.instruction = next();
    else if (t === "--dry-run") a.dryRun = true;
    else if (t === "--help" || t === "-h") {
      process.stdout.write(
        "Usage: export-training-traces.mjs [--workspace URL] [--traces-object id] [--min-score N] --out <path> [--instruction TEXT] [--dry-run]\n",
      );
      process.exit(0);
    }
  }
  if (!a.out) {
    process.stderr.write("error: --out is required\n");
    process.exit(2);
  }
  return a;
}
|
|
57
|
+
|
|
58
|
+
// Parsed CLI options; the helpers and the export flow below read them via `args`.
const args = parseArgs(process.argv.slice(2));

// Absolute path of the JSONL file this run writes.
const outAbs = path.resolve(args.out);

// Create the destination directory (and any missing parents) up front.
fs.mkdirSync(path.dirname(outAbs), {
  recursive: true,
});
|
|
61
|
+
|
|
62
|
+
/**
 * GET `/api/workspace` on the configured workspace and return
 * `workspaceConfig.dataModel.objects` from the JSON response.
 *
 * @returns {Promise<*>} The `objects` value from the response payload.
 * @throws {Error} When the response status is not OK.
 */
async function getObjects() {
  const endpoint = `${args.workspace}/api/workspace`;
  const response = await fetch(endpoint, { cache: "no-store" });
  if (!response.ok) {
    throw new Error(`GET /api/workspace ${response.status}`);
  }
  const payload = await response.json();
  return payload.workspaceConfig.dataModel.objects;
}
|
|
67
|
+
/**
 * PATCH `/api/workspace` with `{ dataModel: { objects } }` as the JSON body.
 *
 * @param {*} objects - Value sent as `dataModel.objects`.
 * @returns {Promise<void>}
 * @throws {Error} When the response status is not OK; the message carries the
 *   status and the first 300 characters of the response body.
 */
async function patchObjects(objects) {
  const response = await fetch(`${args.workspace}/api/workspace`, {
    method: "PATCH",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ dataModel: { objects } }),
  });
  if (response.ok) {
    return;
  }
  const detail = await response.text();
  throw new Error(`PATCH ${response.status}: ${detail.slice(0, 300)}`);
}
|
|
75
|
+
|
|
76
|
+
// ---------- locate the traces object ----------
const objects = await getObjects();
const tracesIdx = objects.findIndex((candidate) => candidate.id === args.tracesObject);
if (tracesIdx < 0) {
  process.stderr.write(`error: object ${args.tracesObject} not found in workspace\n`);
  process.exit(3);
}
const tracesObj = objects[tracesIdx];
const allRows = Array.isArray(tracesObj.rows) ? tracesObj.rows : [];

// ---------- select exportable rows ----------
// A row qualifies when its score meets the threshold, it is not already
// marked exported, and both the prompt and the agent output are non-blank.
// The original row index is kept so the same rows can be flagged afterwards.
const eligible = [];
for (const [idx, row] of allRows.entries()) {
  if (!(Number(row.qualityScore) >= args.minScore)) continue;
  if (String(row.exported || "false").toLowerCase() === "true") continue;
  if (!String(row.inputPrompt || "").trim()) continue;
  if (!String(row.agentOutput || "").trim()) continue;
  eligible.push({ row, idx });
}

// Writes a pretty-printed JSON run report to stdout.
const emitReport = (report) => process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);

if (eligible.length === 0) {
  emitReport({
    ok: true,
    out: outAbs,
    eligible: 0,
    exported: 0,
    totalRows: allRows.length,
    reason: "no rows match score >= min-score AND exported == false",
  });
  process.exit(0);
}

// ---------- write the JSONL batch ----------
const outStream = fs.createWriteStream(outAbs, { encoding: "utf8" });
eligible.forEach(({ row }) => {
  const sample = {
    instruction: args.instruction,
    input: String(row.inputPrompt),
    output: String(row.agentOutput),
  };
  outStream.write(`${JSON.stringify(sample)}\n`);
});
// Wait until the stream has finished before touching the workspace.
await new Promise((resolve) => outStream.end(resolve));

// ---------- flag the exported rows in the workspace ----------
if (!args.dryRun) {
  const exportedIdx = new Set(eligible.map(({ idx }) => idx));
  const updatedRows = allRows.map((row, i) => {
    if (!exportedIdx.has(i)) return row;
    return { ...row, exported: "true" };
  });
  const nextObjects = objects.map((obj, i) => {
    if (i !== tracesIdx) return obj;
    return { ...obj, rows: updatedRows };
  });
  await patchObjects(nextObjects);
}

emitReport({
  ok: true,
  out: outAbs,
  totalRows: allRows.length,
  eligible: eligible.length,
  exported: args.dryRun ? 0 : eligible.length,
  dryRun: args.dryRun,
  format: "unsloth-jsonl-v1 ({instruction, input, output})",
});
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* helpers/grade-raw-pairs.mjs — Distillation Pipeline V1, Phase 2
|
|
4
|
+
*
|
|
5
|
+
* Grades pairs from `raw-pairs.jsonl` (Phase 1 output) by routing each one
|
|
6
|
+
* through the live `critic-grader` sandbox row (local-intelligence /
|
|
7
|
+
* gemma3:4b). The script never bypasses the workspace API: it PATCHes the
|
|
8
|
+
* critic row's `command`, calls `POST /api/workspace/sandbox-run`, then
|
|
9
|
+
* parses the strict-JSON `{score, reason}` envelope the grader returns.
|
|
10
|
+
*
|
|
11
|
+
* Quality boost: pairs whose `mergedToMain === true` (Phase 1 ground-truth
|
|
12
|
+
* signal that the work was squash-merged on `main`) get a floor of 4.
|
|
13
|
+
*
|
|
14
|
+
* Output: newline-delimited JSON with the original pair fields plus
|
|
15
|
+
* - `qualityScore` 1-5 (string for downstream parity with training-traces)
|
|
16
|
+
* - `qualityReason` one-sentence rationale from the grader
|
|
17
|
+
* - `criticRunMs` latency
|
|
18
|
+
* - `criticRunId` run id for traceability
|
|
19
|
+
* - `gradedAt` ISO timestamp
|
|
20
|
+
* - `boostedByMerge` true if mergedToMain forced the floor
|
|
21
|
+
*
|
|
22
|
+
* Streams to disk after each pair so partial progress survives a kill.
|
|
23
|
+
*
|
|
24
|
+
* Usage:
|
|
25
|
+
* node helpers/grade-raw-pairs.mjs \
|
|
26
|
+
* --in ./antonio/distillation/raw-pairs.jsonl \
|
|
27
|
+
* --out ./antonio/distillation/graded-batch-001.jsonl \
|
|
28
|
+
* --workspace http://localhost:3000 \
|
|
29
|
+
* --grader-row critic-grader \
|
|
30
|
+
* --sandbox-object sandboxes-alignment-loop \
|
|
31
|
+
* --limit 20 \
|
|
32
|
+
* --offset 0 \
|
|
33
|
+
* --max-input-chars 6000 # cap pair text to keep grader prompts safe
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import fs from "node:fs";
|
|
37
|
+
import path from "node:path";
|
|
38
|
+
|
|
39
|
+
/**
 * Parse command-line flags for the raw-pair grader.
 *
 * Unrecognised tokens are ignored. `--help` / `-h` prints the usage line and
 * exits with code 0; a missing `--in` or `--out` prints an error on stderr and
 * exits with code 2.
 *
 * Numeric flags (`--limit`, `--offset`, `--max-input-chars`) accept any finite
 * value >= 0, including an explicit 0. Missing, non-numeric or negative values
 * fall back to the flag's default.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{in: string, out: string, workspace: string, graderRow: string, sandboxObject: string, limit: number, offset: number, maxInputChars: number, mergedOnly: boolean}}
 *   Parsed options with defaults applied.
 */
function parseArgs(argv) {
  const a = {
    in: "",
    out: "",
    workspace: "http://localhost:3000",
    graderRow: "critic-grader",
    sandboxObject: "sandboxes-alignment-loop",
    limit: 20,
    offset: 0,
    maxInputChars: 6000,
    mergedOnly: false,
  };
  // Converts a raw flag value to a non-negative finite number, or `fallback`.
  // Negative values are rejected because they would be interpreted as
  // "count from the end" by the slice() calls that consume these options.
  const numberOr = (raw, fallback) => {
    const parsed = Number(raw);
    return raw !== "" && Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
  };
  for (let i = 0; i < argv.length; i += 1) {
    const t = argv[i];
    // Consumes the token following the current flag ("" when there is none).
    const next = () => String(argv[++i] || "").trim();
    if (t === "--in") a.in = next();
    else if (t === "--out") a.out = next();
    else if (t === "--workspace") a.workspace = next().replace(/\/+$/, "");
    else if (t === "--grader-row") a.graderRow = next();
    else if (t === "--sandbox-object") a.sandboxObject = next();
    else if (t === "--limit") a.limit = numberOr(next(), 20);
    else if (t === "--offset") a.offset = numberOr(next(), 0);
    else if (t === "--max-input-chars") a.maxInputChars = numberOr(next(), 6000);
    else if (t === "--merged-only") a.mergedOnly = true;
    else if (t === "--help" || t === "-h") {
      process.stdout.write(
        "Usage: grade-raw-pairs.mjs --in <raw-pairs.jsonl> --out <graded.jsonl> [--workspace URL] [--grader-row name] [--sandbox-object id] [--limit N] [--offset N] [--max-input-chars N] [--merged-only]\n",
      );
      process.exit(0);
    }
  }
  if (!a.in || !a.out) {
    process.stderr.write("error: --in and --out are required\n");
    process.exit(2);
  }
  return a;
}
|
|
76
|
+
|
|
77
|
+
// Parsed CLI options; the helpers and the grading loop below read them via `args`.
const args = parseArgs(process.argv.slice(2));
|
|
78
|
+
|
|
79
|
+
/**
 * GET `/api/workspace` on the configured workspace and return
 * `workspaceConfig.dataModel.objects` from the JSON response.
 *
 * @returns {Promise<*>} The `objects` value from the response payload.
 * @throws {Error} When the response status is not OK.
 */
async function getWorkspaceObjects() {
  const response = await fetch(`${args.workspace}/api/workspace`, { cache: "no-store" });
  if (!response.ok) {
    throw new Error(`GET /api/workspace ${response.status}`);
  }
  const payload = await response.json();
  const config = payload.workspaceConfig;
  return config.dataModel.objects;
}
|
|
85
|
+
|
|
86
|
+
/**
 * PATCH `/api/workspace` with `{ dataModel: { objects } }` as the JSON body.
 *
 * @param {*} objects - Value sent as `dataModel.objects`.
 * @returns {Promise<void>}
 * @throws {Error} When the response status is not OK; the message carries the
 *   status and the first 300 characters of the response body.
 */
async function patchObjects(objects) {
  const endpoint = `${args.workspace}/api/workspace`;
  const response = await fetch(endpoint, {
    method: "PATCH",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ dataModel: { objects } }),
  });
  if (response.ok) {
    return;
  }
  const bodyText = await response.text();
  throw new Error(`PATCH /api/workspace ${response.status}: ${bodyText.slice(0, 300)}`);
}
|
|
97
|
+
|
|
98
|
+
/**
 * POST `/api/workspace/sandbox-run` for the configured sandbox object and
 * grader row, and return the parsed JSON response.
 *
 * A JSON body is returned whatever the HTTP status, so callers still see any
 * structured payload the endpoint sends. A body that is not valid JSON raises
 * an error that names the endpoint, the status and the start of the body,
 * instead of a bare JSON syntax error.
 *
 * @returns {Promise<*>} Parsed JSON payload of the sandbox-run response.
 * @throws {Error} When the response body is not valid JSON.
 */
async function runGraderSandbox() {
  const r = await fetch(`${args.workspace}/api/workspace/sandbox-run`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ objectId: args.sandboxObject, name: args.graderRow }),
  });
  const text = await r.text();
  try {
    return JSON.parse(text);
  } catch (cause) {
    throw new Error(`POST /api/workspace/sandbox-run ${r.status}: ${text.slice(0, 300)}`, { cause });
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Return a copy of `objects` in which every row named `rowName` inside the
 * configured sandbox object carries the given `command`.
 *
 * Objects whose `id` differs from `args.sandboxObject`, and rows whose `Name`
 * differs from `rowName`, are passed through by reference. Nothing is mutated.
 *
 * @param {Array<object>} objects - Workspace data-model objects.
 * @param {string} rowName - Value matched against each row's `Name`.
 * @param {string} command - Command text written to the matching rows.
 * @returns {Array<object>} New array with the matching rows replaced.
 */
function setRowCommand(objects, rowName, command) {
  const withCommand = (row) => {
    if (row.Name !== rowName) return row;
    return { ...row, command };
  };
  return objects.map((obj) => {
    const isSandbox = obj.id === args.sandboxObject;
    if (!isSandbox) return obj;
    const rows = (obj.rows || []).map(withCommand);
    return { ...obj, rows };
  });
}
|
|
116
|
+
|
|
117
|
+
/**
 * Extract `{score, reason}` from the grader run's stdout envelope.
 *
 * Two envelope shapes are understood:
 *   1. `{"result": {"json": {"score": ..., "reason": ...}}}`
 *   2. `{"rawText": "<JSON string>"}` whose parsed value has
 *      `choices[0].message.content` holding the grader's JSON as a string.
 *
 * The grader is a language model, so two common deviations from strict JSON
 * are tolerated: `content` wrapped in a Markdown code fence, and a `score`
 * given as a numeric string. Non-finite scores are rejected.
 *
 * @param {string} stdout - Raw stdout text of the grader run.
 * @returns {{score: number, reason: string} | null} The parsed grade, or
 *   `null` when stdout is empty, is not valid JSON, or carries no usable score.
 */
function parseScoreFromGraderEnvelope(stdout) {
  if (!stdout) return null;

  // Removes one surrounding ``` / ```json fence, if present.
  const stripCodeFence = (text) => {
    const fenced = /^\s*```(?:json)?\s*([\s\S]*?)\s*```\s*$/i.exec(text);
    return fenced ? fenced[1] : text;
  };

  // Normalises a candidate `{score, reason}` object; null when the score is unusable.
  const toGrade = (candidate) => {
    if (candidate === null || typeof candidate !== "object") return null;
    const raw = candidate.score;
    const score = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
    if (typeof score !== "number" || !Number.isFinite(score)) return null;
    return { score, reason: String(candidate.reason || "") };
  };

  try {
    const env = JSON.parse(stdout);
    const direct = toGrade(env?.result?.json);
    if (direct) return direct;
    if (typeof env?.rawText === "string") {
      const outer = JSON.parse(env.rawText);
      const content = outer?.choices?.[0]?.message?.content;
      if (typeof content === "string") {
        return toGrade(JSON.parse(stripCodeFence(content)));
      }
    }
  } catch {
    // Deliberate best-effort: any malformed layer means "no parseable grade".
  }
  return null;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Build the grading prompt for one user/assistant pair.
 *
 * The user prompt is capped at one third of `maxChars`. The assistant output
 * gets what remains after the prompt, less 800 characters; the fixed
 * instruction text is added on top of that.
 *
 * Both budgets are clamped at zero. Without the clamp a small `maxChars` made
 * the output budget negative, and `slice(0, negative)` keeps everything except
 * the tail, producing a prompt far larger than the requested cap. Missing or
 * non-string pair fields are treated as empty or stringified rather than
 * throwing.
 *
 * @param {{inputPrompt?: *, agentOutput?: *}} pair - Pair to grade.
 * @param {number} maxChars - Character budget for the pair text.
 * @returns {string} Newline-joined grader prompt.
 */
function buildGraderPrompt(pair, maxChars) {
  const promptText = String(pair?.inputPrompt ?? "");
  const outputText = String(pair?.agentOutput ?? "");
  const promptHead = promptText.slice(0, Math.max(0, Math.floor(maxChars / 3)));
  const outputBudget = Math.max(0, maxChars - promptHead.length - 800);
  const outputHead = outputText.slice(0, outputBudget);
  const lines = [
    "You are critic-grader for AWaC V2. Score this user→assistant pair from a Cursor session on the growthub-local repo.",
    "Criteria:",
    " 1) Clear understanding of the user request",
    " 2) Used appropriate tools / primitives",
    " 3) Respects AWaC V2 invariants (PATCH allowlist, no secret leaks, no protected-boundary edits)",
    " 4) Output is correct and actionable",
    " 5) Production-quality (would survive code review on this repo)",
    "Return ONLY strict JSON: {\"score\": <1-5 integer>, \"reason\": \"one short sentence\"}.",
    "",
    "USER PROMPT (truncated):",
    promptHead,
    "",
    "ASSISTANT RESPONSE (truncated):",
    outputHead,
  ];
  return lines.join("\n");
}
|
|
161
|
+
|
|
162
|
+
// ---------- read pairs ----------
const inAbs = path.resolve(args.in);
const outAbs = path.resolve(args.out);
fs.mkdirSync(path.dirname(outAbs), { recursive: true });

// One JSON document per non-empty line.
const allLines = fs.readFileSync(inAbs, "utf8").split("\n").filter(Boolean);
let pool = allLines;
if (args.mergedOnly) {
  // Keep only pairs flagged as merged; unparseable lines are dropped here.
  pool = allLines.filter((ln) => {
    try {
      return JSON.parse(ln).mergedToMain === true;
    } catch {
      return false;
    }
  });
}
const slice = pool.slice(args.offset, args.offset + args.limit);

process.stdout.write(
  `[grade] in=${path.basename(inAbs)} totalLines=${allLines.length} pool=${pool.length}${args.mergedOnly ? " (mergedOnly)" : ""} offset=${args.offset} batch=${slice.length} -> ${path.basename(outAbs)}\n`,
);

// ---------- grade each pair ----------
const outStream = fs.createWriteStream(outAbs, { encoding: "utf8" });
const summary = {
  graded: 0,
  parseFailures: 0,
  scoreCounts: { 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 0: 0 },
  boostedByMerge: 0,
  scoreSum: 0,
  startedAt: new Date().toISOString(),
};

for (let i = 0; i < slice.length; i += 1) {
  let pair;
  try {
    pair = JSON.parse(slice[i]);
  } catch {
    process.stderr.write(`[grade] skip line ${i}: not JSON\n`);
    continue;
  }
  // Valid JSON that is not an object (null, a number, a string) cannot be a
  // pair; skip it instead of crashing on the property reads below.
  if (pair === null || typeof pair !== "object") {
    process.stderr.write(`[grade] skip line ${i}: not a JSON object\n`);
    continue;
  }

  let parsedScore = null;
  let runMs = 0;
  let runId = "";
  let boosted = false;

  // Build the prompt inside its own guard so that one malformed pair is
  // recorded as a failure instead of aborting the rest of the batch.
  let command;
  try {
    command = buildGraderPrompt(pair, args.maxInputChars);
  } catch (e) {
    process.stderr.write(`[grade] pair ${i + args.offset} prompt build error: ${e.message}\n`);
  }

  if (command !== undefined) {
    try {
      // The grader row is driven through the workspace API: write the prompt
      // into the row's `command`, then trigger the sandbox run.
      const objects = await getWorkspaceObjects();
      await patchObjects(setRowCommand(objects, args.graderRow, command));
      const startedAt = Date.now();
      const run = await runGraderSandbox();
      runMs = Date.now() - startedAt;
      runId = run?.runId || "";
      parsedScore = parseScoreFromGraderEnvelope(run?.response?.stdout);
    } catch (e) {
      process.stderr.write(`[grade] pair ${i + args.offset} sandbox-run error: ${e.message}\n`);
    }
  }

  if (!parsedScore) {
    summary.parseFailures += 1;
    parsedScore = { score: 0, reason: "grader did not return parseable JSON" };
  }

  // Apply mergedToMain floor=4 boost
  if (pair.mergedToMain === true && parsedScore.score < 4) {
    boosted = true;
    summary.boostedByMerge += 1;
    parsedScore = { score: 4, reason: `[boosted by squash-merge to main; original: ${parsedScore.score} - ${parsedScore.reason}]` };
  }

  summary.graded += 1;
  summary.scoreCounts[parsedScore.score] = (summary.scoreCounts[parsedScore.score] || 0) + 1;
  summary.scoreSum += parsedScore.score;

  const out = {
    ...pair,
    qualityScore: String(parsedScore.score),
    qualityReason: parsedScore.reason,
    criticRunMs: runMs,
    criticRunId: runId,
    boostedByMerge: boosted,
    gradedAt: new Date().toISOString(),
  };
  // Written per pair so that partial progress is on disk if the run is killed.
  outStream.write(`${JSON.stringify(out)}\n`);

  // sessionId may be absent or non-string; the progress line must not crash
  // after the graded row has already been written.
  const sessionTag = String(pair.sessionId ?? "").slice(0, 8);
  process.stdout.write(
    `[grade] ${i + 1}/${slice.length} session=${sessionTag} pair=${pair.pairIndex} score=${out.qualityScore}${boosted ? "*" : ""} ms=${runMs}\n`,
  );
}

await new Promise((r) => outStream.end(r));

// ---------- summary ----------
const avg = summary.graded ? (summary.scoreSum / summary.graded).toFixed(2) : "0.00";
const highCount = (summary.scoreCounts[4] || 0) + (summary.scoreCounts[5] || 0);
const finishedAt = new Date().toISOString();

process.stdout.write(
  "\n" +
    JSON.stringify(
      {
        ok: true,
        out: outAbs,
        startedAt: summary.startedAt,
        finishedAt,
        graded: summary.graded,
        parseFailures: summary.parseFailures,
        boostedByMerge: summary.boostedByMerge,
        averageScore: Number(avg),
        scoreCounts: summary.scoreCounts,
        highQualityCount: highCount,
        highQualityRatio: summary.graded ? Number((highCount / summary.graded).toFixed(3)) : 0,
      },
      null,
      2,
    ) +
    "\n",
);
|