@tangle-network/agent-eval 0.16.2 → 0.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/index.d.ts +14 -115
- package/dist/index.js +92 -254
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -3337,12 +3337,12 @@ var SubprocessSandboxDriver = class {
|
|
|
3337
3337
|
this.defaultEnv = options.env;
|
|
3338
3338
|
}
|
|
3339
3339
|
async exec(phase, command, config) {
|
|
3340
|
-
const { spawn
|
|
3340
|
+
const { spawn } = await import("child_process");
|
|
3341
3341
|
const start = Date.now();
|
|
3342
3342
|
const effectiveCwd = config.cwd ?? this.defaultCwd;
|
|
3343
3343
|
const effectiveEnv = { ...process.env, ...this.defaultEnv ?? {}, ...config.env ?? {} };
|
|
3344
3344
|
return await new Promise((resolve) => {
|
|
3345
|
-
const child =
|
|
3345
|
+
const child = spawn(command, {
|
|
3346
3346
|
shell: true,
|
|
3347
3347
|
cwd: effectiveCwd,
|
|
3348
3348
|
env: effectiveEnv
|
|
@@ -8578,20 +8578,20 @@ function mergeLayerResults(name, perAdapter, options = {}) {
|
|
|
8578
8578
|
let durationMs = 0;
|
|
8579
8579
|
const reasonParts = [];
|
|
8580
8580
|
const diagnostics = {};
|
|
8581
|
-
for (const { adapter:
|
|
8581
|
+
for (const { adapter: adapter2, result } of perAdapter) {
|
|
8582
8582
|
status = worst(status, result.status);
|
|
8583
8583
|
if (typeof result.score === "number") {
|
|
8584
8584
|
weightedScoreSum += result.score;
|
|
8585
8585
|
weightCount += 1;
|
|
8586
8586
|
}
|
|
8587
8587
|
durationMs = mergeDuration === "sum" ? durationMs + result.durationMs : Math.max(durationMs, result.durationMs);
|
|
8588
|
-
reasonParts.push(`${
|
|
8588
|
+
reasonParts.push(`${adapter2}: ${result.status}`);
|
|
8589
8589
|
for (const f of result.findings) {
|
|
8590
8590
|
findings.push({
|
|
8591
8591
|
...f,
|
|
8592
8592
|
layer: name,
|
|
8593
|
-
message: prefix ? `${prefix(
|
|
8594
|
-
detail: { ...f.detail ?? {}, adapter:
|
|
8593
|
+
message: prefix ? `${prefix(adapter2)} ${f.message}` : f.message,
|
|
8594
|
+
detail: { ...f.detail ?? {}, adapter: adapter2 }
|
|
8595
8595
|
});
|
|
8596
8596
|
}
|
|
8597
8597
|
for (const [k, v] of Object.entries(result.diagnostics ?? {})) {
|
|
@@ -8610,8 +8610,8 @@ function mergeLayerResults(name, perAdapter, options = {}) {
|
|
|
8610
8610
|
reason: reasonParts.join(" \xB7 "),
|
|
8611
8611
|
diagnostics: Object.keys(diagnostics).length > 0 ? diagnostics : void 0,
|
|
8612
8612
|
detail: {
|
|
8613
|
-
adapters: perAdapter.map(({ adapter:
|
|
8614
|
-
adapter:
|
|
8613
|
+
adapters: perAdapter.map(({ adapter: adapter2, result }) => ({
|
|
8614
|
+
adapter: adapter2,
|
|
8615
8615
|
status: result.status,
|
|
8616
8616
|
score: result.score ?? null
|
|
8617
8617
|
})),
|
|
@@ -8637,10 +8637,10 @@ function multiToolchainLayer(config) {
|
|
|
8637
8637
|
reason: "no adapters detected"
|
|
8638
8638
|
};
|
|
8639
8639
|
}
|
|
8640
|
-
const runOne = async (
|
|
8641
|
-
const adapterName = config.adapterName(
|
|
8640
|
+
const runOne = async (adapter2) => {
|
|
8641
|
+
const adapterName = config.adapterName(adapter2);
|
|
8642
8642
|
try {
|
|
8643
|
-
const r = await config.run(
|
|
8643
|
+
const r = await config.run(adapter2, ctx);
|
|
8644
8644
|
return { adapter: adapterName, result: r };
|
|
8645
8645
|
} catch (err) {
|
|
8646
8646
|
return {
|
|
@@ -10076,8 +10076,8 @@ function formatPct(value) {
|
|
|
10076
10076
|
function bySplitOrder(a, b) {
|
|
10077
10077
|
return ALL_SPLITS.indexOf(a) - ALL_SPLITS.indexOf(b);
|
|
10078
10078
|
}
|
|
10079
|
-
function runAdapter(
|
|
10080
|
-
return typeof
|
|
10079
|
+
function runAdapter(adapter2, scenario, context) {
|
|
10080
|
+
return typeof adapter2 === "function" ? adapter2(scenario, context) : adapter2.run(scenario, context);
|
|
10081
10081
|
}
|
|
10082
10082
|
function throwIfAborted(signal) {
|
|
10083
10083
|
if (!signal?.aborted) return;
|
|
@@ -10968,232 +10968,18 @@ var benchmarks_exports = {};
|
|
|
10968
10968
|
__export(benchmarks_exports, {
|
|
10969
10969
|
BENCHMARK_SPLIT_SEED: () => BENCHMARK_SPLIT_SEED,
|
|
10970
10970
|
deterministicSplit: () => deterministicSplit,
|
|
10971
|
-
|
|
10972
|
-
routing: () => routing_exports,
|
|
10973
|
-
swebenchLite: () => swebench_lite_exports
|
|
10971
|
+
routing: () => routing_exports
|
|
10974
10972
|
});
|
|
10975
10973
|
|
|
10976
|
-
// src/benchmarks/gsm8k/index.ts
|
|
10977
|
-
var gsm8k_exports = {};
|
|
10978
|
-
__export(gsm8k_exports, {
|
|
10979
|
-
Gsm8kAdapter: () => Gsm8kAdapter,
|
|
10980
|
-
assignSplit: () => assignSplit,
|
|
10981
|
-
evaluate: () => evaluate,
|
|
10982
|
-
loadDataset: () => loadDataset,
|
|
10983
|
-
parseGsm8kAnswer: () => parseGsm8kAnswer
|
|
10984
|
-
});
|
|
10985
|
-
import { existsSync as existsSync5, readFileSync as readFileSync5 } from "fs";
|
|
10986
|
-
var Gsm8kAdapter = class {
|
|
10987
|
-
async loadDataset(split) {
|
|
10988
|
-
const path = process.env.AGENT_EVAL_GSM8K_PATH;
|
|
10989
|
-
if (!path) {
|
|
10990
|
-
throw new Error(
|
|
10991
|
-
"GSM8K dataset not provided. Set AGENT_EVAL_GSM8K_PATH to a JSONL file with {id, question, answer} records (the HF GSM8K mirror converted to JSONL)."
|
|
10992
|
-
);
|
|
10993
|
-
}
|
|
10994
|
-
if (!existsSync5(path)) {
|
|
10995
|
-
throw new Error(`AGENT_EVAL_GSM8K_PATH=${path} does not exist`);
|
|
10996
|
-
}
|
|
10997
|
-
const items = parseJsonl(path).filter((it) => assignSplitImpl(it.id) === split);
|
|
10998
|
-
return items;
|
|
10999
|
-
}
|
|
11000
|
-
async evaluate(item, response) {
|
|
11001
|
-
const expected = parseGsm8kAnswer(item.payload.answer);
|
|
11002
|
-
const observed = parseGsm8kAnswer(response);
|
|
11003
|
-
if (expected === null) {
|
|
11004
|
-
return { score: 0, raw: { reason: "reference_not_numeric", expected: item.payload.answer } };
|
|
11005
|
-
}
|
|
11006
|
-
if (observed === null) {
|
|
11007
|
-
return { score: 0, raw: { reason: "no_numeric_in_response", expected, observed: null } };
|
|
11008
|
-
}
|
|
11009
|
-
const ok = Math.abs(expected - observed) < 1e-6;
|
|
11010
|
-
return { score: ok ? 1 : 0, raw: { expected, observed, exactMatch: ok } };
|
|
11011
|
-
}
|
|
11012
|
-
assignSplit(itemId) {
|
|
11013
|
-
return assignSplitImpl(itemId);
|
|
11014
|
-
}
|
|
11015
|
-
};
|
|
11016
|
-
function assignSplitImpl(itemId) {
|
|
11017
|
-
return deterministicSplit(`gsm8k::${itemId}`);
|
|
11018
|
-
}
|
|
11019
|
-
function parseJsonl(path) {
|
|
11020
|
-
const raw = readFileSync5(path, "utf8");
|
|
11021
|
-
const out = [];
|
|
11022
|
-
let lineNo = 0;
|
|
11023
|
-
for (const line of raw.split("\n")) {
|
|
11024
|
-
lineNo++;
|
|
11025
|
-
const trimmed = line.trim();
|
|
11026
|
-
if (!trimmed) continue;
|
|
11027
|
-
let row;
|
|
11028
|
-
try {
|
|
11029
|
-
row = JSON.parse(trimmed);
|
|
11030
|
-
} catch (e) {
|
|
11031
|
-
throw new Error(`GSM8K JSONL parse error at line ${lineNo}: ${e.message}`);
|
|
11032
|
-
}
|
|
11033
|
-
const id = String(row.id ?? `gsm8k_${lineNo}`);
|
|
11034
|
-
const question = String(row.question ?? "");
|
|
11035
|
-
const answer = String(row.answer ?? "");
|
|
11036
|
-
if (!question || !answer) {
|
|
11037
|
-
throw new Error(`GSM8K JSONL line ${lineNo} missing question/answer`);
|
|
11038
|
-
}
|
|
11039
|
-
out.push({ id, payload: { question, answer } });
|
|
11040
|
-
}
|
|
11041
|
-
return out;
|
|
11042
|
-
}
|
|
11043
|
-
function parseGsm8kAnswer(text) {
|
|
11044
|
-
if (!text) return null;
|
|
11045
|
-
const afterMarker = text.match(/####\s*(-?\d[\d,]*\.?\d*)/);
|
|
11046
|
-
if (afterMarker) {
|
|
11047
|
-
const cleaned2 = afterMarker[1].replace(/,/g, "");
|
|
11048
|
-
const v2 = Number(cleaned2);
|
|
11049
|
-
if (Number.isFinite(v2)) return v2;
|
|
11050
|
-
}
|
|
11051
|
-
const matches2 = text.match(/-?\d[\d,]*\.?\d*/g);
|
|
11052
|
-
if (!matches2 || matches2.length === 0) return null;
|
|
11053
|
-
const last = matches2[matches2.length - 1];
|
|
11054
|
-
const cleaned = last.replace(/,/g, "");
|
|
11055
|
-
const v = Number(cleaned);
|
|
11056
|
-
return Number.isFinite(v) ? v : null;
|
|
11057
|
-
}
|
|
11058
|
-
var adapter = new Gsm8kAdapter();
|
|
11059
|
-
var loadDataset = adapter.loadDataset.bind(adapter);
|
|
11060
|
-
var evaluate = adapter.evaluate.bind(adapter);
|
|
11061
|
-
var assignSplit = adapter.assignSplit.bind(adapter);
|
|
11062
|
-
|
|
11063
|
-
// src/benchmarks/swebench-lite/index.ts
|
|
11064
|
-
var swebench_lite_exports = {};
|
|
11065
|
-
__export(swebench_lite_exports, {
|
|
11066
|
-
SweBenchLiteAdapter: () => SweBenchLiteAdapter,
|
|
11067
|
-
assignSplit: () => assignSplit2,
|
|
11068
|
-
evaluate: () => evaluate2,
|
|
11069
|
-
loadDataset: () => loadDataset2
|
|
11070
|
-
});
|
|
11071
|
-
import { existsSync as existsSync6, readFileSync as readFileSync6 } from "fs";
|
|
11072
|
-
import { spawn } from "child_process";
|
|
11073
|
-
var SweBenchLiteAdapter = class {
|
|
11074
|
-
async loadDataset(split) {
|
|
11075
|
-
const path = process.env.AGENT_EVAL_SWEBENCH_PATH;
|
|
11076
|
-
if (!path) {
|
|
11077
|
-
throw new Error(
|
|
11078
|
-
"SWE-Bench Lite dataset not provided. Set AGENT_EVAL_SWEBENCH_PATH to a JSONL file with the 30 lite instances. STUB: this wrapper does not bundle the dataset; see https://www.swebench.com/lite.html for the canonical source."
|
|
11079
|
-
);
|
|
11080
|
-
}
|
|
11081
|
-
if (!existsSync6(path)) {
|
|
11082
|
-
throw new Error(`AGENT_EVAL_SWEBENCH_PATH=${path} does not exist`);
|
|
11083
|
-
}
|
|
11084
|
-
const all = parseJsonl2(path);
|
|
11085
|
-
return all.filter((it) => assignSplitImpl2(it.id) === split);
|
|
11086
|
-
}
|
|
11087
|
-
async evaluate(item, response) {
|
|
11088
|
-
const cmd = process.env.AGENT_EVAL_SWEBENCH_GRADER_CMD;
|
|
11089
|
-
if (!cmd) {
|
|
11090
|
-
throw new Error(
|
|
11091
|
-
"SWE-Bench Lite grader not configured. Set AGENT_EVAL_SWEBENCH_GRADER_CMD to an executable that reads {instance_id, patch} JSON on stdin and writes {passed, fail_to_pass_passed, pass_to_pass_passed, log} JSON on stdout. TODO(swebench-lite): bundle a default Docker-based runner once the SDK stabilises (https://github.com/swe-bench/SWE-bench)."
|
|
11092
|
-
);
|
|
11093
|
-
}
|
|
11094
|
-
const stdinPayload = JSON.stringify({ instance_id: item.payload.instanceId, patch: response });
|
|
11095
|
-
const result = await runGrader(cmd, stdinPayload);
|
|
11096
|
-
let parsed;
|
|
11097
|
-
try {
|
|
11098
|
-
parsed = JSON.parse(result.stdout);
|
|
11099
|
-
} catch (e) {
|
|
11100
|
-
throw new Error(
|
|
11101
|
-
`SWE-Bench grader emitted non-JSON stdout: ${e.message}
|
|
11102
|
-
stdout=${result.stdout.slice(0, 400)}
|
|
11103
|
-
stderr=${result.stderr.slice(0, 400)}`
|
|
11104
|
-
);
|
|
11105
|
-
}
|
|
11106
|
-
const passed = Boolean(parsed.passed);
|
|
11107
|
-
return {
|
|
11108
|
-
score: passed ? 1 : 0,
|
|
11109
|
-
raw: {
|
|
11110
|
-
passed,
|
|
11111
|
-
failToPassPassed: Boolean(parsed.fail_to_pass_passed),
|
|
11112
|
-
passToPassPassed: Boolean(parsed.pass_to_pass_passed),
|
|
11113
|
-
graderLog: typeof parsed.log === "string" ? parsed.log.slice(0, 4e3) : ""
|
|
11114
|
-
}
|
|
11115
|
-
};
|
|
11116
|
-
}
|
|
11117
|
-
assignSplit(itemId) {
|
|
11118
|
-
return assignSplitImpl2(itemId);
|
|
11119
|
-
}
|
|
11120
|
-
};
|
|
11121
|
-
function assignSplitImpl2(itemId) {
|
|
11122
|
-
return deterministicSplit(`swebench-lite::${itemId}`);
|
|
11123
|
-
}
|
|
11124
|
-
function parseJsonl2(path) {
|
|
11125
|
-
const raw = readFileSync6(path, "utf8");
|
|
11126
|
-
const out = [];
|
|
11127
|
-
let lineNo = 0;
|
|
11128
|
-
for (const line of raw.split("\n")) {
|
|
11129
|
-
lineNo++;
|
|
11130
|
-
const trimmed = line.trim();
|
|
11131
|
-
if (!trimmed) continue;
|
|
11132
|
-
const row = JSON.parse(trimmed);
|
|
11133
|
-
const instanceId = String(row.instance_id ?? row.instanceId ?? "");
|
|
11134
|
-
if (!instanceId) {
|
|
11135
|
-
throw new Error(`swebench-lite line ${lineNo} missing instance_id`);
|
|
11136
|
-
}
|
|
11137
|
-
out.push({
|
|
11138
|
-
id: instanceId,
|
|
11139
|
-
payload: {
|
|
11140
|
-
instanceId,
|
|
11141
|
-
problemStatement: String(row.problem_statement ?? row.problemStatement ?? ""),
|
|
11142
|
-
baseCommit: String(row.base_commit ?? row.baseCommit ?? ""),
|
|
11143
|
-
repo: String(row.repo ?? ""),
|
|
11144
|
-
failToPass: asStringArray(row.FAIL_TO_PASS ?? row.failToPass),
|
|
11145
|
-
passToPass: asStringArray(row.PASS_TO_PASS ?? row.passToPass)
|
|
11146
|
-
}
|
|
11147
|
-
});
|
|
11148
|
-
}
|
|
11149
|
-
return out;
|
|
11150
|
-
}
|
|
11151
|
-
function asStringArray(v) {
|
|
11152
|
-
if (Array.isArray(v)) return v.filter((x) => typeof x === "string");
|
|
11153
|
-
if (typeof v === "string") {
|
|
11154
|
-
try {
|
|
11155
|
-
const parsed = JSON.parse(v);
|
|
11156
|
-
if (Array.isArray(parsed)) return parsed.filter((x) => typeof x === "string");
|
|
11157
|
-
} catch {
|
|
11158
|
-
return [v];
|
|
11159
|
-
}
|
|
11160
|
-
}
|
|
11161
|
-
return [];
|
|
11162
|
-
}
|
|
11163
|
-
function runGrader(cmd, stdin) {
|
|
11164
|
-
return new Promise((resolve, reject) => {
|
|
11165
|
-
const parts = cmd.split(/\s+/);
|
|
11166
|
-
const child = spawn(parts[0], parts.slice(1), { stdio: ["pipe", "pipe", "pipe"] });
|
|
11167
|
-
let stdout = "";
|
|
11168
|
-
let stderr = "";
|
|
11169
|
-
child.stdout.on("data", (b) => stdout += b.toString("utf8"));
|
|
11170
|
-
child.stderr.on("data", (b) => stderr += b.toString("utf8"));
|
|
11171
|
-
child.on("error", reject);
|
|
11172
|
-
child.on("close", (code) => {
|
|
11173
|
-
if (code !== 0) {
|
|
11174
|
-
reject(new Error(`grader exited with code ${code}: ${stderr.slice(0, 400)}`));
|
|
11175
|
-
return;
|
|
11176
|
-
}
|
|
11177
|
-
resolve({ stdout, stderr });
|
|
11178
|
-
});
|
|
11179
|
-
child.stdin.write(stdin);
|
|
11180
|
-
child.stdin.end();
|
|
11181
|
-
});
|
|
11182
|
-
}
|
|
11183
|
-
var adapter2 = new SweBenchLiteAdapter();
|
|
11184
|
-
var loadDataset2 = adapter2.loadDataset.bind(adapter2);
|
|
11185
|
-
var evaluate2 = adapter2.evaluate.bind(adapter2);
|
|
11186
|
-
var assignSplit2 = adapter2.assignSplit.bind(adapter2);
|
|
11187
|
-
|
|
11188
10974
|
// src/benchmarks/routing/index.ts
|
|
11189
10975
|
var routing_exports = {};
|
|
11190
10976
|
__export(routing_exports, {
|
|
11191
10977
|
ROUTING_DATASET: () => ROUTING_DATASET,
|
|
11192
10978
|
RoutingAdapter: () => RoutingAdapter,
|
|
11193
|
-
assignSplit: () =>
|
|
11194
|
-
evaluate: () =>
|
|
10979
|
+
assignSplit: () => assignSplit,
|
|
10980
|
+
evaluate: () => evaluate,
|
|
11195
10981
|
extractRouteTokens: () => extractRouteTokens,
|
|
11196
|
-
loadDataset: () =>
|
|
10982
|
+
loadDataset: () => loadDataset
|
|
11197
10983
|
});
|
|
11198
10984
|
|
|
11199
10985
|
// src/benchmarks/routing/dataset.ts
|
|
@@ -11331,7 +11117,7 @@ var ROUTING_DATASET = [
|
|
|
11331
11117
|
// src/benchmarks/routing/index.ts
|
|
11332
11118
|
var RoutingAdapter = class {
|
|
11333
11119
|
async loadDataset(split) {
|
|
11334
|
-
return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter((it) =>
|
|
11120
|
+
return ROUTING_DATASET.map((item) => ({ id: item.id, payload: item })).filter((it) => assignSplitImpl(it.id) === split);
|
|
11335
11121
|
}
|
|
11336
11122
|
async evaluate(item, response) {
|
|
11337
11123
|
const tokens2 = extractRouteTokens(response);
|
|
@@ -11352,20 +11138,20 @@ var RoutingAdapter = class {
|
|
|
11352
11138
|
};
|
|
11353
11139
|
}
|
|
11354
11140
|
assignSplit(itemId) {
|
|
11355
|
-
return
|
|
11141
|
+
return assignSplitImpl(itemId);
|
|
11356
11142
|
}
|
|
11357
11143
|
};
|
|
11358
|
-
function
|
|
11144
|
+
function assignSplitImpl(itemId) {
|
|
11359
11145
|
return deterministicSplit(`routing::${itemId}`);
|
|
11360
11146
|
}
|
|
11361
11147
|
function extractRouteTokens(response) {
|
|
11362
11148
|
const matches2 = response.match(/[a-z][a-z0-9_]*\.[a-z][a-z0-9_]*/gi);
|
|
11363
11149
|
return matches2 ?? [];
|
|
11364
11150
|
}
|
|
11365
|
-
var
|
|
11366
|
-
var
|
|
11367
|
-
var
|
|
11368
|
-
var
|
|
11151
|
+
var adapter = new RoutingAdapter();
|
|
11152
|
+
var loadDataset = adapter.loadDataset.bind(adapter);
|
|
11153
|
+
var evaluate = adapter.evaluate.bind(adapter);
|
|
11154
|
+
var assignSplit = adapter.assignSplit.bind(adapter);
|
|
11369
11155
|
|
|
11370
11156
|
// src/reference-replay-steering.ts
|
|
11371
11157
|
function referenceReplayRunsToSteeringRows(runs, options = {}) {
|
|
@@ -11632,11 +11418,11 @@ function samePopulation(a, b) {
|
|
|
11632
11418
|
}
|
|
11633
11419
|
|
|
11634
11420
|
// src/jsonl-trial-cache.ts
|
|
11635
|
-
import { appendFileSync as appendFileSync4, existsSync as
|
|
11421
|
+
import { appendFileSync as appendFileSync4, existsSync as existsSync6, mkdirSync as mkdirSync4, readFileSync as readFileSync5 } from "fs";
|
|
11636
11422
|
import { dirname as dirname4 } from "path";
|
|
11637
11423
|
|
|
11638
11424
|
// src/locked-jsonl-appender.ts
|
|
11639
|
-
import { appendFileSync as appendFileSync3, existsSync as
|
|
11425
|
+
import { appendFileSync as appendFileSync3, existsSync as existsSync5, mkdirSync as mkdirSync3 } from "fs";
|
|
11640
11426
|
import { dirname as dirname3 } from "path";
|
|
11641
11427
|
var mutexes = /* @__PURE__ */ new Map();
|
|
11642
11428
|
function getMutex(path) {
|
|
@@ -11651,7 +11437,7 @@ var LockedJsonlAppender = class {
|
|
|
11651
11437
|
constructor(path) {
|
|
11652
11438
|
this.path = path;
|
|
11653
11439
|
this.mutex = getMutex(path);
|
|
11654
|
-
if (!
|
|
11440
|
+
if (!existsSync5(dirname3(path))) {
|
|
11655
11441
|
mkdirSync3(dirname3(path), { recursive: true });
|
|
11656
11442
|
}
|
|
11657
11443
|
}
|
|
@@ -11676,8 +11462,8 @@ var JsonlTrialCache = class {
|
|
|
11676
11462
|
appender;
|
|
11677
11463
|
constructor(path) {
|
|
11678
11464
|
this.path = path;
|
|
11679
|
-
if (
|
|
11680
|
-
for (const line of
|
|
11465
|
+
if (existsSync6(path)) {
|
|
11466
|
+
for (const line of readFileSync5(path, "utf-8").split("\n")) {
|
|
11681
11467
|
if (!line.trim()) continue;
|
|
11682
11468
|
try {
|
|
11683
11469
|
const entry = JSON.parse(line);
|
|
@@ -11715,7 +11501,7 @@ var JsonlTrialCache = class {
|
|
|
11715
11501
|
};
|
|
11716
11502
|
|
|
11717
11503
|
// src/evolution-telemetry.ts
|
|
11718
|
-
import { appendFileSync as appendFileSync5, existsSync as
|
|
11504
|
+
import { appendFileSync as appendFileSync5, existsSync as existsSync7, mkdirSync as mkdirSync5, readFileSync as readFileSync6, writeFileSync } from "fs";
|
|
11719
11505
|
import { dirname as dirname5 } from "path";
|
|
11720
11506
|
var MutationTelemetry = class {
|
|
11721
11507
|
appender;
|
|
@@ -11746,16 +11532,16 @@ var LineageRecorder = class {
|
|
|
11746
11532
|
this.snapshotPath = `${path}.snapshot`;
|
|
11747
11533
|
this.kindOf = kindOf ?? defaultKindOf;
|
|
11748
11534
|
mkdirSync5(dirname5(path), { recursive: true });
|
|
11749
|
-
if (
|
|
11535
|
+
if (existsSync7(this.snapshotPath)) {
|
|
11750
11536
|
try {
|
|
11751
|
-
const parsed = JSON.parse(
|
|
11537
|
+
const parsed = JSON.parse(readFileSync6(this.snapshotPath, "utf-8"));
|
|
11752
11538
|
for (const n of parsed) this.nodes.set(n.id, n);
|
|
11753
11539
|
} catch {
|
|
11754
11540
|
}
|
|
11755
11541
|
}
|
|
11756
|
-
if (
|
|
11542
|
+
if (existsSync7(path)) {
|
|
11757
11543
|
try {
|
|
11758
|
-
for (const line of
|
|
11544
|
+
for (const line of readFileSync6(path, "utf-8").split("\n")) {
|
|
11759
11545
|
if (!line.trim()) continue;
|
|
11760
11546
|
try {
|
|
11761
11547
|
const entry = JSON.parse(line);
|
|
@@ -11767,9 +11553,9 @@ var LineageRecorder = class {
|
|
|
11767
11553
|
} catch {
|
|
11768
11554
|
}
|
|
11769
11555
|
}
|
|
11770
|
-
if (
|
|
11556
|
+
if (existsSync7(path) && this.nodes.size === 0) {
|
|
11771
11557
|
try {
|
|
11772
|
-
const raw =
|
|
11558
|
+
const raw = readFileSync6(path, "utf-8").trim();
|
|
11773
11559
|
if (raw.startsWith("[")) {
|
|
11774
11560
|
const parsed = JSON.parse(raw);
|
|
11775
11561
|
for (const n of parsed) this.nodes.set(n.id, n);
|
|
@@ -11783,8 +11569,8 @@ var LineageRecorder = class {
|
|
|
11783
11569
|
const prev = this.nodes.get(node.id);
|
|
11784
11570
|
this.nodes.set(node.id, { ...prev, ...node });
|
|
11785
11571
|
try {
|
|
11786
|
-
if (
|
|
11787
|
-
const head =
|
|
11572
|
+
if (existsSync7(this.path)) {
|
|
11573
|
+
const head = readFileSync6(this.path, { encoding: "utf-8", flag: "r" }).slice(0, 1);
|
|
11788
11574
|
if (head === "[") {
|
|
11789
11575
|
writeFileSync(this.path, "");
|
|
11790
11576
|
}
|
|
@@ -11850,9 +11636,9 @@ var CostLedger = class {
|
|
|
11850
11636
|
mutex = new Mutex();
|
|
11851
11637
|
constructor(path) {
|
|
11852
11638
|
this.path = path;
|
|
11853
|
-
if (
|
|
11639
|
+
if (existsSync7(path)) {
|
|
11854
11640
|
try {
|
|
11855
|
-
const loaded = JSON.parse(
|
|
11641
|
+
const loaded = JSON.parse(readFileSync6(path, "utf-8"));
|
|
11856
11642
|
for (const k of Object.keys(this.totals)) {
|
|
11857
11643
|
if (k === "byGeneration") {
|
|
11858
11644
|
if (loaded.byGeneration && typeof loaded.byGeneration === "object") {
|
|
@@ -12512,6 +12298,46 @@ function truncate3(s, max) {
|
|
|
12512
12298
|
function quote(s) {
|
|
12513
12299
|
return s.replace(/`/g, "\\`");
|
|
12514
12300
|
}
|
|
12301
|
+
function autoCloseTruncatedJson(raw) {
|
|
12302
|
+
const stack = [];
|
|
12303
|
+
let inString = false;
|
|
12304
|
+
let escape = false;
|
|
12305
|
+
for (const c of raw) {
|
|
12306
|
+
if (escape) {
|
|
12307
|
+
escape = false;
|
|
12308
|
+
continue;
|
|
12309
|
+
}
|
|
12310
|
+
if (inString) {
|
|
12311
|
+
if (c === "\\") {
|
|
12312
|
+
escape = true;
|
|
12313
|
+
continue;
|
|
12314
|
+
}
|
|
12315
|
+
if (c === '"') {
|
|
12316
|
+
inString = false;
|
|
12317
|
+
continue;
|
|
12318
|
+
}
|
|
12319
|
+
continue;
|
|
12320
|
+
}
|
|
12321
|
+
if (c === '"') {
|
|
12322
|
+
inString = true;
|
|
12323
|
+
continue;
|
|
12324
|
+
}
|
|
12325
|
+
if (c === "{" || c === "[") stack.push(c);
|
|
12326
|
+
else if (c === "}") {
|
|
12327
|
+
if (stack.pop() !== "{") return null;
|
|
12328
|
+
} else if (c === "]") {
|
|
12329
|
+
if (stack.pop() !== "[") return null;
|
|
12330
|
+
}
|
|
12331
|
+
}
|
|
12332
|
+
if (stack.length === 0 && !inString) return raw;
|
|
12333
|
+
let suffix = "";
|
|
12334
|
+
if (inString) suffix += '"';
|
|
12335
|
+
while (stack.length > 0) {
|
|
12336
|
+
const opener = stack.pop();
|
|
12337
|
+
suffix += opener === "{" ? "}" : "]";
|
|
12338
|
+
}
|
|
12339
|
+
return raw + suffix;
|
|
12340
|
+
}
|
|
12515
12341
|
function parseReflectionResponse(raw, maxProposals) {
|
|
12516
12342
|
let text = raw.trim();
|
|
12517
12343
|
if (text.startsWith("```")) text = text.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
|
|
@@ -12536,6 +12362,18 @@ function parseReflectionResponse(raw, maxProposals) {
|
|
|
12536
12362
|
} catch {
|
|
12537
12363
|
}
|
|
12538
12364
|
}
|
|
12365
|
+
if (parsed == null) {
|
|
12366
|
+
for (const slice of candidates) {
|
|
12367
|
+
const closed = autoCloseTruncatedJson(slice);
|
|
12368
|
+
if (closed != null && closed !== slice) {
|
|
12369
|
+
try {
|
|
12370
|
+
parsed = JSON.parse(closed);
|
|
12371
|
+
break;
|
|
12372
|
+
} catch {
|
|
12373
|
+
}
|
|
12374
|
+
}
|
|
12375
|
+
}
|
|
12376
|
+
}
|
|
12539
12377
|
if (parsed == null) return [];
|
|
12540
12378
|
let proposalsRaw;
|
|
12541
12379
|
if (Array.isArray(parsed)) {
|