@tangle-network/agent-eval 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -11
- package/dist/chunk-ITN4YOZY.js +215 -0
- package/dist/chunk-ITN4YOZY.js.map +1 -0
- package/dist/chunk-OZPRSK4A.js +594 -0
- package/dist/chunk-OZPRSK4A.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +104 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +109 -1
- package/dist/index.js +249 -206
- package/dist/index.js.map +1 -1
- package/dist/wire/index.d.ts +211 -0
- package/dist/wire/index.js +56 -0
- package/dist/wire/index.js.map +1 -0
- package/package.json +25 -10
package/dist/index.js
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
import {
|
|
2
|
+
LlmCallError,
|
|
3
|
+
LlmClient,
|
|
4
|
+
callLlm,
|
|
5
|
+
callLlmJson,
|
|
6
|
+
probeLlm,
|
|
7
|
+
stripFencedJson
|
|
8
|
+
} from "./chunk-ITN4YOZY.js";
|
|
9
|
+
|
|
1
10
|
// src/client.ts
|
|
2
11
|
var ProductClient = class {
|
|
3
12
|
baseUrl;
|
|
@@ -1926,6 +1935,244 @@ function rand(bytes) {
|
|
|
1926
1935
|
return Array.from(arr).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
1927
1936
|
}
|
|
1928
1937
|
|
|
1938
|
+
// src/experiment-tracker-fs.ts
|
|
1939
|
+
/**
 * Experiment store persisted as newline-delimited JSON files in a directory.
 *
 * Every save appends one JSON line to `<dir>/experiments.ndjson` or
 * `<dir>/runs.ndjson`. Reads are served from an `InMemoryExperimentStore`
 * that is rebuilt once, on first use, by replaying every `*.ndjson` file
 * found in the directory.
 *
 * @param {object} options
 * @param {string} options.dir - Directory holding the NDJSON files.
 * @param {number} [options.maxBytes] - Size at which the active file is
 *   rolled over to `<name>.<timestamp>.ndjson`. Defaults to 32 MiB.
 */
var FileSystemExperimentStore = class {
  dir;
  maxBytes;
  index;
  loaded = false;
  // In-flight load shared by concurrent callers; undefined when idle.
  loading;
  constructor(options) {
    this.dir = options.dir;
    this.maxBytes = options.maxBytes ?? 32 * 1024 * 1024;
  }
  /** Persists an experiment record, then mirrors it into the in-memory index. */
  async saveExperiment(exp) {
    const idx = await this.load();
    // Write to disk first so a failed append never leaves the in-memory
    // index holding a record that was not persisted.
    await this.append("experiments", exp);
    await idx.saveExperiment(exp);
  }
  /** Looks up an experiment by id in the in-memory index. */
  async getExperiment(id) {
    const idx = await this.load();
    return idx.getExperiment(id);
  }
  /** Lists the experiments held by the in-memory index. */
  async listExperiments() {
    const idx = await this.load();
    return idx.listExperiments();
  }
  /** Persists a run record, then mirrors it into the in-memory index. */
  async saveRun(run) {
    const idx = await this.load();
    await this.append("runs", run);
    await idx.saveRun(run);
  }
  /** Looks up a run by id in the in-memory index. */
  async getRun(id) {
    const idx = await this.load();
    return idx.getRun(id);
  }
  /** Lists the runs of one experiment from the in-memory index. */
  async listRuns(experimentId) {
    const idx = await this.load();
    return idx.listRuns(experimentId);
  }
  /** Creates the storage directory (and parents) if it does not exist. */
  async ensureDir() {
    const fs = await import("fs/promises");
    await fs.mkdir(this.dir, { recursive: true });
  }
  /**
   * Appends `record` as one JSON line to `<dir>/<name>.ndjson`, rolling the
   * active file over first when it has reached `maxBytes`.
   */
  async append(name, record) {
    await this.ensureDir();
    const fs = await import("fs/promises");
    const path = await import("path");
    const active = path.join(this.dir, `${name}.ndjson`);
    try {
      const stat = await fs.stat(active);
      if (stat.size >= this.maxBytes) {
        const rolled = path.join(this.dir, `${name}.${Date.now()}.ndjson`);
        await fs.rename(active, rolled);
      }
    } catch {
      // Rotation is best-effort: a missing active file or a failed rename
      // must not prevent the record itself from being appended below.
    }
    await fs.appendFile(active, `${JSON.stringify(record)}\n`, "utf8");
  }
  /**
   * Returns the in-memory index, building it from disk on first use.
   * Concurrent callers share a single in-flight load so they all receive
   * the same index instance; a failed load is not cached and is retried on
   * the next call.
   */
  async load() {
    if (this.loaded && this.index) return this.index;
    this.loading ??= this.readIndex().finally(() => {
      this.loading = void 0;
    });
    return this.loading;
  }
  /**
   * Replays every `experiments*.ndjson` / `runs*.ndjson` file into a fresh
   * in-memory store. A missing directory yields an empty store; any other
   * I/O error propagates instead of being cached as a partial index.
   */
  async readIndex() {
    const fs = await import("fs/promises");
    const path = await import("path");
    const store = new InMemoryExperimentStore();
    let entries;
    try {
      entries = await fs.readdir(this.dir);
    } catch (err) {
      // Nothing has been written yet: start empty.
      if (err?.code !== "ENOENT") throw err;
      entries = [];
    }
    // Replay in name order. Rolled files (`<name>.<timestamp>.ndjson`) are
    // expected to sort before the active `<name>.ndjson`, so newer records
    // are replayed last (presumably overwriting older ones with the same id;
    // verify against InMemoryExperimentStore).
    const sorted = entries.filter((f) => f.endsWith(".ndjson")).sort((a, b) => a.localeCompare(b));
    for (const file of sorted) {
      const base = file.split(".")[0];
      // Skip unrelated NDJSON files without reading them.
      if (base !== "experiments" && base !== "runs") continue;
      const content = await fs.readFile(path.join(this.dir, file), "utf8");
      for (const line of content.split("\n")) {
        if (!line.trim()) continue;
        let record;
        try {
          record = JSON.parse(line);
        } catch {
          // Tolerate a torn or corrupt line (e.g. an interrupted append).
          continue;
        }
        if (base === "experiments") {
          await store.saveExperiment(record);
        } else {
          await store.saveRun(record);
        }
      }
    }
    this.index = store;
    this.loaded = true;
    return store;
  }
};
|
|
2027
|
+
|
|
2028
|
+
// src/experiment-tracker-d1.ts
|
|
2029
|
+
// Version written to the meta table under the `schema_version` key.
var SCHEMA_VERSION = 1;
/**
 * Experiment store backed by a D1-style database binding (an object exposing
 * `exec(sql)` and `prepare(sql).bind(...).run()/first()/all()`).
 *
 * @param {object} options
 * @param {object} options.db - Database binding used for every query.
 * @param {string} [options.tablePrefix] - Prefix for the three table names.
 *   Defaults to `"agent_eval_"`. Table names are interpolated into SQL text,
 *   so the resulting names must be plain identifiers.
 * @throws {TypeError} If the prefix produces a table name that is not a
 *   plain `[A-Za-z_][A-Za-z0-9_]*` identifier.
 */
var D1ExperimentStore = class {
  db;
  experimentsTable;
  runsTable;
  metaTable;
  schemaReady = false;
  constructor(options) {
    this.db = options.db;
    const prefix = options.tablePrefix ?? "agent_eval_";
    this.experimentsTable = `${prefix}experiments`;
    this.runsTable = `${prefix}runs`;
    this.metaTable = `${prefix}meta`;
    // Table names cannot be bound as parameters, so reject anything that is
    // not a bare identifier before it can reach a SQL string.
    for (const table of [this.experimentsTable, this.runsTable, this.metaTable]) {
      if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(table)) {
        throw new TypeError(
          `Invalid tablePrefix ${JSON.stringify(prefix)}: table names must match [A-Za-z_][A-Za-z0-9_]*`
        );
      }
    }
  }
  /**
   * Idempotent schema setup. Safe to call before every operation; the second
   * call short-circuits via `schemaReady`. Most consumers will call it once
   * during Worker bootstrap.
   */
  async ensureSchema() {
    if (this.schemaReady) return;
    const ddl = `
      CREATE TABLE IF NOT EXISTS ${this.experimentsTable} (
        id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        created_at TEXT NOT NULL,
        metadata_json TEXT
      );
      CREATE TABLE IF NOT EXISTS ${this.runsTable} (
        id TEXT PRIMARY KEY,
        experiment_id TEXT NOT NULL,
        name TEXT,
        status TEXT NOT NULL,
        started_at TEXT NOT NULL,
        completed_at TEXT,
        config_json TEXT NOT NULL,
        report_json TEXT,
        error TEXT
      );
      CREATE INDEX IF NOT EXISTS idx_${this.runsTable}_experiment ON ${this.runsTable}(experiment_id);
      CREATE INDEX IF NOT EXISTS idx_${this.runsTable}_started ON ${this.runsTable}(started_at);
      CREATE TABLE IF NOT EXISTS ${this.metaTable} (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL
      );
      INSERT OR REPLACE INTO ${this.metaTable}(key, value) VALUES ('schema_version', '${SCHEMA_VERSION}');
    `;
    // The whole script is collapsed onto a single line before being executed.
    await this.db.exec(ddl.trim().replace(/\s+/g, " "));
    this.schemaReady = true;
  }
  /** Inserts the experiment, or overwrites every column of an existing row with the same id. */
  async saveExperiment(exp) {
    await this.ensureSchema();
    await this.db.prepare(
      `INSERT INTO ${this.experimentsTable}(id, name, created_at, metadata_json)
       VALUES (?1, ?2, ?3, ?4)
       ON CONFLICT(id) DO UPDATE SET
         name = excluded.name,
         created_at = excluded.created_at,
         metadata_json = excluded.metadata_json`
    ).bind(exp.id, exp.name, exp.createdAt, exp.metadata ? JSON.stringify(exp.metadata) : null).run();
  }
  /** Returns the experiment with the given id, or `null` when no row matches. */
  async getExperiment(id) {
    await this.ensureSchema();
    const row = await this.db.prepare(
      `SELECT id, name, created_at, metadata_json
       FROM ${this.experimentsTable}
       WHERE id = ?1`
    ).bind(id).first();
    return row ? rowToExperiment(row) : null;
  }
  /** Returns all experiments ordered by `created_at`, newest first. */
  async listExperiments() {
    await this.ensureSchema();
    const { results } = await this.db.prepare(
      `SELECT id, name, created_at, metadata_json
       FROM ${this.experimentsTable}
       ORDER BY created_at DESC`
    ).all();
    return results.map(rowToExperiment);
  }
  /** Inserts the run, or overwrites every column of an existing row with the same id. */
  async saveRun(run) {
    await this.ensureSchema();
    await this.db.prepare(
      `INSERT INTO ${this.runsTable}(id, experiment_id, name, status, started_at, completed_at, config_json, report_json, error)
       VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)
       ON CONFLICT(id) DO UPDATE SET
         experiment_id = excluded.experiment_id,
         name = excluded.name,
         status = excluded.status,
         started_at = excluded.started_at,
         completed_at = excluded.completed_at,
         config_json = excluded.config_json,
         report_json = excluded.report_json,
         error = excluded.error`
    ).bind(
      run.id,
      run.experimentId,
      run.name ?? null,
      run.status,
      run.startedAt,
      run.completedAt ?? null,
      JSON.stringify(run.config),
      run.report ? JSON.stringify(run.report) : null,
      run.error ?? null
    ).run();
  }
  /** Returns the run with the given id, or `null` when no row matches. */
  async getRun(id) {
    await this.ensureSchema();
    const row = await this.db.prepare(
      `SELECT id, experiment_id, name, status, started_at, completed_at, config_json, report_json, error
       FROM ${this.runsTable}
       WHERE id = ?1`
    ).bind(id).first();
    return row ? rowToRun(row) : null;
  }
  /** Returns the runs of one experiment ordered by `started_at`, newest first. */
  async listRuns(experimentId) {
    await this.ensureSchema();
    const { results } = await this.db.prepare(
      `SELECT id, experiment_id, name, status, started_at, completed_at, config_json, report_json, error
       FROM ${this.runsTable}
       WHERE experiment_id = ?1
       ORDER BY started_at DESC`
    ).bind(experimentId).all();
    return results.map(rowToRun);
  }
};
|
|
2154
|
+
/**
 * Converts an experiments-table row into an experiment record.
 *
 * @param {object} row - Row with `id`, `name`, `created_at`, `metadata_json`.
 * @returns {object} `{ id, name, createdAt }`, plus `metadata` (parsed from
 *   `metadata_json`) when that column holds a non-empty value.
 */
function rowToExperiment(row) {
  const experiment = {
    id: row.id,
    name: row.name,
    createdAt: row.created_at
  };
  if (row.metadata_json) {
    experiment.metadata = JSON.parse(row.metadata_json);
  }
  return experiment;
}
|
|
2162
|
+
/**
 * Converts a runs-table row into a run record.
 *
 * Nullable text columns (`name`, `completed_at`, `error`) are included
 * whenever they are not NULL, so an empty string round-trips instead of being
 * silently dropped. JSON columns are parsed; `report_json` is included only
 * when it holds a non-empty value.
 *
 * @param {object} row - Row with `id`, `experiment_id`, `name`, `status`,
 *   `started_at`, `completed_at`, `config_json`, `report_json`, `error`.
 * @returns {object} Run record with camelCase keys.
 * @throws {SyntaxError} If `config_json` or `report_json` is not valid JSON.
 */
function rowToRun(row) {
  return {
    id: row.id,
    experimentId: row.experiment_id,
    ...row.name != null ? { name: row.name } : {},
    status: row.status,
    startedAt: row.started_at,
    ...row.completed_at != null ? { completedAt: row.completed_at } : {},
    config: JSON.parse(row.config_json),
    // Truthiness is intentional here: an empty string is not parseable JSON.
    ...row.report_json ? { report: JSON.parse(row.report_json) } : {},
    ...row.error != null ? { error: row.error } : {}
  };
}
|
|
2175
|
+
|
|
1929
2176
|
// src/power-analysis.ts
|
|
1930
2177
|
function requiredSampleSize(opts) {
|
|
1931
2178
|
const effect = opts.effect;
|
|
@@ -8058,212 +8305,6 @@ async function euAiActReport(ctx, signals) {
|
|
|
8058
8305
|
};
|
|
8059
8306
|
}
|
|
8060
8307
|
|
|
8061
|
-
// src/llm-client.ts
|
|
8062
|
-
/**
 * Error raised for a failed LLM HTTP call.
 *
 * Carries the HTTP `status`, the response `body` text and the `model` that
 * was requested, alongside the usual `message`.
 */
var LlmCallError = class extends Error {
  status;
  body;
  model;
  constructor(message, status, body, model) {
    super(message);
    Object.assign(this, { status, body, model });
    this.name = "LlmCallError";
  }
};
|
|
8074
|
-
// Base URL used by `callLlm` when the caller does not pass `opts.baseUrl`.
var DEFAULT_BASE_URL = "https://router.tangle.tools/v1";
|
|
8075
|
-
// Per-attempt request timeout in milliseconds (6e4 = 60 s); used when neither
// `req.timeoutMs` nor `opts.defaultTimeoutMs` is provided.
var DEFAULT_TIMEOUT_MS = 6e4;
|
|
8076
|
-
// Default bound for the attempt loop in `callLlm` when `opts.maxRetries` is
// not provided.
var DEFAULT_MAX_RETRIES = 3;
|
|
8077
|
-
// HTTP status codes that are treated as retryable (rate limiting and gateway errors).
var RETRYABLE_STATUS = /* @__PURE__ */ new Set([429, 502, 503, 504]);
|
|
8078
|
-
/**
 * Decides whether a failed attempt is worth retrying.
 *
 * @param {unknown} err - Whatever was thrown by the attempt.
 * @returns {boolean} For an `LlmCallError`, whether its status is in
 *   `RETRYABLE_STATUS`. For any other `Error`, whether it is an abort/timeout
 *   or its message names a transient network failure. `false` for non-errors.
 */
function isRetryableError(err) {
  if (err instanceof LlmCallError) {
    return RETRYABLE_STATUS.has(err.status);
  }
  if (!(err instanceof Error)) {
    return false;
  }
  const abortedOrTimedOut = err.name === "AbortError" || err.name === "TimeoutError";
  return abortedOrTimedOut || /fetch failed|ECONNRESET|ETIMEDOUT|EAI_AGAIN/i.test(err.message);
}
|
|
8085
|
-
/**
 * Reads the `Retry-After` response header as a delay in milliseconds.
 *
 * Supports both header forms: a number of seconds and an HTTP date. A numeric
 * value is handled entirely on the numeric path, so `0` means "retry
 * immediately" and a negative number is rejected, rather than either being
 * handed to `Date.parse`, whose result for a bare number is engine-dependent.
 *
 * @param {{ get(name: string): string | null }} headers - Response headers.
 * @returns {number | null} Delay in ms (never negative), or `null` when the
 *   header is absent, blank or unparseable.
 */
function parseRetryAfter(headers) {
  const raw = headers.get("retry-after")?.trim();
  if (!raw) return null;
  const seconds = Number(raw);
  if (Number.isFinite(seconds)) {
    return seconds >= 0 ? seconds * 1e3 : null;
  }
  const retryAt = Date.parse(raw);
  // A date in the past means the wait is already over.
  return Number.isFinite(retryAt) ? Math.max(0, retryAt - Date.now()) : null;
}
|
|
8094
|
-
/**
 * Exponential backoff delay for a zero-based attempt number:
 * 500 ms doubled per attempt, capped at 16 000 ms.
 *
 * @param {number} attempt - Zero-based attempt index.
 * @returns {number} Delay in milliseconds.
 */
function backoffMs(attempt) {
  const uncapped = 500 * 2 ** attempt;
  return Math.min(uncapped, 16e3);
}
|
|
8097
|
-
/**
 * Builds the HTTP headers for an LLM request.
 *
 * Authentication precedence: an explicit `opts.authHeader` ({ name, value })
 * wins; otherwise the first non-empty of `opts.bearer` and `opts.apiKey` is
 * sent as a `Bearer` token. The same non-empty rule is used both to decide
 * whether to send the header and to pick the token, so an empty `bearer`
 * cannot shadow a valid `apiKey` and produce a token-less `Authorization`.
 *
 * @param {object} opts - Client options (`authHeader`, `bearer`, `apiKey`).
 * @returns {Record<string, string>} Header map including JSON content headers.
 */
function buildHeaders(opts) {
  const headers = {
    "Content-Type": "application/json",
    Accept: "application/json"
  };
  if (opts.authHeader) {
    headers[opts.authHeader.name] = opts.authHeader.value;
    return headers;
  }
  const token = opts.bearer || opts.apiKey;
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }
  return headers;
}
|
|
8109
|
-
/**
 * Detects an HTTP 400 whose body suggests the provider rejected the requested
 * structured-output format, as opposed to some other bad request.
 *
 * @param {number} status - HTTP status of the failed call.
 * @param {string} body - Response body text.
 * @returns {boolean} `true` only for status 400 with a matching body.
 */
function isSchemaRejection(status, body) {
  if (status !== 400) return false;
  const haystack = body.toLowerCase();
  const markers = ["response_format", "json_schema", "is unavailable", "not supported"];
  return markers.some((marker) => haystack.includes(marker));
}
|
|
8114
|
-
/**
 * Builds the chat-completions request payload.
 *
 * @param {object} req - Request with `model`, `messages` and optional
 *   `temperature` (defaults to 0), `maxTokens`, `jsonSchema`, `jsonMode`.
 * @param {boolean} forceJsonObject - When truthy, a `jsonSchema` request is
 *   sent in plain `json_object` mode instead of strict `json_schema` mode.
 * @returns {object} Payload with snake_case provider fields.
 */
function buildBody(req, forceJsonObject) {
  const payload = {
    model: req.model,
    messages: req.messages,
    temperature: req.temperature ?? 0
  };
  if (req.maxTokens !== null && req.maxTokens !== void 0) {
    payload.max_tokens = req.maxTokens;
  }
  const schema = req.jsonSchema;
  if (schema && !forceJsonObject) {
    const { name, schema: definition } = schema;
    payload.response_format = {
      type: "json_schema",
      json_schema: { name, schema: definition, strict: true }
    };
    return payload;
  }
  if (req.jsonMode || schema) {
    payload.response_format = { type: "json_object" };
  }
  return payload;
}
|
|
8131
|
-
/**
 * Resolves (with no value) after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
8134
|
-
/**
 * Removes a surrounding Markdown code fence from a model response.
 *
 * Accepts a bare fence or one tagged `json` in any letter case (models emit
 * both "json" and "JSON" tags). Text that is not wrapped in a single fence
 * from start to end is returned trimmed but otherwise untouched.
 *
 * @param {string} raw - Raw model output.
 * @returns {string} The fenced payload, or the trimmed input.
 */
function stripFencedJson(raw) {
  const trimmed = raw.trim();
  const fenced = trimmed.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```\s*$/i);
  return fenced ? fenced[1].trim() : trimmed;
}
|
|
8139
|
-
/**
 * Sends one chat-completions request, retrying transient failures.
 *
 * The per-attempt timeout covers the whole exchange, including reading the
 * response body, not just the arrival of the response headers. At least one
 * attempt is always made, even when `opts.maxRetries` is 0 or negative.
 *
 * @param {object} req - `model`, `messages`, and optional `temperature`,
 *   `maxTokens`, `jsonSchema`, `jsonMode`, `timeoutMs`.
 * @param {object} [opts] - Optional `baseUrl`, `defaultTimeoutMs`,
 *   `maxRetries` (total attempts), `fetch`, plus the auth options read by
 *   `buildHeaders`.
 * @returns {Promise<object>} `{ content, usage, costUsd, model, durationMs, raw }`
 *   where `durationMs` measures the successful attempt only.
 * @throws {LlmCallError} On a non-2xx response that is not retried.
 * @throws {Error} On a network, abort or parse failure that is not retried.
 */
async function callLlm(req, opts = {}) {
  const baseUrl = (opts.baseUrl ?? DEFAULT_BASE_URL).replace(/\/+$/, "");
  const url = `${baseUrl}/chat/completions`;
  const timeoutMs = req.timeoutMs ?? opts.defaultTimeoutMs ?? DEFAULT_TIMEOUT_MS;
  // `maxRetries` counts total attempts; never let it suppress the only call.
  const maxAttempts = Math.max(1, opts.maxRetries ?? DEFAULT_MAX_RETRIES);
  const fetchFn = opts.fetch ?? globalThis.fetch;
  const headers = buildHeaders(opts);
  // The payload is identical for every attempt, so serialize it once.
  const payload = JSON.stringify(buildBody(req, false));
  let lastErr;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const isLastAttempt = attempt === maxAttempts - 1;
    const controller = new AbortController();
    const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
    const started = Date.now();
    let delayMs;
    try {
      const res = await fetchFn(url, {
        method: "POST",
        headers,
        body: payload,
        signal: controller.signal
      });
      if (res.ok) {
        const json = await res.json();
        const choice = json.choices?.[0];
        const usageRaw = json.usage ?? {};
        const costFromProxy = json._response_cost ?? json.cost_usd;
        return {
          content: choice?.message?.content ?? "",
          usage: {
            promptTokens: Number(usageRaw.prompt_tokens ?? 0),
            completionTokens: Number(usageRaw.completion_tokens ?? 0),
            totalTokens: Number(usageRaw.total_tokens ?? 0),
            cachedPromptTokens: usageRaw.prompt_tokens_details && typeof usageRaw.prompt_tokens_details === "object" ? Number(
              usageRaw.prompt_tokens_details.cached_tokens ?? 0
            ) : void 0
          },
          costUsd: typeof costFromProxy === "number" ? costFromProxy : null,
          model: json.model ?? req.model,
          durationMs: Date.now() - started,
          raw: json
        };
      }
      const body = await res.text();
      const httpErr = new LlmCallError(
        `LLM call ${res.status}: ${body.slice(0, 300)}`,
        res.status,
        body,
        req.model
      );
      if (isLastAttempt || !RETRYABLE_STATUS.has(res.status)) throw httpErr;
      lastErr = httpErr;
      // Honor the server's Retry-After when present, else back off.
      delayMs = parseRetryAfter(res.headers) ?? backoffMs(attempt);
    } catch (err) {
      lastErr = err;
      if (isLastAttempt || !isRetryableError(err)) throw err;
      delayMs = backoffMs(attempt);
    } finally {
      // Cleared only after the body has been consumed (or the attempt has
      // failed) so a stalled body read is aborted by the same timeout.
      clearTimeout(timeoutHandle);
    }
    await sleep(delayMs);
  }
  // Defensive: the loop always returns or throws on its final attempt.
  throw lastErr instanceof Error ? lastErr : new Error(String(lastErr));
}
|
|
8206
|
-
/**
 * Calls the model and parses its reply as JSON.
 *
 * JSON mode is enabled by default unless a `jsonSchema` is supplied. If the
 * provider rejects the schema request (see `isSchemaRejection`), the call is
 * repeated once in plain JSON mode without the schema.
 *
 * @param {object} req - Request as accepted by `callLlm`.
 * @param {object} [opts] - Options as accepted by `callLlm`.
 * @returns {Promise<{ value: unknown, result: object }>} Parsed value plus
 *   the underlying `callLlm` result.
 */
async function callLlmJson(req, opts = {}) {
  const callAndParse = async (request) => {
    const result = await callLlm(request, opts);
    const value = parseJsonSafely(result.content, result.model);
    return { value, result };
  };
  try {
    return await callAndParse({ ...req, jsonMode: req.jsonMode ?? !req.jsonSchema });
  } catch (err) {
    const schemaRejected = err instanceof LlmCallError && isSchemaRejection(err.status, err.body) && req.jsonSchema;
    if (!schemaRejected) {
      throw err;
    }
    return callAndParse({ ...req, jsonMode: true, jsonSchema: void 0 });
  }
}
|
|
8221
|
-
/**
 * Parses model output as JSON after stripping any surrounding code fence.
 *
 * @param {string} content - Raw model output.
 * @param {string} model - Model name, used only in the error message.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} When the content is not valid JSON. The message includes
 *   the parser's message and the first 800 characters of the raw content;
 *   the original parse error is attached as `cause`.
 */
function parseJsonSafely(content, model) {
  const stripped = stripFencedJson(content);
  try {
    return JSON.parse(stripped);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(
      `LLM returned non-JSON content (model=${model}): ${reason}
--- raw content ---
${content.slice(0, 800)}`,
      // Keep the original SyntaxError reachable for debugging.
      { cause: err }
    );
  }
}
|
|
8233
|
-
/**
 * Health-checks a model by sending a tiny "ping" request through `callLlm`.
 * Never throws: failures are reported in the returned object.
 *
 * @param {string} model - Model to probe.
 * @param {object} [opts] - `callLlm` options; `opts.timeoutMs` overrides the
 *   30 s probe timeout.
 * @returns {Promise<{ ok: boolean, latencyMs: number, error: string | null }>}
 */
async function probeLlm(model, opts = {}) {
  const start = Date.now();
  const pingRequest = {
    model,
    messages: [{ role: "user", content: "ping" }],
    maxTokens: 64,
    timeoutMs: opts.timeoutMs ?? 3e4
  };
  try {
    await callLlm(pingRequest, opts);
  } catch (err) {
    const latencyMs = Date.now() - start;
    const error = err instanceof Error ? err.message : String(err);
    return { ok: false, latencyMs, error };
  }
  return { ok: true, latencyMs: Date.now() - start, error: null };
}
|
|
8254
|
-
/**
 * Thin wrapper that binds a default set of options to `callLlm` and
 * `callLlmJson`. Per-call options are merged over the defaults, with the
 * per-call values winning.
 */
var LlmClient = class {
  opts;
  constructor(opts = {}) {
    this.opts = opts;
  }
  /** Calls `callLlm` with the defaults overlaid by the per-call options. */
  call(req, per) {
    const merged = Object.assign({}, this.opts, per);
    return callLlm(req, merged);
  }
  /** Calls `callLlmJson` with the defaults overlaid by the per-call options. */
  callJson(req, per) {
    const merged = Object.assign({}, this.opts, per);
    return callLlmJson(req, merged);
  }
};
|
|
8266
|
-
|
|
8267
8308
|
// src/multi-layer-verifier.ts
|
|
8268
8309
|
function gradeSemanticStatus(input) {
|
|
8269
8310
|
if (!input.available) return "error";
|
|
@@ -10594,6 +10635,7 @@ export {
|
|
|
10594
10635
|
BuilderSession,
|
|
10595
10636
|
ConvergenceTracker,
|
|
10596
10637
|
CostTracker,
|
|
10638
|
+
D1ExperimentStore,
|
|
10597
10639
|
DEFAULT_AGENT_SLOS,
|
|
10598
10640
|
DEFAULT_COMPLEXITY_WEIGHTS,
|
|
10599
10641
|
DEFAULT_RULES as DEFAULT_FAILURE_RULES,
|
|
@@ -10611,6 +10653,7 @@ export {
|
|
|
10611
10653
|
ERROR_COUNT_PATTERNS,
|
|
10612
10654
|
ExperimentTracker,
|
|
10613
10655
|
FAILURE_CLASSES,
|
|
10656
|
+
FileSystemExperimentStore,
|
|
10614
10657
|
FileSystemOutcomeStore,
|
|
10615
10658
|
FileSystemTraceStore,
|
|
10616
10659
|
HoldoutAuditor,
|