@remnic/bench 9.3.680 → 9.3.682
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +372 -2
- package/dist/index.js +605 -53
- package/package.json +6 -3
- package/profiles/README.md +113 -0
- package/profiles/local-lab-3090.json +39 -0
package/dist/index.js
CHANGED
|
@@ -7663,8 +7663,9 @@ var OpenAiCompatibleProvider = class {
|
|
|
7663
7663
|
...opts.systemPrompt ? [{ role: "system", content: opts.systemPrompt }] : [],
|
|
7664
7664
|
{ role: "user", content: prompt }
|
|
7665
7665
|
],
|
|
7666
|
-
temperature: opts.temperature,
|
|
7666
|
+
temperature: opts.temperature ?? this.config.temperature,
|
|
7667
7667
|
max_tokens: opts.maxTokens,
|
|
7668
|
+
...this.config.seed !== void 0 ? { seed: this.config.seed } : {},
|
|
7668
7669
|
...this.config.disableThinking && isThinkingCompatibleBackend(this.config.baseUrl) ? { chat_template_kwargs: { enable_thinking: false } } : {}
|
|
7669
7670
|
})
|
|
7670
7671
|
},
|
|
@@ -7857,8 +7858,9 @@ var LocalLlmProvider = class {
|
|
|
7857
7858
|
...opts.systemPrompt ? [{ role: "system", content: opts.systemPrompt }] : [],
|
|
7858
7859
|
{ role: "user", content: prompt }
|
|
7859
7860
|
],
|
|
7860
|
-
temperature: opts.temperature,
|
|
7861
|
+
temperature: opts.temperature ?? this.config.temperature,
|
|
7861
7862
|
max_tokens: opts.maxTokens,
|
|
7863
|
+
...this.config.seed !== void 0 ? { seed: this.config.seed } : {},
|
|
7862
7864
|
...this.config.disableThinking && isThinkingCompatibleBackend(this.normalizedBaseUrl()) ? { chat_template_kwargs: { enable_thinking: false } } : {}
|
|
7863
7865
|
})
|
|
7864
7866
|
},
|
|
@@ -8012,8 +8014,9 @@ var OllamaProvider = class {
|
|
|
8012
8014
|
system: opts.systemPrompt,
|
|
8013
8015
|
stream: false,
|
|
8014
8016
|
options: {
|
|
8015
|
-
temperature: opts.temperature,
|
|
8016
|
-
num_predict: opts.maxTokens
|
|
8017
|
+
temperature: opts.temperature ?? this.config.temperature,
|
|
8018
|
+
num_predict: opts.maxTokens,
|
|
8019
|
+
...this.config.seed !== void 0 ? { seed: this.config.seed } : {}
|
|
8017
8020
|
}
|
|
8018
8021
|
})
|
|
8019
8022
|
},
|
|
@@ -9656,7 +9659,7 @@ function clampNormalizedScore(value) {
|
|
|
9656
9659
|
|
|
9657
9660
|
// src/runtime-profiles.ts
|
|
9658
9661
|
import path10 from "path";
|
|
9659
|
-
import { readFile as
|
|
9662
|
+
import { readFile as readFile9 } from "fs/promises";
|
|
9660
9663
|
import {
|
|
9661
9664
|
resolvePluginEntry,
|
|
9662
9665
|
setCodexCliFallbackRunnerForProcess
|
|
@@ -10003,6 +10006,469 @@ function buildPromptSpecificRequirements(prompt) {
|
|
|
10003
10006
|
return requirements;
|
|
10004
10007
|
}
|
|
10005
10008
|
|
|
10009
|
+
// src/local-lab/manifest.ts
|
|
10010
|
+
import { readFile as readFile8 } from "fs/promises";
|
|
10011
|
+
var LOCAL_LAB_PROVIDER_KINDS = [
|
|
10012
|
+
"openai-compatible",
|
|
10013
|
+
"ollama"
|
|
10014
|
+
];
|
|
10015
|
+
/**
 * Validates an already-parsed local-lab manifest and returns a normalized copy.
 *
 * Validation order: top-level shape, `profile`, `phases`, then the
 * `responder`, `judge`, optional `embedding` roles and optional `notes`.
 *
 * @param {unknown} raw - Parsed JSON value.
 * @returns {object} Manifest with keys in the order profile, responder,
 *   judge, [embedding], phases, [notes].
 * @throws {Error} When any part of the manifest is invalid.
 */
function parseLocalLabManifest(raw) {
  if (!isPlainObject(raw)) {
    throw new Error(
      "local-lab manifest must be a JSON object (rule 18: parsed JSON must be object-not-null)"
    );
  }
  const { profile, phases } = raw;
  if (profile !== "local-lab") {
    throw new Error(
      `local-lab manifest requires profile === "local-lab"; received ${describeValue(profile)}`
    );
  }
  if (phases !== "sequential") {
    throw new Error(
      `local-lab manifest phases must be "sequential" in PR2; received ${describeValue(phases)}`
    );
  }
  // Keys are assigned in the same order the original literal produced them.
  const manifest = {
    profile: "local-lab",
    responder: parseRole(raw.responder, "responder"),
    judge: parseRole(raw.judge, "judge")
  };
  if (raw.embedding !== undefined) {
    manifest.embedding = parseRole(raw.embedding, "embedding");
  }
  manifest.phases = "sequential";
  if (raw.notes !== undefined) {
    manifest.notes = parseNotes(raw.notes, "notes");
  }
  return manifest;
}
|
|
10044
|
+
/**
 * Reads a local-lab manifest file from disk, parses it as JSON and validates
 * it with `parseLocalLabManifest`.
 *
 * @param {string} filePath - Path of the manifest JSON file.
 * @returns {Promise<object>} The validated manifest.
 * @throws {Error} When the file cannot be read (message carries the error
 *   code, or "EUNKNOWN"), when it is not valid JSON, or when validation
 *   fails. Read and JSON errors keep the underlying error as `cause`.
 */
async function loadLocalLabManifest(filePath) {
  let text;
  try {
    text = await readFile8(filePath, "utf8");
  } catch (error) {
    const code = error?.code ?? "EUNKNOWN";
    // Keep the original error reachable for debugging (stack, errno, path).
    throw new Error(`local-lab manifest at ${filePath} could not be read (${code})`, {
      cause: error
    });
  }
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    throw new Error(`local-lab manifest at ${filePath} contains invalid JSON: ${detail}`, {
      cause: error
    });
  }
  return parseLocalLabManifest(parsed);
}
|
|
10061
|
+
/**
 * Validates one role entry (responder / judge / embedding) of a manifest.
 *
 * Fields are checked in the order provider, baseUrl, model, ctx, seed,
 * temperature, quantization; the first invalid field throws.
 *
 * @param {unknown} value - Raw role object from the manifest.
 * @param {string} label - Role name used as the prefix in error messages.
 * @returns {object} Normalized role; `temperature` is always the number 0.
 * @throws {Error} When the role or any of its fields is invalid.
 */
function parseRole(value, label) {
  if (!isPlainObject(value)) {
    throw new Error(
      `local-lab manifest ${label} must be an object; received ${describeValue(value)}`
    );
  }
  const role = {
    provider: parseProviderKind(value.provider, label),
    baseUrl: parseNonEmptyString(value.baseUrl, label, "baseUrl"),
    model: parseNonEmptyString(value.model, label, "model"),
    ctx: parsePositiveInteger(value.ctx, label, "ctx")
  };
  const seed = parseInteger(value.seed, label, "seed");
  // Only the exact number 0 is accepted as temperature.
  if (value.temperature !== 0) {
    throw new Error(
      `local-lab manifest ${label}.temperature must be the number 0; received ${describeValue(value.temperature)}`
    );
  }
  role.temperature = 0;
  role.seed = seed;
  if (value.quantization !== undefined) {
    role.quantization = parseNonEmptyString(value.quantization, label, "quantization");
  }
  return role;
}
|
|
10088
|
+
/**
 * Checks that `value` is one of the entries of `LOCAL_LAB_PROVIDER_KINDS`.
 *
 * @param {unknown} value - Raw `provider` field.
 * @param {string} label - Role name used in the error message.
 * @returns {string} The matching provider kind.
 * @throws {Error} When `value` is not a supported kind.
 */
function parseProviderKind(value, label) {
  const match = LOCAL_LAB_PROVIDER_KINDS.find((kind) => kind === value);
  if (match !== undefined) {
    return match;
  }
  throw new Error(
    `local-lab manifest ${label}.provider must be one of [${LOCAL_LAB_PROVIDER_KINDS.join(
      ", "
    )}]; received ${describeValue(value)}`
  );
}
|
|
10100
|
+
/**
 * Requires a string with at least one non-whitespace character.
 *
 * @param {unknown} value - Raw field value.
 * @param {string} label - Role name used in the error message.
 * @param {string} field - Field name used in the error message.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When `value` is not a string or is blank.
 */
function parseNonEmptyString(value, label, field) {
  // Non-strings collapse to "" so one emptiness check covers both cases.
  const text = typeof value === "string" ? value.trim() : "";
  if (text.length === 0) {
    throw new Error(
      `local-lab manifest ${label}.${field} must be a non-empty string; received ${describeValue(value)}`
    );
  }
  return text;
}
|
|
10108
|
+
/**
 * Requires an integer strictly greater than zero.
 *
 * @param {unknown} value - Raw field value.
 * @param {string} label - Role name used in the error message.
 * @param {string} field - Field name used in the error message.
 * @returns {number} The validated value, unchanged.
 * @throws {Error} When `value` is not a positive integer.
 */
function parsePositiveInteger(value, label, field) {
  // Number.isInteger is false for every non-number, so it also covers the type check.
  if (Number.isInteger(value) && value > 0) {
    return value;
  }
  throw new Error(
    `local-lab manifest ${label}.${field} must be a positive integer; received ${describeValue(value)}`
  );
}
|
|
10116
|
+
/**
 * Requires an integer of any sign.
 *
 * @param {unknown} value - Raw field value.
 * @param {string} label - Role name used in the error message.
 * @param {string} field - Field name used in the error message.
 * @returns {number} The validated value, unchanged.
 * @throws {Error} When `value` is not an integer.
 */
function parseInteger(value, label, field) {
  // Number.isInteger rejects non-numbers, NaN, infinities and fractions.
  if (Number.isInteger(value)) {
    return value;
  }
  throw new Error(
    `local-lab manifest ${label}.${field} must be an integer; received ${describeValue(value)}`
  );
}
|
|
10124
|
+
/**
 * Validates the manifest `notes` section and returns a shallow copy of it.
 *
 * Entries are copied as own data properties, so a key named "__proto__"
 * (which `JSON.parse` produces as an ordinary own key) stays a plain entry
 * instead of replacing the prototype of the returned object. Entry values
 * are not validated here.
 *
 * @param {unknown} value - Raw `notes` value.
 * @param {string} label - Section name used in the error message.
 * @returns {object} A new plain object holding the same entries.
 * @throws {Error} When `value` is not a plain object.
 */
function parseNotes(value, label) {
  if (!isPlainObject(value)) {
    throw new Error(
      `local-lab manifest ${label} must be an object; received ${describeValue(value)}`
    );
  }
  // Object.fromEntries defines properties rather than assigning them, which
  // bypasses the Object.prototype.__proto__ setter.
  return Object.fromEntries(Object.entries(value));
}
|
|
10136
|
+
/**
 * Renders an arbitrary value as a short string for validation error messages.
 *
 * `undefined` and `null` are spelled out, numbers and booleans use
 * `String()`, and everything else uses `JSON.stringify` (so strings appear
 * quoted). Values that `JSON.stringify` cannot serialize — it throws for
 * cycles and BigInt, and returns `undefined` for functions and symbols —
 * fall back to `String(value)`, so the result is always a string.
 *
 * @param {unknown} value - Value to describe.
 * @returns {string} Human-readable description.
 */
function describeValue(value) {
  if (value === undefined) {
    return "undefined";
  }
  if (value === null) {
    return "null";
  }
  if (typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }
  try {
    // JSON.stringify yields undefined (not a string) for functions/symbols.
    return JSON.stringify(value) ?? String(value);
  } catch {
    return String(value);
  }
}
|
|
10155
|
+
/**
 * Tells whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for objects other than `null` and arrays.
 */
function isPlainObject(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
10158
|
+
|
|
10159
|
+
// src/local-lab/resolve-local-lab-profile.ts
|
|
10160
|
+
/**
 * Expands a validated manifest role into its resolved form.
 *
 * The result repeats the role's fields (with the base URL normalized by
 * `normalizeRoleBaseUrl`) and adds `providerConfig`, which carries the
 * built-in provider name from `manifestProviderKindToBuiltIn` together with
 * model, base URL, temperature and seed.
 *
 * @param {object} role - Validated manifest role.
 * @returns {object} Resolved role with a nested `providerConfig`.
 * @throws {Error} When the role's provider kind is unsupported.
 */
function resolveLocalLabRole(role) {
  const builtInProvider = manifestProviderKindToBuiltIn(role.provider);
  const baseUrl = normalizeRoleBaseUrl(role.provider, role.baseUrl);
  const { provider: kind, model, ctx, temperature, seed, quantization } = role;
  const resolved = { provider: kind, baseUrl, model, ctx, temperature, seed };
  if (quantization) {
    resolved.quantization = quantization;
  }
  resolved.providerConfig = {
    provider: builtInProvider,
    model,
    baseUrl,
    temperature,
    seed
  };
  return resolved;
}
|
|
10181
|
+
/**
 * Normalizes a role's base URL for its provider kind.
 *
 * Removes one trailing slash, then appends the provider's API prefix
 * ("/api" for ollama, "/v1" for openai-compatible) unless the URL already
 * ends with it. The suffix check is case-insensitive, matching
 * `discoveryEndpointFor`, so preflight and the actual run agree on whether a
 * URL such as ".../V1" already carries the prefix.
 *
 * @param {string} providerKind - Manifest provider kind.
 * @param {string} rawBaseUrl - Base URL as written in the manifest.
 * @returns {string} Normalized base URL; other provider kinds only get the
 *   trailing slash removed.
 */
function normalizeRoleBaseUrl(providerKind, rawBaseUrl) {
  const trimmed = rawBaseUrl.endsWith("/") ? rawBaseUrl.slice(0, -1) : rawBaseUrl;
  if (providerKind === "ollama" && !/\/api$/i.test(trimmed)) {
    return `${trimmed}/api`;
  }
  if (providerKind === "openai-compatible" && !/\/v1$/i.test(trimmed)) {
    return `${trimmed}/v1`;
  }
  return trimmed;
}
|
|
10191
|
+
/**
 * Resolves every role of a validated manifest via `resolveLocalLabRole`.
 *
 * @param {object} manifest - Validated local-lab manifest.
 * @returns {object} `{ manifest, responder, judge, [embedding], phases,
 *   [notes] }` where the roles are in resolved form.
 */
function resolveLocalLabProfile(manifest) {
  const profile = {
    manifest,
    responder: resolveLocalLabRole(manifest.responder),
    judge: resolveLocalLabRole(manifest.judge)
  };
  if (manifest.embedding) {
    profile.embedding = resolveLocalLabRole(manifest.embedding);
  }
  profile.phases = manifest.phases;
  if (manifest.notes) {
    profile.notes = manifest.notes;
  }
  return profile;
}
|
|
10201
|
+
/**
 * Maps a manifest provider kind to the built-in provider name used in
 * provider configs: "openai-compatible" becomes "local-llm" and "ollama"
 * stays "ollama".
 *
 * @param {string} kind - Manifest provider kind.
 * @returns {string} Built-in provider name.
 * @throws {Error} For any other kind.
 */
function manifestProviderKindToBuiltIn(kind) {
  switch (kind) {
    case "openai-compatible":
      return "local-llm";
    case "ollama":
      return "ollama";
    default:
      throw new Error(`local-lab manifest provider kind unsupported: ${kind}`);
  }
}
|
|
10211
|
+
|
|
10212
|
+
// src/local-lab/preflight.ts
|
|
10213
|
+
var DEFAULT_PREFLIGHT_TIMEOUT_MS = 5e3;
|
|
10214
|
+
/**
 * Queries a role's model-discovery endpoint and checks that the expected
 * model is listed and, when the endpoint reports a context length, that it
 * is at least `input.ctx`.
 *
 * Request failures, non-2xx responses, non-JSON bodies, missing models and
 * too-small context lengths are all reported as `{ ok: false, reason, ... }`
 * results rather than thrown.
 *
 * The request is aborted after `options.timeoutMs` (default
 * `DEFAULT_PREFLIGHT_TIMEOUT_MS`) or when `options.signal` aborts. The timer
 * and the listener registered on `options.signal` are released on every exit
 * path, so a signal shared across many preflights does not accumulate
 * listeners.
 *
 * @param {{provider: string, baseUrl: string, model: string, ctx: number}} input
 *   Role fields to verify.
 * @param {{fetchImpl?: Function, timeoutMs?: number, signal?: AbortSignal}} [options]
 *   Optional fetch replacement, timeout and caller abort signal.
 * @returns {Promise<object>} Preflight result with `ok` true or false.
 * @throws {Error} When the provider kind is unsupported (raised by the
 *   endpoint/model helpers).
 */
async function preflightLocalLabRole(input, options = {}) {
  const fetchImpl = options.fetchImpl ?? fetch;
  const timeoutMs = options.timeoutMs ?? DEFAULT_PREFLIGHT_TIMEOUT_MS;
  const endpoint = discoveryEndpointFor(input.provider, input.baseUrl);

  // Builds a failure result; key order matches the success result plus reason.
  const fail = (reason, foundModels = [], extra = {}) => ({
    ok: false,
    provider: input.provider,
    endpoint,
    expectedModel: input.model,
    foundModels,
    expectedCtx: input.ctx,
    ...extra,
    reason
  });

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(new Error("preflight timeout")), timeoutMs);
  const callerSignal = options.signal;
  const forwardAbort = () => controller.abort(callerSignal.reason);
  if (callerSignal) {
    if (callerSignal.aborted) {
      forwardAbort();
    } else {
      callerSignal.addEventListener("abort", forwardAbort, { once: true });
    }
  }

  let parsed;
  try {
    let response;
    try {
      response = await fetchImpl(endpoint, {
        method: "GET",
        headers: { accept: "application/json" },
        signal: controller.signal
      });
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      return fail(`preflight request to ${endpoint} failed: ${detail}`);
    }
    if (!response.ok) {
      return fail(`endpoint ${endpoint} returned HTTP ${response.status} ${response.statusText}`);
    }
    try {
      parsed = await response.json();
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      return fail(`endpoint ${endpoint} returned non-JSON body: ${detail}`);
    }
  } finally {
    // Runs for every return above as well as for the fall-through path.
    clearTimeout(timer);
    callerSignal?.removeEventListener("abort", forwardAbort);
  }

  const foundModels = extractDiscoveredModels(input.provider, parsed);
  const matched = foundModels.find((model) => model.id === input.model);
  if (matched === undefined) {
    return fail(modelMismatchReason(input.model, foundModels), foundModels);
  }
  if (matched.contextLength !== undefined && matched.contextLength < input.ctx) {
    return fail(
      `model ${input.model} reports context length ${matched.contextLength} tokens which is below the manifest ctx ${input.ctx}`,
      foundModels,
      { matchedContextLength: matched.contextLength }
    );
  }
  return {
    ok: true,
    provider: input.provider,
    endpoint,
    expectedModel: input.model,
    foundModels,
    ...(matched.contextLength !== undefined ? { matchedContextLength: matched.contextLength } : {})
  };
}
|
|
10315
|
+
/**
 * Builds the model-listing URL for a provider.
 *
 * One trailing slash is removed from `baseUrl`. For "openai-compatible" the
 * result ends in "/v1/models"; for "ollama" it ends in "/api/tags". A base
 * URL that already ends in "/v1" or "/api" (case-insensitive) is not given
 * the prefix a second time.
 *
 * @param {string} provider - Manifest provider kind.
 * @param {string} baseUrl - Role base URL.
 * @returns {string} Discovery endpoint URL.
 * @throws {Error} For an unsupported provider kind.
 */
function discoveryEndpointFor(provider, baseUrl) {
  const root = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
  switch (provider) {
    case "openai-compatible":
      return /\/v1$/i.test(root) ? `${root}/models` : `${root}/v1/models`;
    case "ollama":
      return /\/api$/i.test(root) ? `${root}/tags` : `${root}/api/tags`;
    default:
      throw new Error(`local-lab preflight provider kind unsupported: ${provider}`);
  }
}
|
|
10332
|
+
/**
 * Explains why the expected model was not found at an endpoint.
 *
 * Lists at most the first 20 discovered model ids and appends an ellipsis
 * character when more were reported.
 *
 * @param {string} expectedModel - Model id from the manifest.
 * @param {Array<{id: string}>} foundModels - Models the endpoint reported.
 * @returns {string} Human-readable reason.
 */
function modelMismatchReason(expectedModel, foundModels) {
  if (foundModels.length === 0) {
    return `endpoint reported no models; expected ${expectedModel}`;
  }
  const limit = 20;
  const ids = foundModels.map(({ id }) => id);
  const listed = ids.slice(0, limit).join(", ");
  const overflow = ids.length > limit ? "\u2026" : "";
  return `endpoint did not report the manifest model ${expectedModel}; found [${listed}${overflow}]`;
}
|
|
10341
|
+
/**
 * Extracts the model list from a discovery response body.
 *
 * Reads `parsed.data` for "openai-compatible" and `parsed.models` for
 * "ollama". A body that is not a plain object yields an empty list before
 * the provider kind is looked at.
 *
 * @param {string} provider - Manifest provider kind.
 * @param {unknown} parsed - Parsed JSON response body.
 * @returns {Array<object>} Discovered models.
 * @throws {Error} For an unsupported provider kind with an object body.
 */
function extractDiscoveredModels(provider, parsed) {
  if (!isPlainObject2(parsed)) {
    return [];
  }
  switch (provider) {
    case "openai-compatible":
      return extractOpenAiModels(parsed.data);
    case "ollama":
      return extractOllamaModels(parsed.models);
    default:
      throw new Error(`local-lab preflight provider kind unsupported: ${provider}`);
  }
}
|
|
10354
|
+
/**
 * Converts the `data` array of an OpenAI-style model listing into
 * `{ id, contextLength? }` records.
 *
 * Entries that are not plain objects or whose `id` is not a string are
 * dropped. `contextLength` is included only when the entry has a numeric
 * `context_length`.
 *
 * @param {unknown} data - The `data` field of the response body.
 * @returns {Array<{id: string, contextLength?: number}>} Discovered models;
 *   empty when `data` is not an array.
 */
function extractOpenAiModels(data) {
  if (!Array.isArray(data)) {
    return [];
  }
  return data
    .filter((entry) => isPlainObject2(entry) && typeof entry.id === "string")
    .map((entry) => {
      const model = { id: entry.id };
      if (typeof entry.context_length === "number") {
        model.contextLength = entry.context_length;
      }
      return model;
    });
}
|
|
10369
|
+
/**
 * Converts the `models` array of an Ollama tag listing into `{ id }`
 * records, using each entry's `name` as the id.
 *
 * Entries that are not plain objects or whose `name` is not a string are
 * dropped.
 *
 * @param {unknown} models - The `models` field of the response body.
 * @returns {Array<{id: string}>} Discovered models; empty when `models` is
 *   not an array.
 */
function extractOllamaModels(models) {
  if (!Array.isArray(models)) {
    return [];
  }
  return models
    .filter((entry) => isPlainObject2(entry) && typeof entry.name === "string")
    .map((entry) => ({ id: entry.name }));
}
|
|
10381
|
+
/**
 * Tells whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for objects other than `null` and arrays.
 */
function isPlainObject2(value) {
  if (typeof value !== "object") {
    return false;
  }
  return value !== null && !Array.isArray(value);
}
|
|
10384
|
+
|
|
10385
|
+
// src/local-lab/sequential-phases.ts
|
|
10386
|
+
/**
 * Removes a single trailing slash so that two base URLs differing only by
 * that slash compare equal.
 *
 * @param {string} baseUrl - Base URL to normalize.
 * @returns {string} The URL without one trailing "/".
 */
function normalizeBaseUrlForSameness(baseUrl) {
  if (!baseUrl.endsWith("/")) {
    return baseUrl;
  }
  return baseUrl.slice(0, -1);
}
|
|
10389
|
+
/**
 * Runs the given phases one after another.
 *
 * For each phase, in order:
 *  1. if its base URL differs from the previous phase's (ignoring one
 *     trailing slash), `hooks.onPhaseHandoff` is called with a handoff note;
 *  2. the role is preflighted; a failed preflight throws
 *     `LocalLabPreflightError` and stops the run;
 *  3. `hooks.onPhasePreflight` and `hooks.onPhaseStart` are called;
 *  4. `phase.execute` is awaited with the resolved role;
 *  5. the outcome is recorded and `hooks.onPhaseComplete` is called.
 *
 * Hook return values are not awaited.
 *
 * @param {object} manifest - Validated manifest (source of handoff notes).
 * @param {Array<{name: string, role: object, execute: Function}>} phases
 *   Phases to run in order.
 * @param {{hooks?: object, preflight?: object}} [options] - Optional hooks
 *   and options forwarded to the preflight.
 * @returns {Promise<Array<{phase: object, preflight: object, result: unknown}>>}
 *   One outcome per completed phase.
 * @throws {LocalLabPreflightError} When a phase's preflight is not ok.
 */
async function runSequentialPhases(manifest, phases, options = {}) {
  const completed = [];
  for (const [position, current] of phases.entries()) {
    const descriptor = {
      name: current.name,
      role: current.role
    };

    if (position > 0) {
      const prior = phases[position - 1];
      const currentUrl = normalizeBaseUrlForSameness(current.role.baseUrl);
      const priorUrl = normalizeBaseUrlForSameness(prior.role.baseUrl);
      if (currentUrl !== priorUrl) {
        const priorDescriptor = { name: prior.name, role: prior.role };
        const note = formatHandoffNote(
          priorDescriptor,
          descriptor,
          readHandoffNote(manifest, prior.name, current.name)
        );
        options.hooks?.onPhaseHandoff?.(priorDescriptor, descriptor, note);
      }
    }

    const { provider, baseUrl, model, ctx } = current.role;
    const preflight = await preflightLocalLabRole({ provider, baseUrl, model, ctx }, options.preflight);
    if (!preflight.ok) {
      throw new LocalLabPreflightError({
        phase: descriptor,
        preflight,
        phaseIndex: position
      });
    }
    options.hooks?.onPhasePreflight?.(preflight);
    options.hooks?.onPhaseStart?.(descriptor);

    const result = await current.execute(resolveLocalLabRole(current.role));
    const outcome = { phase: descriptor, preflight, result };
    completed.push(outcome);
    options.hooks?.onPhaseComplete?.(outcome);
  }
  return completed;
}
|
|
10444
|
+
/**
 * Error thrown when a phase's preflight check does not pass.
 *
 * Carries the phase descriptor, the preflight result and the zero-based
 * phase index so callers can report which phase failed and why.
 */
var LocalLabPreflightError = class extends Error {
  phase;
  preflight;
  phaseIndex;
  /**
   * @param {{phase: {name: string}, preflight: object, phaseIndex: number}} args
   *   Failing phase, its preflight result and its index.
   */
  constructor(args) {
    const { phase, preflight, phaseIndex } = args;
    let reason;
    if (preflight.ok === false) {
      reason = preflight.reason;
    } else {
      reason = "preflight reported success but the scheduler treated it as a failure (internal inconsistency)";
    }
    super(
      `local-lab phase "${phase.name}" (index ${phaseIndex}) preflight failed: ${reason}`
    );
    this.name = "LocalLabPreflightError";
    this.phase = phase;
    this.preflight = preflight;
    this.phaseIndex = phaseIndex;
  }
};
|
|
10459
|
+
/**
 * Produces the operator-facing note shown when two consecutive phases use
 * different endpoints.
 *
 * A non-blank `manifestNote` is returned trimmed; otherwise a default
 * instruction is generated from the two phase descriptors.
 *
 * @param {{name: string}} from - Phase that just finished.
 * @param {{name: string, role: {baseUrl: string, model: string}}} to - Phase
 *   about to start.
 * @param {string} [manifestNote] - Custom note from the manifest, if any.
 * @returns {string} The handoff note.
 */
function formatHandoffNote(from, to, manifestNote) {
  const custom = manifestNote === undefined ? "" : manifestNote.trim();
  if (custom.length > 0) {
    return custom;
  }
  return `stop ${from.name} endpoint, start ${to.name} endpoint at ${to.role.baseUrl} serving model ${to.role.model}, then resume the bench`;
}
|
|
10465
|
+
/**
 * Looks up a manifest-provided handoff note for a phase transition.
 *
 * Only the "responder" to "judge" transition is recognized; its note is
 * read from `manifest.notes.responderToJudgeHandoff` and used only when it
 * is a string.
 *
 * @param {{notes?: object}} manifest - Validated manifest.
 * @param {string} from - Name of the phase that just finished.
 * @param {string} to - Name of the phase about to start.
 * @returns {string|undefined} The note, or `undefined` when none applies.
 */
function readHandoffNote(manifest, from, to) {
  if (from !== "responder" || to !== "judge") {
    return undefined;
  }
  const candidate = manifest.notes?.responderToJudgeHandoff;
  return typeof candidate === "string" ? candidate : undefined;
}
|
|
10471
|
+
|
|
10006
10472
|
// src/runtime-profiles.ts
|
|
10007
10473
|
var OPENCLAW_REMNIC_PLUGIN_IDS = ["openclaw-remnic", "openclaw-engram"];
|
|
10008
10474
|
function getOpenClawPluginEntries(raw) {
|
|
@@ -10029,6 +10495,9 @@ var codexCliFallbackRegistered = false;
|
|
|
10029
10495
|
var codexCliFallbackChain = Promise.resolve();
|
|
10030
10496
|
async function resolveBenchRuntimeProfile(options) {
|
|
10031
10497
|
const profile = options.runtimeProfile ?? "baseline";
|
|
10498
|
+
if (profile === "local-lab") {
|
|
10499
|
+
return resolveLocalLabRuntimeProfile(options);
|
|
10500
|
+
}
|
|
10032
10501
|
const systemProvider = profile === "openclaw-chain" ? null : resolveProviderConfig(
|
|
10033
10502
|
"system",
|
|
10034
10503
|
options.systemProvider,
|
|
@@ -10205,11 +10674,11 @@ async function resolveBenchRuntimeProfile(options) {
|
|
|
10205
10674
|
async function loadRemnicConfigFile(filePath) {
|
|
10206
10675
|
const parsed = await loadJsonObject(filePath, "Remnic config");
|
|
10207
10676
|
const remnic = parsed.remnic;
|
|
10208
|
-
if (
|
|
10677
|
+
if (isPlainObject3(remnic)) {
|
|
10209
10678
|
return { ...remnic };
|
|
10210
10679
|
}
|
|
10211
10680
|
const engram = parsed.engram;
|
|
10212
|
-
if (
|
|
10681
|
+
if (isPlainObject3(engram)) {
|
|
10213
10682
|
return { ...engram };
|
|
10214
10683
|
}
|
|
10215
10684
|
return parsed;
|
|
@@ -10220,10 +10689,10 @@ async function loadOpenclawRuntimeConfig(filePath) {
|
|
|
10220
10689
|
}
|
|
10221
10690
|
const parsed = await loadJsonObject(filePath, "OpenClaw config");
|
|
10222
10691
|
const entry = resolveOpenClawRemnicPluginEntry(parsed);
|
|
10223
|
-
const remnicConfig =
|
|
10692
|
+
const remnicConfig = isPlainObject3(entry?.config) ? { ...entry.config } : {};
|
|
10224
10693
|
const gatewayConfig = {
|
|
10225
|
-
...
|
|
10226
|
-
...
|
|
10694
|
+
...isPlainObject3(parsed.agents) ? { agents: parsed.agents } : {},
|
|
10695
|
+
...isPlainObject3(parsed.models) ? { models: parsed.models } : {}
|
|
10227
10696
|
};
|
|
10228
10697
|
return {
|
|
10229
10698
|
remnicConfig,
|
|
@@ -10240,7 +10709,7 @@ function deriveOpenclawRuntimeContext(configPath) {
|
|
|
10240
10709
|
};
|
|
10241
10710
|
}
|
|
10242
10711
|
async function loadJsonObject(filePath, label) {
|
|
10243
|
-
const raw = await
|
|
10712
|
+
const raw = await readFile9(filePath, "utf8");
|
|
10244
10713
|
let parsed;
|
|
10245
10714
|
try {
|
|
10246
10715
|
parsed = JSON.parse(raw);
|
|
@@ -10249,7 +10718,7 @@ async function loadJsonObject(filePath, label) {
|
|
|
10249
10718
|
`${label} at ${filePath} contains invalid JSON: ${error instanceof Error ? error.message : String(error)}`
|
|
10250
10719
|
);
|
|
10251
10720
|
}
|
|
10252
|
-
if (!
|
|
10721
|
+
if (!isPlainObject3(parsed)) {
|
|
10253
10722
|
throw new Error(`${label} at ${filePath} must be a JSON object`);
|
|
10254
10723
|
}
|
|
10255
10724
|
return parsed;
|
|
@@ -10429,6 +10898,25 @@ function sanitizeProviderConfig(config) {
|
|
|
10429
10898
|
...config.apiKey ? { apiKey: REDACTED_CONFIG_VALUE } : {}
|
|
10430
10899
|
};
|
|
10431
10900
|
}
|
|
10901
|
+
/**
 * Layers run-level options onto a provider config.
 *
 * - `requestTimeout` and `max429WaitMs` (when not null/undefined) become
 *   `retryOptions.timeoutMs` and `retryOptions.max429WaitMs`; the new
 *   `retryOptions` object replaces any existing one on the config.
 * - A truthy `disableThinking` sets `disableThinking: true`.
 *
 * When none of these options apply, the same `config` object is returned;
 * otherwise a shallow copy is returned and `config` is left untouched.
 *
 * @param {object} config - Provider config.
 * @param {{requestTimeout?: number, max429WaitMs?: number, disableThinking?: boolean}} options
 *   Run-level options.
 * @returns {object} The original config or an extended copy.
 */
function applyLocalLabRuntimeOptions(config, options) {
  const { requestTimeout, max429WaitMs, disableThinking } = options;
  const hasTimeout = requestTimeout != null;
  const hasMaxWait = max429WaitMs != null;
  const hasRetry = hasTimeout || hasMaxWait;
  if (!hasRetry && !disableThinking) {
    return config;
  }
  const next = { ...config };
  if (hasRetry) {
    const retryOptions = {};
    if (hasTimeout) {
      retryOptions.timeoutMs = requestTimeout;
    }
    if (hasMaxWait) {
      retryOptions.max429WaitMs = max429WaitMs;
    }
    next.retryOptions = retryOptions;
  }
  if (disableThinking) {
    next.disableThinking = true;
  }
  return next;
}
|
|
10432
10920
|
function registerCodexCliFallbackRunnerIfNeeded(config) {
|
|
10433
10921
|
if (!config || config.provider !== "codex-cli" || codexCliFallbackRegistered) {
|
|
10434
10922
|
return;
|
|
@@ -10524,6 +11012,8 @@ function asProviderFactoryConfig(config) {
|
|
|
10524
11012
|
...config.retryOptions ? { retryOptions: config.retryOptions } : {},
|
|
10525
11013
|
...config.disableThinking ? { disableThinking: config.disableThinking } : {},
|
|
10526
11014
|
...config.reasoningEffort ? { reasoningEffort: config.reasoningEffort } : {},
|
|
11015
|
+
...config.temperature !== void 0 ? { temperature: config.temperature } : {},
|
|
11016
|
+
...config.seed !== void 0 ? { seed: config.seed } : {},
|
|
10527
11017
|
...config.responderContextBudgetChars !== void 0 ? { responderContextBudgetChars: config.responderContextBudgetChars } : {},
|
|
10528
11018
|
...config.responderPromptBudgetChars !== void 0 ? { responderPromptBudgetChars: config.responderPromptBudgetChars } : {}
|
|
10529
11019
|
};
|
|
@@ -10533,17 +11023,17 @@ function asNonEmptyString(value) {
|
|
|
10533
11023
|
}
|
|
10534
11024
|
function sanitizeGatewayConfig(config) {
|
|
10535
11025
|
const sanitized = sanitizePersistedConfig(config);
|
|
10536
|
-
return
|
|
11026
|
+
return isPlainObject3(sanitized) ? sanitized : {};
|
|
10537
11027
|
}
|
|
10538
11028
|
function sanitizePersistedConfig(config) {
|
|
10539
11029
|
const sanitized = sanitizePersistedValue(config);
|
|
10540
|
-
return
|
|
11030
|
+
return isPlainObject3(sanitized) ? sanitized : {};
|
|
10541
11031
|
}
|
|
10542
11032
|
function sanitizePersistedValue(value) {
|
|
10543
11033
|
if (Array.isArray(value)) {
|
|
10544
11034
|
return value.map((entry) => sanitizePersistedValue(entry));
|
|
10545
11035
|
}
|
|
10546
|
-
if (!
|
|
11036
|
+
if (!isPlainObject3(value)) {
|
|
10547
11037
|
return value;
|
|
10548
11038
|
}
|
|
10549
11039
|
const next = {};
|
|
@@ -10556,9 +11046,61 @@ function sanitizePersistedValue(value) {
|
|
|
10556
11046
|
}
|
|
10557
11047
|
return next;
|
|
10558
11048
|
}
|
|
10559
|
-
function
|
|
11049
|
+
/**
 * Tells whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to test.
 * @returns {boolean} True for objects other than `null` and arrays.
 */
function isPlainObject3(value) {
  const isObjectLike = typeof value === "object" && value !== null;
  return isObjectLike && !Array.isArray(value);
}
|
|
11052
|
+
/**
 * Builds the bench runtime profile for the "local-lab" profile.
 *
 * Loads and resolves the manifest at `options.localLabManifestPath`, turns
 * the responder and judge roles into sanitized provider configs with the
 * run-level options applied, creates the judge, structured judge and
 * responder from those configs, and combines the baseline Remnic config
 * with the LCM observe-concurrency overrides and assistant hooks.
 *
 * @param {object} options - Runtime profile options; must include
 *   `localLabManifestPath`.
 * @returns {Promise<object>} Runtime profile with `profile: "local-lab"`,
 *   the Remnic configs, adapter options, provider configs and the resolved
 *   manifest under `localLab`.
 * @throws {Error} When `localLabManifestPath` is missing, or when the
 *   manifest cannot be loaded or validated.
 */
async function resolveLocalLabRuntimeProfile(options) {
  if (!options.localLabManifestPath) {
    throw new Error(
      "local-lab runtime profile requires localLabManifestPath pointing at a manifest JSON file (issue #1573 PR2)"
    );
  }
  const manifest = await loadLocalLabManifest(options.localLabManifestPath);
  const resolved = resolveLocalLabProfile(manifest);
  // The drain timeout falls back to the request timeout when not given.
  const drainTimeoutMs = normalizeDrainTimeoutMs2(options.drainTimeout ?? options.requestTimeout);

  const responderConfig = sanitizeProviderConfig(resolved.responder.providerConfig);
  const systemProvider = applyLocalLabRuntimeOptions(responderConfig, options);
  const judgeConfig = sanitizeProviderConfig(resolved.judge.providerConfig);
  const judgeProvider = applyLocalLabRuntimeOptions(judgeConfig, options);

  // Judge side: one provider instance is shared by both judge flavours.
  let judgeFactoryConfig;
  if (judgeProvider) {
    judgeFactoryConfig = asProviderFactoryConfig(judgeProvider);
  }
  let judgeProviderInstance;
  let judge;
  let structuredJudge;
  if (judgeFactoryConfig) {
    judgeProviderInstance = createProvider(judgeFactoryConfig);
    judge = createProviderBackedJudge(judgeFactoryConfig, judgeProviderInstance);
    structuredJudge = createProviderBackedStructuredJudge(judgeFactoryConfig, judgeProviderInstance);
  }

  // Responder side.
  let responderFactoryConfig;
  if (systemProvider) {
    responderFactoryConfig = asProviderFactoryConfig(systemProvider);
  }
  let responder;
  if (responderFactoryConfig) {
    responder = createProviderBackedResponder(responderFactoryConfig);
  }

  const lcmObserveConcurrencyOverrides = buildLcmObserveConcurrencyOverrides(options.lcmObserveConcurrency);
  const localLabRemnicConfig = {
    ...buildBenchBaselineRemnicConfig(),
    ...lcmObserveConcurrencyOverrides
  };
  const effectiveRemnicConfig = withAssistantHooks(localLabRemnicConfig, responder, structuredJudge);

  const adapterOptions = {
    configOverrides: effectiveRemnicConfig,
    responder,
    judge
  };
  if (drainTimeoutMs) {
    adapterOptions.drainTimeoutMs = drainTimeoutMs;
  }
  return {
    profile: "local-lab",
    remnicConfig: sanitizePersistedConfig(localLabRemnicConfig),
    effectiveRemnicConfig,
    adapterOptions,
    systemProvider,
    judgeProvider,
    internalProvider: null,
    localLab: resolved
  };
}
|
|
10562
11104
|
|
|
10563
11105
|
// src/benchmark.ts
|
|
10564
11106
|
import fs2 from "fs";
|
|
@@ -10570,7 +11112,7 @@ import { expandTildePath as expandTildePath3 } from "@remnic/core";
|
|
|
10570
11112
|
import { createHash as createHash6, randomBytes as randomBytes2 } from "crypto";
|
|
10571
11113
|
import {
|
|
10572
11114
|
mkdir as mkdir8,
|
|
10573
|
-
readFile as
|
|
11115
|
+
readFile as readFile10,
|
|
10574
11116
|
rename as rename2,
|
|
10575
11117
|
rm as rm3,
|
|
10576
11118
|
writeFile as writeFile8
|
|
@@ -10631,7 +11173,7 @@ var JudgeCache = class {
|
|
|
10631
11173
|
const filePath = this.entryPath(key);
|
|
10632
11174
|
let raw;
|
|
10633
11175
|
try {
|
|
10634
|
-
raw = await
|
|
11176
|
+
raw = await readFile10(filePath, "utf8");
|
|
10635
11177
|
} catch {
|
|
10636
11178
|
return void 0;
|
|
10637
11179
|
}
|
|
@@ -10869,7 +11411,7 @@ function isBenchJudgeResult(value) {
|
|
|
10869
11411
|
|
|
10870
11412
|
// src/benchmarks/published/ama-bench/runner.ts
|
|
10871
11413
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
10872
|
-
import { readFile as
|
|
11414
|
+
import { readFile as readFile11 } from "fs/promises";
|
|
10873
11415
|
import path12 from "path";
|
|
10874
11416
|
|
|
10875
11417
|
// src/benchmarks/published/ama-bench/fixture.ts
|
|
@@ -11448,7 +11990,7 @@ async function loadDataset(mode, datasetDir, limit) {
|
|
|
11448
11990
|
const filePath = path12.join(datasetDir, "open_end_qa_set.jsonl");
|
|
11449
11991
|
let raw;
|
|
11450
11992
|
try {
|
|
11451
|
-
raw = await
|
|
11993
|
+
raw = await readFile11(filePath, "utf8");
|
|
11452
11994
|
} catch (error) {
|
|
11453
11995
|
throw new Error(
|
|
11454
11996
|
`AMA-Bench dataset not found at ${filePath}: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -11740,7 +12282,7 @@ function isValidQaPairs(value) {
|
|
|
11740
12282
|
|
|
11741
12283
|
// src/benchmarks/published/amemgym/runner.ts
|
|
11742
12284
|
import { randomUUID as randomUUID3 } from "crypto";
|
|
11743
|
-
import { readFile as
|
|
12285
|
+
import { readFile as readFile12 } from "fs/promises";
|
|
11744
12286
|
import path13 from "path";
|
|
11745
12287
|
|
|
11746
12288
|
// src/benchmarks/published/amemgym/fixture.ts
|
|
@@ -12270,7 +12812,7 @@ async function loadDataset2(mode, datasetDir, limit) {
|
|
|
12270
12812
|
const datasetErrors = [];
|
|
12271
12813
|
for (const filename of DATASET_FILENAMES) {
|
|
12272
12814
|
try {
|
|
12273
|
-
const raw = await
|
|
12815
|
+
const raw = await readFile12(path13.join(datasetDir, filename), "utf8");
|
|
12274
12816
|
const parsed = parseDataset(raw, filename, normalizedLimit);
|
|
12275
12817
|
return ensureDatasetProfiles(parsed);
|
|
12276
12818
|
} catch (error) {
|
|
@@ -12444,7 +12986,7 @@ function normalizeRole(role) {
|
|
|
12444
12986
|
|
|
12445
12987
|
// src/benchmarks/published/memory-arena/runner.ts
|
|
12446
12988
|
import { randomUUID as randomUUID4 } from "crypto";
|
|
12447
|
-
import { readFile as
|
|
12989
|
+
import { readFile as readFile13, readdir as readdir5, stat as stat3 } from "fs/promises";
|
|
12448
12990
|
import path14 from "path";
|
|
12449
12991
|
import { expandTildePath as expandTildePath2 } from "@remnic/core";
|
|
12450
12992
|
|
|
@@ -12772,7 +13314,7 @@ async function loadDataset3(mode, datasetDir, limit) {
|
|
|
12772
13314
|
if (remainingLimit2 === 0) {
|
|
12773
13315
|
break;
|
|
12774
13316
|
}
|
|
12775
|
-
const raw = await
|
|
13317
|
+
const raw = await readFile13(path14.join(datasetDir, filename), "utf8");
|
|
12776
13318
|
const parsedTasks = [];
|
|
12777
13319
|
raw.split("\n").forEach((line, lineIndex) => {
|
|
12778
13320
|
if (line.trim().length === 0) {
|
|
@@ -13108,7 +13650,7 @@ async function loadMemoryArenaWebshopProductCatalog(datasetDir) {
|
|
|
13108
13650
|
`MemoryArena WebShop product sidecar is ${sourceStat.size} bytes; provide a compact JSON/JSONL sidecar smaller than ${MEMORY_ARENA_WEBSHOP_PRODUCTS_MAX_BYTES} bytes instead of the full WebShop catalog.`
|
|
13109
13651
|
);
|
|
13110
13652
|
}
|
|
13111
|
-
const raw = await
|
|
13653
|
+
const raw = await readFile13(sourcePath, "utf8");
|
|
13112
13654
|
const records = parseMemoryArenaWebshopSidecarRecords(raw, sourcePath);
|
|
13113
13655
|
const byAsin = /* @__PURE__ */ new Map();
|
|
13114
13656
|
for (const record of records) {
|
|
@@ -14557,7 +15099,7 @@ function scoreSubtaskSuccess(scores) {
|
|
|
14557
15099
|
import { collectTemporalLexicalCues } from "@remnic/core";
|
|
14558
15100
|
|
|
14559
15101
|
// src/benchmarks/published/dataset-loader.ts
|
|
14560
|
-
import { readFile as
|
|
15102
|
+
import { readFile as readFile14 } from "fs/promises";
|
|
14561
15103
|
import path15 from "path";
|
|
14562
15104
|
|
|
14563
15105
|
// src/benchmarks/published/longmemeval/fixture.ts
|
|
@@ -14664,7 +15206,7 @@ async function loadDataset4(options) {
|
|
|
14664
15206
|
const abs = path15.join(options.datasetDir, filename);
|
|
14665
15207
|
let raw;
|
|
14666
15208
|
try {
|
|
14667
|
-
raw = await
|
|
15209
|
+
raw = await readFile14(abs, "utf8");
|
|
14668
15210
|
} catch (error) {
|
|
14669
15211
|
errors.push(
|
|
14670
15212
|
`${filename}: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -17912,7 +18454,7 @@ var StructuredLiteralParser = class {
|
|
|
17912
18454
|
|
|
17913
18455
|
// src/benchmarks/published/personamem/runner.ts
|
|
17914
18456
|
import { createHash as createHash7, randomUUID as randomUUID7 } from "crypto";
|
|
17915
|
-
import { readFile as
|
|
18457
|
+
import { readFile as readFile15, realpath as realpath4 } from "fs/promises";
|
|
17916
18458
|
import path17 from "path";
|
|
17917
18459
|
|
|
17918
18460
|
// src/benchmarks/published/personamem/fixture.ts
|
|
@@ -18192,7 +18734,7 @@ async function loadDataset8(mode, datasetDir, limit) {
|
|
|
18192
18734
|
const datasetPath = path17.join(datasetDir, relativePath);
|
|
18193
18735
|
let raw;
|
|
18194
18736
|
try {
|
|
18195
|
-
raw = await
|
|
18737
|
+
raw = await readFile15(datasetPath, "utf8");
|
|
18196
18738
|
} catch (error) {
|
|
18197
18739
|
datasetErrors.push(
|
|
18198
18740
|
`${relativePath}: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -18250,7 +18792,7 @@ async function hydrateSample(row, datasetRoot) {
|
|
|
18250
18792
|
datasetRoot,
|
|
18251
18793
|
row.chat_history_32k_link
|
|
18252
18794
|
);
|
|
18253
|
-
const chatHistoryRaw = await
|
|
18795
|
+
const chatHistoryRaw = await readFile15(chatHistoryPath, "utf8");
|
|
18254
18796
|
const chatHistory = parseChatHistory(
|
|
18255
18797
|
chatHistoryRaw,
|
|
18256
18798
|
row.chat_history_32k_link
|
|
@@ -18716,7 +19258,7 @@ function applyLimit6(items, limit) {
|
|
|
18716
19258
|
|
|
18717
19259
|
// src/benchmarks/published/membench/runner.ts
|
|
18718
19260
|
import { randomUUID as randomUUID8 } from "crypto";
|
|
18719
|
-
import { readFile as
|
|
19261
|
+
import { readFile as readFile16, readdir as readdir7 } from "fs/promises";
|
|
18720
19262
|
import path18 from "path";
|
|
18721
19263
|
|
|
18722
19264
|
// src/benchmarks/published/membench/fixture.ts
|
|
@@ -18978,7 +19520,7 @@ async function loadDataset9(mode, datasetDir, limit) {
|
|
|
18978
19520
|
let remainingLimit = normalizedLimit;
|
|
18979
19521
|
for (const filename of filenames) {
|
|
18980
19522
|
try {
|
|
18981
|
-
const raw = await
|
|
19523
|
+
const raw = await readFile16(path18.join(datasetDir, filename), "utf8");
|
|
18982
19524
|
const parsed = filename.endsWith(".jsonl") ? parseJsonlDataset(raw, filename) : parseJsonDataset(raw, filename);
|
|
18983
19525
|
const limitedCases = remainingLimit === 0 ? [] : applyLimit7(parsed, remainingLimit);
|
|
18984
19526
|
if (limitedCases.length > 0) {
|
|
@@ -19058,7 +19600,7 @@ function parseJsonlDataset(raw, filename) {
|
|
|
19058
19600
|
return cases;
|
|
19059
19601
|
}
|
|
19060
19602
|
function parseCase(entry, location) {
|
|
19061
|
-
if (!
|
|
19603
|
+
if (!isPlainObject4(entry)) {
|
|
19062
19604
|
throw new Error(`MemBench case ${location} must be an object.`);
|
|
19063
19605
|
}
|
|
19064
19606
|
const {
|
|
@@ -19177,7 +19719,7 @@ function normalizePublishedNode(node, hints, location) {
|
|
|
19177
19719
|
(entry, index) => normalizePublishedNode(entry, hints, `${location}[${index}]`)
|
|
19178
19720
|
);
|
|
19179
19721
|
}
|
|
19180
|
-
if (!
|
|
19722
|
+
if (!isPlainObject4(node)) {
|
|
19181
19723
|
return [];
|
|
19182
19724
|
}
|
|
19183
19725
|
const flatCase = normalizeFlatCase(node, hints, location);
|
|
@@ -19223,7 +19765,7 @@ function normalizeFlatCase(record, hints, location) {
|
|
|
19223
19765
|
function normalizeTrajectoryQaRecord(record, hints, location) {
|
|
19224
19766
|
const trajectory = record.trajectory ?? record.message_list ?? record.messages;
|
|
19225
19767
|
const rawQa = record.qa ?? record.QA ?? record.qas ?? record.qa_pairs ?? record.question_answers;
|
|
19226
|
-
const qa = Array.isArray(rawQa) ? rawQa :
|
|
19768
|
+
const qa = Array.isArray(rawQa) ? rawQa : isPlainObject4(rawQa) ? [rawQa] : void 0;
|
|
19227
19769
|
if (!Array.isArray(trajectory) || !Array.isArray(qa) || qa.length === 0) {
|
|
19228
19770
|
return [];
|
|
19229
19771
|
}
|
|
@@ -19311,7 +19853,7 @@ function appendTrajectoryTurn(turn, _location, coordinate, turns, coordinateInde
|
|
|
19311
19853
|
turns.push({ role: "user", content: turn });
|
|
19312
19854
|
return true;
|
|
19313
19855
|
}
|
|
19314
|
-
if (!
|
|
19856
|
+
if (!isPlainObject4(turn)) {
|
|
19315
19857
|
return false;
|
|
19316
19858
|
}
|
|
19317
19859
|
const directMessage = parseDirectMessageTurn(turn);
|
|
@@ -19361,7 +19903,7 @@ function normalizeQaPairs(qa, location, coordinateIndex) {
|
|
|
19361
19903
|
const pairs = [];
|
|
19362
19904
|
for (let index = 0; index < qa.length; index += 1) {
|
|
19363
19905
|
const item = qa[index];
|
|
19364
|
-
if (!
|
|
19906
|
+
if (!isPlainObject4(item)) {
|
|
19365
19907
|
continue;
|
|
19366
19908
|
}
|
|
19367
19909
|
const question = firstString(item.question, item.query, item.prompt);
|
|
@@ -19501,7 +20043,7 @@ function firstString(...values) {
|
|
|
19501
20043
|
return null;
|
|
19502
20044
|
}
|
|
19503
20045
|
function parseTurn(turn, location) {
|
|
19504
|
-
if (!
|
|
20046
|
+
if (!isPlainObject4(turn)) {
|
|
19505
20047
|
throw new Error(`MemBench turn ${location} must be an object.`);
|
|
19506
20048
|
}
|
|
19507
20049
|
const { role, content } = turn;
|
|
@@ -19676,7 +20218,7 @@ function parseChoices(choices, location) {
|
|
|
19676
20218
|
D: parsedArray[3]
|
|
19677
20219
|
};
|
|
19678
20220
|
}
|
|
19679
|
-
if (!
|
|
20221
|
+
if (!isPlainObject4(choices)) {
|
|
19680
20222
|
return void 0;
|
|
19681
20223
|
}
|
|
19682
20224
|
const parsed = {
|
|
@@ -19839,13 +20381,13 @@ function applyLimit7(items, limit) {
|
|
|
19839
20381
|
}
|
|
19840
20382
|
return items.slice(0, limit);
|
|
19841
20383
|
}
|
|
19842
|
-
function
|
|
20384
|
+
function isPlainObject4(value) {
|
|
19843
20385
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
19844
20386
|
}
|
|
19845
20387
|
|
|
19846
20388
|
// src/benchmarks/published/memoryagentbench/runner.ts
|
|
19847
20389
|
import { randomUUID as randomUUID9 } from "crypto";
|
|
19848
|
-
import { access, readFile as
|
|
20390
|
+
import { access, readFile as readFile17 } from "fs/promises";
|
|
19849
20391
|
import path19 from "path";
|
|
19850
20392
|
|
|
19851
20393
|
// src/benchmarks/published/memoryagentbench/fixture.ts
|
|
@@ -20868,7 +21410,7 @@ async function loadRecSysEntityMapping(datasetDir) {
|
|
|
20868
21410
|
}
|
|
20869
21411
|
let parsed;
|
|
20870
21412
|
try {
|
|
20871
|
-
parsed = JSON.parse(await
|
|
21413
|
+
parsed = JSON.parse(await readFile17(candidate, "utf8"));
|
|
20872
21414
|
} catch (error) {
|
|
20873
21415
|
console.error(
|
|
20874
21416
|
` [WARN] MemoryAgentBench ReDial entity mapping ${candidate} is invalid JSON; trying the next candidate: ${error instanceof Error ? error.message : String(error)}`
|
|
@@ -21031,7 +21573,7 @@ async function loadDataset10(mode, datasetDir, limit) {
|
|
|
21031
21573
|
return ensureDatasetItems(applyLimit8(MEMORY_AGENT_BENCH_SMOKE_FIXTURE, normalizedLimit));
|
|
21032
21574
|
}
|
|
21033
21575
|
async function readDatasetFile(filePath, filename) {
|
|
21034
|
-
const raw = await
|
|
21576
|
+
const raw = await readFile17(filePath, "utf8");
|
|
21035
21577
|
const parsed = filename.endsWith(".jsonl") ? parseJsonLines(raw, filename) : parseJsonArray(raw, filename);
|
|
21036
21578
|
return parsed.map(
|
|
21037
21579
|
(item, index) => parseMemoryAgentBenchItem(item, `${filename} item ${index + 1}`)
|
|
@@ -22644,7 +23186,7 @@ function parseNonNegativeInt(rawValue) {
|
|
|
22644
23186
|
|
|
22645
23187
|
// src/benchmarks/remnic/page-versioning/runner.ts
|
|
22646
23188
|
import { randomUUID as randomUUID14 } from "crypto";
|
|
22647
|
-
import { mkdir as mkdir9, mkdtemp as mkdtemp4, readFile as
|
|
23189
|
+
import { mkdir as mkdir9, mkdtemp as mkdtemp4, readFile as readFile18, rm as rm5, writeFile as writeFile9 } from "fs/promises";
|
|
22648
23190
|
import os6 from "os";
|
|
22649
23191
|
import path22 from "path";
|
|
22650
23192
|
import {
|
|
@@ -22824,7 +23366,7 @@ async function executeCase2(sample, dependencies) {
|
|
|
22824
23366
|
await dependencies.createVersion(pagePath, "modified content", "write", config, void 0, void 0, tmpDir);
|
|
22825
23367
|
await dependencies.revertToVersion(pagePath, "1", config, void 0, tmpDir);
|
|
22826
23368
|
const history = await dependencies.listVersions(pagePath, config, tmpDir);
|
|
22827
|
-
const pageContent = await
|
|
23369
|
+
const pageContent = await readFile18(pagePath, "utf-8");
|
|
22828
23370
|
const observed = await dependencies.getVersion(pagePath, "3", config, tmpDir);
|
|
22829
23371
|
return {
|
|
22830
23372
|
versionIds: history.versions.map((version) => version.versionId),
|
|
@@ -22841,7 +23383,7 @@ async function executeCase2(sample, dependencies) {
|
|
|
22841
23383
|
await dependencies.createVersion(pagePath, content, "write", pruningConfig, void 0, void 0, tmpDir);
|
|
22842
23384
|
}
|
|
22843
23385
|
const history = await dependencies.listVersions(pagePath, pruningConfig, tmpDir);
|
|
22844
|
-
const pageContent = await
|
|
23386
|
+
const pageContent = await readFile18(pagePath, "utf-8");
|
|
22845
23387
|
const prunedIds = [];
|
|
22846
23388
|
for (const versionId of ["1", "2"]) {
|
|
22847
23389
|
try {
|
|
@@ -22882,7 +23424,7 @@ async function executeCase2(sample, dependencies) {
|
|
|
22882
23424
|
tmpDir
|
|
22883
23425
|
);
|
|
22884
23426
|
const history = await dependencies.listVersions(pagePath, config, tmpDir);
|
|
22885
|
-
const pageContent = await
|
|
23427
|
+
const pageContent = await readFile18(pagePath, "utf-8");
|
|
22886
23428
|
const diff = await dependencies.diffVersions(pagePath, "1", "2", config, tmpDir);
|
|
22887
23429
|
const observedLines = normalizeDiffChangedLines(diff);
|
|
22888
23430
|
return {
|
|
@@ -30099,7 +30641,7 @@ function getBenchmarkLowerIsBetter(benchmarkId) {
|
|
|
30099
30641
|
}
|
|
30100
30642
|
|
|
30101
30643
|
// src/integrity/sealed-qrels.ts
|
|
30102
|
-
import { readFile as
|
|
30644
|
+
import { readFile as readFile19 } from "fs/promises";
|
|
30103
30645
|
function isSealedQrelsArtifact(value) {
|
|
30104
30646
|
if (!value || typeof value !== "object") {
|
|
30105
30647
|
return false;
|
|
@@ -30169,7 +30711,7 @@ function parseSealedQrels(raw, options = {}) {
|
|
|
30169
30711
|
};
|
|
30170
30712
|
}
|
|
30171
30713
|
async function loadSealedQrels(filePath, options = {}) {
|
|
30172
|
-
const raw = await
|
|
30714
|
+
const raw = await readFile19(filePath, "utf8");
|
|
30173
30715
|
return parseSealedQrels(raw, options);
|
|
30174
30716
|
}
|
|
30175
30717
|
function serializeSealedQrels(artifact) {
|
|
@@ -30289,7 +30831,7 @@ function selectFixtureVariant(variants, seed) {
|
|
|
30289
30831
|
}
|
|
30290
30832
|
|
|
30291
30833
|
// src/benchmarks/custom/loader.ts
|
|
30292
|
-
import { readFile as
|
|
30834
|
+
import { readFile as readFile20 } from "fs/promises";
|
|
30293
30835
|
import { parse as parseYaml } from "yaml";
|
|
30294
30836
|
var CUSTOM_SCORING_VALUES = /* @__PURE__ */ new Set([
|
|
30295
30837
|
"exact_match",
|
|
@@ -30309,7 +30851,7 @@ function parseCustomBenchmark(source) {
|
|
|
30309
30851
|
async function loadCustomBenchmarkFile(filePath) {
|
|
30310
30852
|
let source;
|
|
30311
30853
|
try {
|
|
30312
|
-
source = await
|
|
30854
|
+
source = await readFile20(filePath, "utf8");
|
|
30313
30855
|
} catch (error) {
|
|
30314
30856
|
throw new Error(
|
|
30315
30857
|
`Failed to read custom benchmark file ${filePath}: ${formatError(error)}`
|
|
@@ -31501,7 +32043,7 @@ var chatFixture = {
|
|
|
31501
32043
|
};
|
|
31502
32044
|
|
|
31503
32045
|
// src/benchmarks/remnic/procedural-recall/ablation.ts
|
|
31504
|
-
import { mkdir as mkdir16, mkdtemp as mkdtemp11, rm as rm13, writeFile as writeFile15, readFile as
|
|
32046
|
+
import { mkdir as mkdir16, mkdtemp as mkdtemp11, rm as rm13, writeFile as writeFile15, readFile as readFile21 } from "fs/promises";
|
|
31505
32047
|
import os9 from "os";
|
|
31506
32048
|
import path34 from "path";
|
|
31507
32049
|
import {
|
|
@@ -31624,7 +32166,7 @@ async function runProceduralAblation(options) {
|
|
|
31624
32166
|
};
|
|
31625
32167
|
}
|
|
31626
32168
|
async function loadAblationFixture(fixturePath) {
|
|
31627
|
-
const raw = await
|
|
32169
|
+
const raw = await readFile21(fixturePath, "utf8");
|
|
31628
32170
|
let parsed;
|
|
31629
32171
|
try {
|
|
31630
32172
|
parsed = JSON.parse(raw);
|
|
@@ -32816,8 +33358,10 @@ export {
|
|
|
32816
33358
|
INTEGRITY_CIPHER_ALGORITHM,
|
|
32817
33359
|
INTEGRITY_HASH_ALGORITHM,
|
|
32818
33360
|
INTEGRITY_META_FIELDS,
|
|
33361
|
+
LOCAL_LAB_PROVIDER_KINDS,
|
|
32819
33362
|
LOCOMO_DATASET_FILENAMES,
|
|
32820
33363
|
LONG_MEM_EVAL_DATASET_FILENAMES,
|
|
33364
|
+
LocalLabPreflightError,
|
|
32821
33365
|
MEMORY_EVAL_DIMENSIONS,
|
|
32822
33366
|
MEMORY_EVAL_PUBLIC_LINE,
|
|
32823
33367
|
MITIGATED_BASELINE_SCENARIOS,
|
|
@@ -32896,12 +33440,14 @@ export {
|
|
|
32896
33440
|
defaultBenchmarkPublishPath,
|
|
32897
33441
|
deleteBenchmarkResults,
|
|
32898
33442
|
discoverAllProviders,
|
|
33443
|
+
discoveryEndpointFor,
|
|
32899
33444
|
emailFixture,
|
|
32900
33445
|
entityRecall,
|
|
32901
33446
|
exactMatch,
|
|
32902
33447
|
extractMarkdownSectionsByTitle,
|
|
32903
33448
|
f1Score,
|
|
32904
33449
|
fixtureToAblationScenarios,
|
|
33450
|
+
formatHandoffNote,
|
|
32905
33451
|
formatMissingDatasetError,
|
|
32906
33452
|
generateReport,
|
|
32907
33453
|
getBenchmark,
|
|
@@ -32935,6 +33481,7 @@ export {
|
|
|
32935
33481
|
loadBenchmarkResult,
|
|
32936
33482
|
loadCustomBenchmarkFile,
|
|
32937
33483
|
loadLoCoMo10,
|
|
33484
|
+
loadLocalLabManifest,
|
|
32938
33485
|
loadLongMemEvalS,
|
|
32939
33486
|
loadSealKeyFromEnv,
|
|
32940
33487
|
loadSealedQrels,
|
|
@@ -32946,9 +33493,11 @@ export {
|
|
|
32946
33493
|
pairedDeltaConfidenceInterval,
|
|
32947
33494
|
parseBenchmarkArtifact,
|
|
32948
33495
|
parseCustomBenchmark,
|
|
33496
|
+
parseLocalLabManifest,
|
|
32949
33497
|
parseRubricResponse,
|
|
32950
33498
|
parseSealedQrels,
|
|
32951
33499
|
precisionAtK,
|
|
33500
|
+
preflightLocalLabRole,
|
|
32952
33501
|
projectFolderFixture,
|
|
32953
33502
|
recallAtK,
|
|
32954
33503
|
redactBenchmarkResultSecrets,
|
|
@@ -32965,6 +33514,8 @@ export {
|
|
|
32965
33514
|
resolveBenchmarkProgressLogging,
|
|
32966
33515
|
resolveBenchmarkResultReference,
|
|
32967
33516
|
resolveBenchmarkRunCount,
|
|
33517
|
+
resolveLocalLabProfile,
|
|
33518
|
+
resolveLocalLabRole,
|
|
32968
33519
|
resolveStructuredJudge,
|
|
32969
33520
|
rotateDistractors,
|
|
32970
33521
|
rougeL,
|
|
@@ -32983,6 +33534,7 @@ export {
|
|
|
32983
33534
|
runProceduralAblation,
|
|
32984
33535
|
runProceduralAblationCli,
|
|
32985
33536
|
runSealedJudge,
|
|
33537
|
+
runSequentialPhases,
|
|
32986
33538
|
safeHexEqual,
|
|
32987
33539
|
saveBaseline,
|
|
32988
33540
|
saveBenchmarkBaseline,
|