@remnic/cli 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -0
- package/dist/assets/download-datasets.sh +182 -0
- package/dist/chunk-GAZ3DFWX.js +12027 -0
- package/dist/dist-7DCVQLUB.js +292 -0
- package/dist/index.js +1191 -20
- package/package.json +5 -3
- package/dist/chunk-U4MQO3IF.js +0 -1144
- package/dist/dist-B67STFFX.js +0 -48
package/dist/index.js
CHANGED
|
@@ -1,9 +1,24 @@
|
|
|
1
1
|
import {
|
|
2
|
+
buildBenchmarkPublishFeed,
|
|
2
3
|
checkRegression,
|
|
4
|
+
compareResults,
|
|
5
|
+
defaultBenchmarkBaselineDir,
|
|
6
|
+
defaultBenchmarkPublishPath,
|
|
7
|
+
deleteBenchmarkResults,
|
|
8
|
+
discoverAllProviders,
|
|
9
|
+
getBenchmarkLowerIsBetter,
|
|
10
|
+
listBenchmarkBaselines,
|
|
11
|
+
listBenchmarkResults,
|
|
3
12
|
loadBaseline,
|
|
13
|
+
loadBenchmarkBaseline,
|
|
14
|
+
loadBenchmarkResult,
|
|
15
|
+
renderBenchmarkResultExport,
|
|
16
|
+
resolveBenchmarkResultReference,
|
|
4
17
|
runBenchSuite,
|
|
5
|
-
runExplain
|
|
6
|
-
|
|
18
|
+
runExplain,
|
|
19
|
+
saveBenchmarkBaseline,
|
|
20
|
+
writeBenchmarkPublishFeed
|
|
21
|
+
} from "./chunk-GAZ3DFWX.js";
|
|
7
22
|
|
|
8
23
|
// src/index.ts
|
|
9
24
|
import fs from "fs";
|
|
@@ -87,6 +102,186 @@ import {
|
|
|
87
102
|
resolveExtensionsRoot,
|
|
88
103
|
coerceInstallExtension
|
|
89
104
|
} from "@remnic/core";
|
|
105
|
+
import {
|
|
106
|
+
convertMemoriesToRecords,
|
|
107
|
+
getTrainingExportAdapter as getTrainingExportAdapter2,
|
|
108
|
+
listTrainingExportAdapters,
|
|
109
|
+
parseStrictCliDate
|
|
110
|
+
} from "@remnic/core";
|
|
111
|
+
|
|
112
|
+
// ../export-weclone/dist/index.js
|
|
113
|
+
import {
|
|
114
|
+
getTrainingExportAdapter,
|
|
115
|
+
registerTrainingExportAdapter
|
|
116
|
+
} from "@remnic/core";
|
|
117
|
+
/**
 * Training-export adapter registered under the name "weclone".
 *
 * `formatRecords` keeps only the `instruction`, `input`, and `output` fields
 * of every record (any other field is dropped) and serializes the resulting
 * array as JSON indented with two spaces.
 */
var wecloneExportAdapter = {
  name: "weclone",
  fileExtension: ".json",
  formatRecords(records) {
    // Project each record down to the three exported fields.
    const toAlpaca = ({ instruction, input, output }) => ({ instruction, input, output });
    const payload = records.map(toAlpaca);
    return JSON.stringify(payload, null, 2);
  }
};
|
|
129
|
+
// Number of question/answer pairs generated per record when the caller gives no limit.
var DEFAULT_MAX_PAIRS = 1;
// Question templates grouped by template key; "{topic}" is substituted per record.
var QUESTION_TEMPLATES = {
  preferences: [
    "What kind of {topic} do you like?",
    "What's your preference for {topic}?",
    "What are your favorite {topic}?"
  ],
  opinions: [
    "What do you think about {topic}?",
    "How do you feel about {topic}?",
    "What's your opinion on {topic}?"
  ],
  expertise: [
    "Tell me about {topic}.",
    "What do you know about {topic}?",
    "Can you explain {topic}?"
  ],
  personal: [
    "Can you tell me about your {topic}?",
    "Tell me about your {topic}.",
    "What can you share about your {topic}?"
  ]
};
// Templates used when a record's category maps to no template key.
var DEFAULT_TEMPLATES = [
  "Tell me about {topic}.",
  "What can you share about {topic}?"
];
// Lower-cased record category -> key of QUESTION_TEMPLATES.
var CATEGORY_TO_TEMPLATE = {
  preference: "preferences",
  fact: "expertise",
  entity: "expertise",
  skill: "expertise",
  correction: "opinions",
  decision: "opinions",
  principle: "opinions",
  rule: "opinions",
  personal: "personal",
  relationship: "personal",
  commitment: "personal",
  moment: "personal"
};
/**
 * Turns export records into question/answer training pairs.
 *
 * For each record, up to `maxPairsPerRecord` questions are produced by filling
 * the record's topic into the templates chosen for its category. The template
 * index is rotated by the record index so consecutive records use different
 * phrasings.
 *
 * @param {Array<object>} records Records with `instruction`, `output`, and
 *   optional `category`, `confidence`, `sourceIds`.
 * @param {object} [options]
 * @param {number} [options.maxPairsPerRecord] Upper bound on pairs per record
 *   (defaults to DEFAULT_MAX_PAIRS; capped at the number of templates).
 * @param {object} [options.styleMarkers] When `usesLowercase` is truthy the
 *   output text is lower-cased.
 * @returns {Array<object>} Pairs with `instruction`, empty `input`, `output`,
 *   and the record's `category`, `confidence`, `sourceIds`.
 */
function synthesizeTrainingPairs(records, options) {
  const maxPairs = options?.maxPairsPerRecord ?? DEFAULT_MAX_PAIRS;
  const style = options?.styleMarkers;
  const result = [];
  for (let i = 0; i < records.length; i++) {
    const record = records[i];
    const templateKey = resolveTemplateKey(record.category);
    const topic = extractTopic(record.instruction);
    const templates = QUESTION_TEMPLATES[templateKey] ?? DEFAULT_TEMPLATES;
    const pairCount = Math.min(maxPairs, templates.length);
    for (let j = 0; j < pairCount; j++) {
      const templateIndex = (i + j) % templates.length;
      // A replacer function inserts the topic verbatim; a plain string
      // replacement would expand "$&", "$1", "$$" sequences inside the topic.
      const question = templates[templateIndex].replace("{topic}", () => topic);
      let output = record.output;
      // Only strings can be lower-cased; leave any other value untouched.
      if (style?.usesLowercase && typeof output === "string") {
        output = output.toLowerCase();
      }
      result.push({
        instruction: question,
        input: "",
        output,
        category: record.category,
        confidence: record.confidence,
        sourceIds: record.sourceIds
      });
    }
  }
  return result;
}
/**
 * Maps a record category to a QUESTION_TEMPLATES key.
 *
 * @param {string} [category] Category name; matched case-insensitively.
 * @returns {string} The template key, or "" when the category is missing,
 *   not a string, or unmapped.
 */
function resolveTemplateKey(category) {
  if (typeof category !== "string" || category === "") return "";
  const normalized = category.toLowerCase();
  // Own-property check so inherited names such as "constructor" are not
  // mistaken for mapped categories.
  if (!Object.prototype.hasOwnProperty.call(CATEGORY_TO_TEMPLATE, normalized)) return "";
  return CATEGORY_TO_TEMPLATE[normalized] ?? "";
}
/**
 * Extracts the topic from the first parenthesized tag in an instruction.
 *
 * @param {string} instruction Instruction text, e.g. "Recall (Coffee)".
 * @returns {string} The trimmed, lower-cased tag content, or "this" when the
 *   instruction is not a string, has no tag, or the tag is blank.
 */
function extractTopic(instruction) {
  if (typeof instruction !== "string") return "this";
  const tagMatch = instruction.match(/\(([^()]+)\)/);
  if (tagMatch) {
    const topic = tagMatch[1].trim().toLowerCase();
    // A whitespace-only tag would otherwise yield questions like "Tell me about .".
    if (topic !== "") return topic;
  }
  return "this";
}
|
|
210
|
+
// Patterns whose matches are replaced with "[REDACTED]". They are applied in
// array order, each one running on the text left by the previous pattern.
var PII_PATTERNS = [
  // Email: user@domain.tld
  { name: "email", regex: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g },
  // SSN: 123-45-6789 (exactly 3-2-4 digit groups)
  { name: "ssn", regex: /\b\d{3}-\d{2}-\d{4}\b/g },
  // Credit card: 4 groups of 4 digits separated by dashes or spaces
  { name: "credit_card", regex: /\b\d{4}[-\s]\d{4}[-\s]\d{4}[-\s]\d{4}\b/g },
  // IP address: four octets 0-255
  { name: "ip_address", regex: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b/g },
  // Phone: optional +1- prefix, then 3-3-4 with dashes, dots, or spaces
  // Also matches (555) 123-4567 format
  { name: "phone", regex: /(?:\+\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]\d{3}[-.\s]\d{4}\b/g }
];
// Record fields that are scanned for PII.
var SCANNED_FIELDS = ["instruction", "input", "output"];
/**
 * Redacts PII from the scanned fields of every record.
 *
 * Input records are not mutated; each one is shallow-copied and falsy field
 * values are left as they are.
 *
 * @param {Array<object>} records Records to sanitize.
 * @returns {{cleanRecords: Array<object>, redactedCount: number, redactionDetails: Array<object>}}
 *   `redactedCount` is the number of records with at least one redaction;
 *   `redactionDetails` has one `{ index, field, pattern }` entry per
 *   (record, field, pattern) combination that matched.
 */
function sweepPii(records) {
  const redactionDetails = [];
  const touchedIndexes = /* @__PURE__ */ new Set();
  const cleanRecords = records.map((record, idx) => {
    const copy = { ...record };
    SCANNED_FIELDS.forEach((field) => {
      const original = record[field];
      if (!original) return;
      let text = original;
      PII_PATTERNS.forEach(({ name, regex }) => {
        // The regexes are shared and global, so `lastIndex` is reset around
        // the stateful `test` call.
        regex.lastIndex = 0;
        const found = regex.test(text);
        regex.lastIndex = 0;
        if (!found) return;
        text = text.replace(regex, "[REDACTED]");
        touchedIndexes.add(idx);
        redactionDetails.push({ index: idx, field, pattern: name });
      });
      copy[field] = text;
    });
    return copy;
  });
  return {
    cleanRecords,
    redactedCount: touchedIndexes.size,
    redactionDetails
  };
}
|
|
274
|
+
/**
 * Registers the weclone export adapter unless an adapter with the same name
 * is already present.
 *
 * @returns {boolean} `true` when this call performed the registration,
 *   `false` when an adapter with that name already existed.
 */
function ensureWecloneExportAdapterRegistered() {
  const alreadyRegistered = getTrainingExportAdapter(wecloneExportAdapter.name) !== void 0;
  if (!alreadyRegistered) {
    registerTrainingExportAdapter(wecloneExportAdapter);
  }
  return !alreadyRegistered;
}
// Registration is attempted once at module load. Any exception is swallowed
// (presumably so a registry problem cannot abort CLI startup; confirm).
try {
  ensureWecloneExportAdapterRegistered();
} catch (_registrationError) {
  // Intentionally ignored.
}
|
|
90
285
|
|
|
91
286
|
// src/service-candidates.ts
|
|
92
287
|
function firstSuccessfulResult(candidates, attempt) {
|
|
@@ -143,7 +338,7 @@ function collectBenchmarks(argv) {
|
|
|
143
338
|
const benchmarks = [];
|
|
144
339
|
for (let index = 0; index < argv.length; index += 1) {
|
|
145
340
|
const arg = argv[index];
|
|
146
|
-
if (arg === "--dataset-dir") {
|
|
341
|
+
if (arg === "--dataset-dir" || arg === "--results-dir" || arg === "--baselines-dir" || arg === "--threshold" || arg === "--custom" || arg === "--format" || arg === "--output" || arg === "--target") {
|
|
147
342
|
index += 1;
|
|
148
343
|
continue;
|
|
149
344
|
}
|
|
@@ -155,7 +350,7 @@ function collectBenchmarks(argv) {
|
|
|
155
350
|
}
|
|
156
351
|
function parseBenchActionArgs(argv) {
|
|
157
352
|
const [first, ...rest] = argv;
|
|
158
|
-
const action = first === "list" || first === "run" || first === "check" || first === "report" ? first : first === void 0 || first === "--help" || first === "-h" ? "help" : "run";
|
|
353
|
+
const action = first === "list" || first === "run" || first === "datasets" || first === "runs" || first === "compare" || first === "ui" || first === "results" || first === "baseline" || first === "export" || first === "providers" || first === "publish" || first === "check" || first === "report" ? first : first === void 0 || first === "--help" || first === "-h" ? "help" : "run";
|
|
159
354
|
return {
|
|
160
355
|
action,
|
|
161
356
|
args: action === "run" && action !== first ? argv : rest
|
|
@@ -163,15 +358,72 @@ function parseBenchActionArgs(argv) {
|
|
|
163
358
|
}
|
|
164
359
|
function parseBenchArgs(argv) {
|
|
165
360
|
const { action, args } = parseBenchActionArgs(argv);
|
|
166
|
-
const
|
|
361
|
+
const baselineAction = action === "baseline" ? args[0] === "save" || args[0] === "list" ? args[0] : void 0 : void 0;
|
|
362
|
+
const datasetAction = action === "datasets" ? args[0] === "download" || args[0] === "status" ? args[0] : void 0 : void 0;
|
|
363
|
+
const providerAction = action === "providers" ? args[0] === "discover" ? args[0] : void 0 : void 0;
|
|
364
|
+
const runAction = action === "runs" ? args[0] === "list" || args[0] === "show" || args[0] === "delete" ? args[0] : void 0 : void 0;
|
|
365
|
+
if (action === "baseline" && baselineAction === void 0) {
|
|
366
|
+
throw new Error("ERROR: baseline requires a subcommand: save or list.");
|
|
367
|
+
}
|
|
368
|
+
if (action === "datasets" && datasetAction === void 0) {
|
|
369
|
+
throw new Error("ERROR: datasets requires a subcommand: download or status.");
|
|
370
|
+
}
|
|
371
|
+
if (action === "providers" && providerAction === void 0) {
|
|
372
|
+
throw new Error("ERROR: providers requires a subcommand: discover.");
|
|
373
|
+
}
|
|
374
|
+
if (action === "runs" && runAction === void 0) {
|
|
375
|
+
throw new Error("ERROR: runs requires a subcommand: list, show, or delete.");
|
|
376
|
+
}
|
|
377
|
+
const benchmarkArgs = action === "baseline" || action === "datasets" || action === "providers" || action === "runs" ? args.slice(1) : args;
|
|
378
|
+
const benchmarks = collectBenchmarks(benchmarkArgs);
|
|
167
379
|
const datasetDir = readBenchOptionValue(args, "--dataset-dir");
|
|
380
|
+
const resultsDir = readBenchOptionValue(args, "--results-dir");
|
|
381
|
+
const baselinesDir = readBenchOptionValue(args, "--baselines-dir");
|
|
382
|
+
const thresholdRaw = readBenchOptionValue(args, "--threshold");
|
|
383
|
+
const customRaw = readBenchOptionValue(args, "--custom");
|
|
384
|
+
const formatRaw = readBenchOptionValue(args, "--format");
|
|
385
|
+
const output = readBenchOptionValue(args, "--output");
|
|
386
|
+
const targetRaw = readBenchOptionValue(args, "--target");
|
|
387
|
+
let threshold;
|
|
388
|
+
if (thresholdRaw !== void 0) {
|
|
389
|
+
threshold = Number(thresholdRaw);
|
|
390
|
+
if (!Number.isFinite(threshold) || threshold < 0) {
|
|
391
|
+
throw new Error("ERROR: --threshold must be a non-negative number.");
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
let format;
|
|
395
|
+
if (formatRaw !== void 0) {
|
|
396
|
+
if (formatRaw !== "json" && formatRaw !== "csv" && formatRaw !== "html") {
|
|
397
|
+
throw new Error('ERROR: --format must be "json", "csv", or "html".');
|
|
398
|
+
}
|
|
399
|
+
format = formatRaw;
|
|
400
|
+
}
|
|
401
|
+
let target;
|
|
402
|
+
if (targetRaw !== void 0) {
|
|
403
|
+
if (targetRaw !== "remnic-ai") {
|
|
404
|
+
throw new Error('ERROR: --target must be "remnic-ai".');
|
|
405
|
+
}
|
|
406
|
+
target = targetRaw;
|
|
407
|
+
}
|
|
168
408
|
return {
|
|
169
409
|
action,
|
|
170
410
|
benchmarks,
|
|
171
411
|
quick: args.includes("--quick"),
|
|
172
412
|
all: args.includes("--all"),
|
|
173
413
|
json: args.includes("--json"),
|
|
174
|
-
|
|
414
|
+
detail: args.includes("--detail"),
|
|
415
|
+
datasetDir: datasetDir ? path.resolve(expandTilde(datasetDir)) : void 0,
|
|
416
|
+
resultsDir: resultsDir ? path.resolve(expandTilde(resultsDir)) : void 0,
|
|
417
|
+
baselinesDir: baselinesDir ? path.resolve(expandTilde(baselinesDir)) : void 0,
|
|
418
|
+
threshold,
|
|
419
|
+
custom: customRaw ? path.resolve(expandTilde(customRaw)) : void 0,
|
|
420
|
+
baselineAction,
|
|
421
|
+
datasetAction,
|
|
422
|
+
providerAction,
|
|
423
|
+
runAction,
|
|
424
|
+
format,
|
|
425
|
+
output: output ? path.resolve(expandTilde(output)) : void 0,
|
|
426
|
+
target
|
|
175
427
|
};
|
|
176
428
|
}
|
|
177
429
|
|
|
@@ -296,12 +548,29 @@ var BENCHMARK_CATALOG = [
|
|
|
296
548
|
];
|
|
297
549
|
var BENCHMARK_IDS = new Set(BENCHMARK_CATALOG.map((entry) => entry.id));
|
|
298
550
|
function getBenchUsageText() {
|
|
299
|
-
return `Usage: remnic bench <list|run> [options] [benchmark...]
|
|
300
|
-
remnic benchmark <list|run|check|report> [options] [benchmark...]
|
|
551
|
+
return `Usage: remnic bench <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers> [options] [benchmark...]
|
|
552
|
+
remnic benchmark <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers|check|report> [options] [benchmark...]
|
|
301
553
|
|
|
302
554
|
Commands:
|
|
303
555
|
list List published benchmark packs
|
|
304
556
|
run [benchmark...] Run one or more benchmark packs
|
|
557
|
+
datasets download [benchmark...]
|
|
558
|
+
Download local datasets for supported published benchmarks
|
|
559
|
+
datasets status Show local dataset availability for supported benchmarks
|
|
560
|
+
runs list List stored benchmark runs
|
|
561
|
+
runs show <run> Show one stored benchmark run
|
|
562
|
+
runs delete <run...> Delete one or more stored benchmark runs
|
|
563
|
+
compare <base> <cand> Compare two stored benchmark runs by id or file path
|
|
564
|
+
results [run] List stored runs or inspect a stored run
|
|
565
|
+
baseline save <name> [run]
|
|
566
|
+
Save a stored run as a named baseline
|
|
567
|
+
baseline list List saved baselines
|
|
568
|
+
export <run> --format <json|csv|html>
|
|
569
|
+
Export one stored run as JSON, aggregate-metrics CSV, or static HTML
|
|
570
|
+
publish --target remnic-ai
|
|
571
|
+
Generate the Remnic.ai benchmark feed from stored runs
|
|
572
|
+
ui Launch the local benchmark overview UI
|
|
573
|
+
providers discover Auto-detect available local provider backends
|
|
305
574
|
check Legacy latency regression gate (compatibility)
|
|
306
575
|
report Legacy latency report generator (compatibility)
|
|
307
576
|
|
|
@@ -309,12 +578,36 @@ Options:
|
|
|
309
578
|
--quick Run a lightweight quick pass (maps to --lightweight --limit 1)
|
|
310
579
|
--all Run every published benchmark
|
|
311
580
|
--dataset-dir <path> Override the benchmark dataset directory for full runs
|
|
581
|
+
--custom <path> Run a YAML-defined custom benchmark file
|
|
582
|
+
--results-dir <path> Override the stored benchmark results directory
|
|
583
|
+
--baselines-dir <path> Override the named baseline directory
|
|
584
|
+
--threshold <value> Regression threshold for compare (default: 0.05)
|
|
585
|
+
--detail Include per-task details for bench results
|
|
586
|
+
--format <json|csv|html> Output format for bench export
|
|
587
|
+
--output <path> Write bench export output to a file
|
|
588
|
+
--target <name> Publish target for bench publish (remnic-ai)
|
|
312
589
|
--json Output JSON for \`list\`
|
|
313
590
|
|
|
314
591
|
Examples:
|
|
315
592
|
remnic bench list
|
|
593
|
+
remnic bench datasets status
|
|
594
|
+
remnic bench datasets download longmemeval
|
|
595
|
+
remnic bench datasets download --all
|
|
596
|
+
remnic bench runs list
|
|
597
|
+
remnic bench runs show candidate-run --detail
|
|
598
|
+
remnic bench runs delete candidate-run
|
|
316
599
|
remnic bench run --quick longmemeval
|
|
317
600
|
remnic bench run longmemeval --dataset-dir ~/datasets/longmemeval
|
|
601
|
+
remnic bench compare base-run candidate-run
|
|
602
|
+
remnic bench results
|
|
603
|
+
remnic bench results candidate-run --detail
|
|
604
|
+
remnic bench baseline save main candidate-run
|
|
605
|
+
remnic bench baseline list
|
|
606
|
+
remnic bench export candidate-run --format csv --output ./candidate.csv
|
|
607
|
+
remnic bench export candidate-run --format html --output ./report.html
|
|
608
|
+
remnic bench publish --target remnic-ai
|
|
609
|
+
remnic bench providers discover
|
|
610
|
+
remnic bench run --custom ./my-bench.yaml
|
|
318
611
|
remnic benchmark run --quick longmemeval`;
|
|
319
612
|
}
|
|
320
613
|
function buildBenchRunnerArgs(parsed, benchmarkId) {
|
|
@@ -347,7 +640,7 @@ async function listBenchmarksFromPackage() {
|
|
|
347
640
|
}
|
|
348
641
|
async function loadBenchDefinitionsFromPackage() {
|
|
349
642
|
try {
|
|
350
|
-
const benchModule = await import("./dist-
|
|
643
|
+
const benchModule = await import("./dist-7DCVQLUB.js");
|
|
351
644
|
if (!benchModule.listBenchmarks) return void 0;
|
|
352
645
|
const result = benchModule.listBenchmarks();
|
|
353
646
|
return Array.isArray(result) ? result : void 0;
|
|
@@ -395,6 +688,154 @@ async function runBenchViaFallback(parsed, benchmarkId) {
|
|
|
395
688
|
/**
 * Returns the results directory used when no override is given:
 * `<home>/.remnic/bench/results`.
 */
function resolveBenchOutputDir() {
  const homeDir = resolveHomeDir();
  return path2.join(homeDir, ".remnic", "bench", "results");
}
|
|
691
|
+
// Benchmark ids whose datasets can be fetched by the download script.
var DOWNLOADABLE_BENCHMARK_DATASETS = ["ama-bench", "memory-arena", "amemgym", "longmemeval", "locomo"];
// Per-benchmark evidence that a dataset directory is populated:
//   anyOf - at least one of these file names exists as a regular file
//   ext   - at least one directory entry ends with this extension
var DOWNLOADED_DATASET_MARKERS = {
  "ama-bench": { anyOf: ["open_end_qa_set.jsonl"] },
  longmemeval: {
    anyOf: ["longmemeval_oracle.json", "longmemeval_s_cleaned.json", "longmemeval.json"]
  },
  amemgym: {
    anyOf: ["amemgym-v1-base.json", "amemgym-tasks.json", "data.json"]
  },
  locomo: { anyOf: ["locomo10.json", "locomo.json"] },
  "memory-arena": { ext: ".jsonl" }
};
/**
 * Reports whether `datasetPath` looks like a downloaded dataset for
 * `benchmarkId`.
 *
 * The path must be an existing directory. Benchmarks with a marker entry are
 * checked against that marker; any other benchmark counts as downloaded when
 * the directory is non-empty. File-system errors are treated as "not
 * downloaded".
 *
 * @param {string} datasetPath Directory to inspect.
 * @param {string} benchmarkId Benchmark identifier.
 * @returns {boolean}
 */
function isDatasetDownloaded(datasetPath, benchmarkId) {
  const listEntries = () => {
    try {
      return fs.readdirSync(datasetPath);
    } catch {
      return null;
    }
  };
  const hasRegularFile = (fileName) => {
    try {
      return fs.statSync(path2.join(datasetPath, fileName)).isFile();
    } catch {
      return false;
    }
  };
  let rootStats;
  try {
    rootStats = fs.statSync(datasetPath);
  } catch {
    return false;
  }
  if (!rootStats.isDirectory()) return false;
  const marker = DOWNLOADED_DATASET_MARKERS[benchmarkId];
  if (!marker) {
    // No marker known: any content at all counts.
    const entries = listEntries();
    return entries !== null && entries.length > 0;
  }
  if (marker.anyOf) {
    return marker.anyOf.some(hasRegularFile);
  }
  if (marker.ext) {
    const entries = listEntries();
    return entries !== null && entries.some((entry) => entry.endsWith(marker.ext));
  }
  return false;
}
|
|
745
|
+
/**
 * Starts the bench UI dev server (`pnpm exec vite --host 127.0.0.1`) from the
 * `packages/bench-ui` directory of the repo and waits for it to exit.
 *
 * The results directory is handed to the child process through the
 * `REMNIC_BENCH_RESULTS_DIR` environment variable. If the bench-ui package is
 * not present, an error is printed and the process exits with code 1.
 *
 * @param {string} resultsDir Directory with stored benchmark results.
 * @returns {Promise<void>} Resolves when the child exits with code 0 or is
 *   stopped by SIGINT/SIGTERM; rejects on spawn errors or any other exit.
 */
async function launchBenchUi(resultsDir) {
  const onWindows = process.platform === "win32";
  const benchUiDir = path2.join(CLI_REPO_ROOT, "packages", "bench-ui");
  const manifestPath = path2.join(benchUiDir, "package.json");
  if (!fs.existsSync(manifestPath)) {
    console.error("ERROR: @remnic/bench-ui is not available in this checkout.");
    process.exit(1);
  }
  console.log(`Launching bench UI with results from ${resultsDir}`);
  console.log("Press Ctrl+C to stop the local server.");
  const spawnOptions = {
    cwd: benchUiDir,
    stdio: "inherit",
    shell: onWindows,
    env: {
      ...process.env,
      REMNIC_BENCH_RESULTS_DIR: resultsDir
    }
  };
  const child = childProcess.spawn(
    onWindows ? "pnpm.cmd" : "pnpm",
    ["exec", "vite", "--host", "127.0.0.1"],
    spawnOptions
  );
  await new Promise((resolve, reject) => {
    child.on("error", reject);
    child.on("close", (code, signal) => {
      const stoppedCleanly = code === 0 || signal === "SIGINT" || signal === "SIGTERM";
      if (!stoppedCleanly) {
        reject(new Error(`bench UI exited with code ${code ?? "unknown"}`));
        return;
      }
      resolve();
    });
  });
}
|
|
774
|
+
/**
 * Returns the baseline directory used when `--baselines-dir` is not given;
 * the value comes straight from `defaultBenchmarkBaselineDir()`.
 */
function resolveBenchBaselineDir() {
  const baselineDir = defaultBenchmarkBaselineDir();
  return baselineDir;
}
|
|
777
|
+
/**
 * Returns the root directory that holds benchmark datasets:
 * `<repo>/evals/datasets` inside a repo checkout, otherwise
 * `<home>/.remnic/bench/datasets`.
 */
function resolveRepoDatasetRoot() {
  if (!isRepoCheckout()) {
    return path2.join(resolveHomeDir(), ".remnic", "bench", "datasets");
  }
  return path2.join(CLI_REPO_ROOT, "evals", "datasets");
}
|
|
784
|
+
/**
 * Returns a fresh copy of the downloadable benchmark ids, so callers can
 * modify the result without touching the shared list.
 */
function listDownloadableBenchmarks() {
  return DOWNLOADABLE_BENCHMARK_DATASETS.slice();
}
|
|
787
|
+
/**
 * Locates the dataset download script: the copy bundled under
 * `<module dir>/assets` when it exists, otherwise the copy at
 * `<repo>/evals/scripts` (returned without checking that it exists).
 */
function resolveDatasetDownloadScriptPath() {
  const scriptName = "download-datasets.sh";
  const bundledScript = path2.join(CLI_MODULE_DIR, "assets", scriptName);
  return fs.existsSync(bundledScript)
    ? bundledScript
    : path2.join(CLI_REPO_ROOT, "evals", "scripts", scriptName);
}
|
|
794
|
+
/**
 * Reports whether the CLI runs from a repo checkout: both
 * `pnpm-workspace.yaml` and `evals/scripts/download-datasets.sh` must exist
 * under `CLI_REPO_ROOT`.
 */
function isRepoCheckout() {
  const requiredPaths = [
    path2.join(CLI_REPO_ROOT, "pnpm-workspace.yaml"),
    path2.join(CLI_REPO_ROOT, "evals", "scripts", "download-datasets.sh")
  ];
  return requiredPaths.every((requiredPath) => fs.existsSync(requiredPath));
}
|
|
797
|
+
/**
 * Runs the dataset download script synchronously for one benchmark.
 *
 * The script receives `--benchmark <id>` and the `DATASETS_DIR` environment
 * variable. In JSON mode the child's stdout is redirected to this process's
 * stderr. On Windows the script is run through `bash`, whose availability is
 * probed first; elsewhere the script file is executed directly.
 *
 * @param {string} scriptPath Path to the download script.
 * @param {string} benchmarkId Benchmark whose dataset is downloaded.
 * @param {string} datasetRoot Value passed as `DATASETS_DIR`.
 * @param {boolean} jsonMode Whether the CLI is producing JSON output.
 * @throws {Error} On Windows when `bash` is unavailable; also whatever
 *   `execFileSync` throws when the script fails.
 */
function runDatasetDownloadScript(scriptPath, benchmarkId, datasetRoot, jsonMode) {
  const execOptions = {
    cwd: CLI_REPO_ROOT,
    stdio: jsonMode ? ["inherit", process.stderr, "inherit"] : "inherit",
    env: { ...process.env, DATASETS_DIR: datasetRoot }
  };
  const scriptArgs = ["--benchmark", benchmarkId];
  if (process.platform === "win32") {
    const { error, status } = childProcess.spawnSync("bash", ["--version"], { stdio: "ignore" });
    if (error || status !== 0) {
      throw new Error(
        "bench datasets download requires bash on Windows (Git Bash or WSL). Install bash or run this command from a Unix shell."
      );
    }
    childProcess.execFileSync("bash", [scriptPath, ...scriptArgs], execOptions);
    return;
  }
  childProcess.execFileSync(scriptPath, scriptArgs, execOptions);
}
|
|
818
|
+
/**
 * Determines which datasets a `datasets download` invocation should fetch.
 *
 * `--all` selects every downloadable benchmark. Otherwise the named
 * benchmarks are de-duplicated and validated; an empty selection or an
 * unsupported id prints an error and exits with code 1.
 *
 * @param {object} parsed Parsed bench arguments (`all`, `benchmarks`).
 * @returns {string[]} Benchmark ids to download.
 */
function resolveSelectedDatasetDownloads(parsed) {
  const supported = listDownloadableBenchmarks();
  if (parsed.all) {
    return supported;
  }
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };
  if (parsed.benchmarks.length === 0) {
    fail(
      "ERROR: datasets download requires at least one benchmark id or --all. Usage: remnic bench datasets download <benchmark...> [--all] [--json]"
    );
  }
  const supportedIds = new Set(supported);
  const selected = Array.from(new Set(parsed.benchmarks));
  const unsupported = selected.filter((benchmarkId) => !supportedIds.has(benchmarkId));
  if (unsupported.length > 0) {
    fail(
      `ERROR: unsupported downloadable benchmark dataset(s): ${unsupported.join(", ")}. Supported datasets: ${supported.join(", ")}.`
    );
  }
  return selected;
}
|
|
398
839
|
function resolveBenchDatasetDir(benchmarkId, quick, datasetDirOverride) {
|
|
399
840
|
if (datasetDirOverride) {
|
|
400
841
|
return datasetDirOverride;
|
|
@@ -402,14 +843,13 @@ function resolveBenchDatasetDir(benchmarkId, quick, datasetDirOverride) {
|
|
|
402
843
|
if (quick) {
|
|
403
844
|
return void 0;
|
|
404
845
|
}
|
|
405
|
-
const
|
|
406
|
-
|
|
407
|
-
return
|
|
408
|
-
} catch {
|
|
409
|
-
return void 0;
|
|
846
|
+
const datasetDir = path2.join(resolveRepoDatasetRoot(), benchmarkId);
|
|
847
|
+
if (isDatasetDownloaded(datasetDir, benchmarkId)) {
|
|
848
|
+
return datasetDir;
|
|
410
849
|
}
|
|
850
|
+
return void 0;
|
|
411
851
|
}
|
|
412
|
-
function printBenchPackageSummary(result, outputPath) {
|
|
852
|
+
function printBenchPackageSummary(result, outputPath, outputLabel = "Results saved") {
|
|
413
853
|
console.log(`Benchmark: ${result.meta.benchmark}`);
|
|
414
854
|
console.log(`Mode: ${result.meta.mode}`);
|
|
415
855
|
console.log(`Tasks: ${result.results.tasks.length}`);
|
|
@@ -417,12 +857,426 @@ function printBenchPackageSummary(result, outputPath) {
|
|
|
417
857
|
for (const [metric, aggregate] of Object.entries(result.results.aggregates).sort()) {
|
|
418
858
|
console.log(` ${metric.padEnd(20)} ${aggregate.mean.toFixed(4)}`);
|
|
419
859
|
}
|
|
420
|
-
console.log(
|
|
860
|
+
console.log(`${outputLabel}: ${outputPath}`);
|
|
861
|
+
}
|
|
862
|
+
/**
 * Prints the summary of a stored benchmark result, labelling its file path
 * as "Stored result", followed by the run id.
 *
 * @param {object} result Loaded benchmark result.
 * @param {object} summary Stored-run summary providing `path` and `id`.
 */
function printStoredBenchResultSummary(result, summary) {
  const { path: storedPath, id: runId } = summary;
  printBenchPackageSummary(result, storedPath, "Stored result");
  console.log(`Run id: ${runId}`);
}
|
|
866
|
+
/**
 * Prints the stored-result summary followed by one line per task showing its
 * latency and its scores (sorted by metric name).
 *
 * @param {object} result Loaded benchmark result; reads `results.tasks`.
 * @param {object} summary Stored-run summary for the same result.
 */
function printStoredBenchResultDetails(result, summary) {
  printStoredBenchResultSummary(result, summary);
  const { tasks } = result.results;
  if (tasks.length === 0) {
    console.log("Tasks: none");
    return;
  }
  console.log("Task breakdown:");
  for (const task of tasks) {
    const sortedScores = Object.entries(task.scores).sort(
      ([left], [right]) => left.localeCompare(right)
    );
    const scoreParts = [];
    for (const [metric, value] of sortedScores) {
      scoreParts.push(`${metric}=${value.toFixed(4)}`);
    }
    const scores = scoreParts.join(", ");
    const scoreSuffix = scores.length > 0 ? ` [${scores}]` : "";
    console.log(` ${task.taskId}: ${task.latencyMs.toFixed(1)}ms${scoreSuffix}`);
  }
}
|
|
880
|
+
/**
 * Prints a human-readable comparison of two stored benchmark runs.
 *
 * Output: benchmark name, baseline and candidate ids/paths, the verdict, and
 * one line per metric (sorted by name) with baseline -> candidate values, the
 * signed delta, the percent change and the effect size. A second line shows
 * the delta's confidence interval when `ciOnDelta` is present.
 *
 * @param {object} comparison Comparison with `benchmark`, `verdict`, and
 *   `metricDeltas` keyed by metric name.
 * @param {object} baseline Stored-run summary (`id`, `path`) of the baseline.
 * @param {object} candidate Stored-run summary (`id`, `path`) of the candidate.
 */
function printBenchComparisonSummary(comparison, baseline, candidate) {
  // Formats a fractional change as a percentage. NaN (for example a 0 -> 0
  // change) has no direction, so it is reported as "n/a" rather than being
  // mislabelled as "-Infinity%".
  const formatPercentChange = (percentChange) => {
    if (Number.isFinite(percentChange)) {
      return `${(percentChange * 100).toFixed(2)}%`;
    }
    if (Number.isNaN(percentChange)) {
      return "n/a";
    }
    return percentChange > 0 ? "+Infinity%" : "-Infinity%";
  };
  console.log(`Benchmark: ${comparison.benchmark}`);
  console.log(`Baseline: ${baseline.id} (${baseline.path})`);
  console.log(`Candidate: ${candidate.id} (${candidate.path})`);
  console.log(`Verdict: ${comparison.verdict}`);
  const metrics = Object.entries(comparison.metricDeltas).sort(
    ([left], [right]) => left.localeCompare(right)
  );
  if (metrics.length === 0) {
    console.log("No overlapping metrics were found between the two results.");
    return;
  }
  console.log("Metrics:");
  for (const [metric, delta] of metrics) {
    const percent = formatPercentChange(delta.percentChange);
    // Negative numbers already carry their sign; only non-negatives need "+".
    const direction = delta.delta >= 0 ? "+" : "";
    console.log(
      ` ${metric.padEnd(18)} ${delta.baseline.toFixed(4)} -> ${delta.candidate.toFixed(4)} (${direction}${delta.delta.toFixed(4)}, ${percent}, d=${delta.effectSize.cohensD.toFixed(3)} ${delta.effectSize.interpretation})`
    );
    if (delta.ciOnDelta) {
      console.log(
        ` CI95 delta: [${delta.ciOnDelta.lower.toFixed(4)}, ${delta.ciOnDelta.upper.toFixed(4)}]`
      );
    }
  }
}
|
|
906
|
+
/**
 * Implements `bench compare <baseline> <candidate>`.
 *
 * Resolves both references inside the results directory, loads the two
 * results, refuses to compare runs of different benchmarks, and prints the
 * comparison as JSON or as text. The process exits with code 1 on usage
 * errors, on unresolved references, on a benchmark mismatch, and when the
 * verdict is "regression".
 *
 * @param {object} parsed Parsed bench arguments; reads `benchmarks`,
 *   `resultsDir`, `threshold`, and `json`.
 * @returns {Promise<void>}
 */
async function compareBenchPackageResults(parsed) {
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };
  const { benchmarks: refs } = parsed;
  if (refs.length !== 2) {
    fail(
      "ERROR: compare requires exactly two stored result references. Usage: remnic bench compare <baseline> <candidate> [--results-dir <path>] [--threshold <value>] [--json]"
    );
  }
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const [baselineRef, candidateRef] = refs;
  // Both references are resolved before either is validated.
  const baselineSummary = await resolveBenchmarkResultReference(resultsDir, baselineRef);
  const candidateSummary = await resolveBenchmarkResultReference(resultsDir, candidateRef);
  if (!baselineSummary) {
    fail(`ERROR: benchmark result not found: ${baselineRef}`);
  }
  if (!candidateSummary) {
    fail(`ERROR: benchmark result not found: ${candidateRef}`);
  }
  const baseline = await loadBenchmarkResult(baselineSummary.path);
  const candidate = await loadBenchmarkResult(candidateSummary.path);
  if (baseline.meta.benchmark !== candidate.meta.benchmark) {
    fail(
      `ERROR: benchmark mismatch: ${baseline.meta.benchmark} vs ${candidate.meta.benchmark}. Compare runs from the same benchmark.`
    );
  }
  const threshold = parsed.threshold ?? 0.05;
  const lowerIsBetter = getBenchmarkLowerIsBetter(candidate.meta.benchmark);
  const comparison = compareResults(baseline, candidate, threshold, lowerIsBetter);
  if (!parsed.json) {
    printBenchComparisonSummary(comparison, baselineSummary, candidateSummary);
  } else {
    const report = {
      benchmark: comparison.benchmark,
      baseline: baselineSummary,
      candidate: candidateSummary,
      comparison
    };
    console.log(JSON.stringify(report, null, 2));
  }
  // A regression is signalled to the caller through the exit code.
  if (comparison.verdict === "regression") {
    process.exit(1);
  }
}
|
|
954
|
+
/**
 * Implements `bench results [run]`.
 *
 * Without a reference it lists the stored runs (as JSON or as a table). With
 * one reference it loads that run and prints it as JSON, as a detailed
 * per-task breakdown (`--detail`), or as a summary. More than one reference,
 * or a reference that cannot be resolved, prints an error and exits with
 * code 1.
 *
 * @param {object} parsed Parsed bench arguments; reads `benchmarks`,
 *   `resultsDir`, `json`, and `detail`.
 * @returns {Promise<void>}
 */
async function showBenchPackageResults(parsed) {
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const references = parsed.benchmarks;
  if (references.length === 0) {
    const summaries = await listBenchmarkResults(resultsDir);
    if (parsed.json) {
      console.log(JSON.stringify(summaries, null, 2));
    } else if (summaries.length === 0) {
      console.log(`No stored benchmark runs found in ${resultsDir}`);
    } else {
      console.log("Stored benchmark runs:");
      summaries.forEach((entry) => {
        console.log(
          ` ${entry.id.padEnd(24)} ${entry.benchmark.padEnd(16)} ${entry.mode.padEnd(5)} ${entry.timestamp}`
        );
      });
    }
    return;
  }
  if (references.length !== 1) {
    console.error(
      "ERROR: results accepts at most one stored result reference. Usage: remnic bench results [run] [--detail] [--results-dir <path>] [--json]"
    );
    process.exit(1);
  }
  const reference = references[0];
  const summary = await resolveBenchmarkResultReference(resultsDir, reference);
  if (!summary) {
    console.error(`ERROR: benchmark result not found: ${reference}`);
    process.exit(1);
  }
  const result = await loadBenchmarkResult(summary.path);
  if (parsed.json) {
    console.log(JSON.stringify(result, null, 2));
    return;
  }
  const printResult = parsed.detail ? printStoredBenchResultDetails : printStoredBenchResultSummary;
  printResult(result, summary);
}
|
|
997
|
+
/**
 * Handle `remnic bench baseline <list|save>`.
 *
 * `list` prints the baselines stored in the baselines directory.
 * `save <name> [run]` loads a stored result (the explicit reference, or the
 * first entry returned by `listBenchmarkResults` when none is given) and
 * persists it as a named baseline.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `baselinesDir`,
 *   `baselineAction`, `benchmarks`, `resultsDir` and `json`.
 * @returns {Promise<void>}
 */
async function manageBenchBaselines(parsed) {
  const baselineDir = parsed.baselinesDir ?? resolveBenchBaselineDir();
  const action = parsed.baselineAction;

  if (action === "list") {
    const saved = await listBenchmarkBaselines(baselineDir);
    if (parsed.json) {
      console.log(JSON.stringify(saved, null, 2));
    } else if (saved.length === 0) {
      console.log(`No saved baselines found in ${baselineDir}`);
    } else {
      console.log("Saved baselines:");
      for (const item of saved) {
        const label = item.name.padEnd(20);
        const bench = item.benchmark.padEnd(16);
        const mode = item.mode.padEnd(5);
        console.log(` ${label} ${bench} ${mode} ${item.timestamp}`);
      }
    }
    return;
  }

  if (action !== "save") {
    console.error("ERROR: baseline requires a subcommand: save or list.");
    process.exit(1);
  }

  const positionalCount = parsed.benchmarks.length;
  if (positionalCount < 1 || positionalCount > 2) {
    console.error(
      "ERROR: baseline save requires a name and optionally one stored result reference. Usage: remnic bench baseline save <name> [run] [--results-dir <path>] [--baselines-dir <path>] [--json]"
    );
    process.exit(1);
  }

  const [name, explicitReference] = parsed.benchmarks;
  const resultsDir = parsed.resultsDir ?? resolveBenchOutputDir();

  // Pick the source run: the explicit reference wins, otherwise the first listed run.
  let sourceRun;
  if (explicitReference) {
    sourceRun = await resolveBenchmarkResultReference(resultsDir, explicitReference);
  } else {
    const allRuns = await listBenchmarkResults(resultsDir);
    sourceRun = allRuns[0];
  }
  if (!sourceRun) {
    const reason = explicitReference
      ? `ERROR: benchmark result not found: ${explicitReference}`
      : `ERROR: no stored benchmark runs found in ${resultsDir}`;
    console.error(reason);
    process.exit(1);
  }

  const loaded = await loadBenchmarkResult(sourceRun.path);
  let baselinePath;
  try {
    const origin = { id: sourceRun.id, path: sourceRun.path };
    baselinePath = await saveBenchmarkBaseline(baselineDir, name, loaded, origin);
  } catch (failure) {
    console.error(failure instanceof Error ? failure.message : String(failure));
    process.exit(1);
  }

  if (!parsed.json) {
    console.log(`Saved baseline "${name}" to ${baselinePath}`);
    console.log(` Source run: ${sourceRun.id}`);
    console.log(` Benchmark: ${loaded.meta.benchmark}`);
    return;
  }

  // JSON mode reports what was actually persisted, so re-read the written baseline.
  const persisted = await loadBenchmarkBaseline(baselinePath);
  const report = {
    name: persisted.name,
    path: baselinePath,
    source: persisted.source,
    benchmark: persisted.result.meta.benchmark,
    timestamp: persisted.savedAt
  };
  console.log(JSON.stringify(report, null, 2));
}
|
|
1064
|
+
/**
 * Handle `remnic bench export <run> --format <json|csv|html>`.
 *
 * Resolves one stored result, renders it with `renderBenchmarkResultExport`,
 * and either writes the rendering to `parsed.output` (creating the parent
 * directory) or streams it to stdout.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `benchmarks`,
 *   `format`, `resultsDir` and `output`.
 * @returns {Promise<void>}
 */
async function exportBenchPackageResult(parsed) {
  const positionalCount = parsed.benchmarks.length;
  if (positionalCount !== 1) {
    console.error(
      "ERROR: export requires exactly one stored result reference. Usage: remnic bench export <run> --format <json|csv|html> [--output <path>] [--results-dir <path>]"
    );
    process.exit(1);
  }
  if (!parsed.format) {
    console.error("ERROR: export requires --format json, csv, or html.");
    process.exit(1);
  }

  const storeDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const runReference = parsed.benchmarks[0];
  const runSummary = await resolveBenchmarkResultReference(storeDir, runReference);
  if (!runSummary) {
    console.error(`ERROR: benchmark result not found: ${runReference}`);
    process.exit(1);
  }

  const storedResult = await loadBenchmarkResult(runSummary.path);
  const payload = renderBenchmarkResultExport(storedResult, parsed.format);

  if (!parsed.output) {
    // No destination file: emit the rendering verbatim on stdout.
    process.stdout.write(payload);
    return;
  }

  const parentDir = path2.dirname(parsed.output);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(parsed.output, payload);
  console.log(`Exported ${runSummary.id} as ${parsed.format} to ${parsed.output}`);
}
|
|
1092
|
+
/**
 * Handle `remnic bench datasets <status|download>`.
 *
 * `status` reports, for every downloadable benchmark, whether its dataset
 * directory is present under the dataset root. `download` runs the dataset
 * download script once per selected benchmark and reports the target paths.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `datasetAction`,
 *   `benchmarks`, `all` and `json`.
 * @returns {Promise<void>}
 */
async function manageBenchDatasets(parsed) {
  const datasetRoot = resolveRepoDatasetRoot();
  const supported = listDownloadableBenchmarks();
  const action = parsed.datasetAction;

  if (action === "status") {
    if (parsed.benchmarks.length > 0 || parsed.all) {
      console.error(
        "ERROR: datasets status does not accept benchmark names or --all. Usage: remnic bench datasets status [--json]"
      );
      process.exit(1);
    }

    const rows = [];
    for (const benchmarkId of supported) {
      const datasetPath = path2.join(datasetRoot, benchmarkId);
      rows.push({
        benchmark: benchmarkId,
        downloaded: isDatasetDownloaded(datasetPath, benchmarkId),
        path: datasetPath
      });
    }

    if (parsed.json) {
      console.log(JSON.stringify(rows, null, 2));
      return;
    }

    console.log("Downloadable benchmark datasets:");
    for (const row of rows) {
      const state = row.downloaded ? "downloaded" : "missing";
      console.log(` ${row.benchmark.padEnd(16)} ${state} ${row.path}`);
    }
    console.log("");
    console.log(
      "Only the script-backed published datasets are managed here. Other benchmark fixtures remain repo-managed or manual."
    );
    return;
  }

  if (action !== "download") {
    console.error("ERROR: datasets requires a subcommand: download or status.");
    process.exit(1);
  }

  const scriptPath = resolveDatasetDownloadScriptPath();
  const scriptPresent = fs.existsSync(scriptPath);
  if (!scriptPresent) {
    console.error(`ERROR: dataset download script not found: ${scriptPath}`);
    process.exit(1);
  }

  const requested = resolveSelectedDatasetDownloads(parsed);
  const fetched = [];
  for (const benchmarkId of requested) {
    // The last argument is strictly `true` only when JSON output was requested.
    runDatasetDownloadScript(scriptPath, benchmarkId, datasetRoot, parsed.json === true);
    fetched.push({ benchmark: benchmarkId, path: path2.join(datasetRoot, benchmarkId) });
  }

  if (parsed.json) {
    console.log(JSON.stringify(fetched, null, 2));
    return;
  }

  console.log("Downloaded benchmark datasets:");
  for (const item of fetched) {
    console.log(` ${item.benchmark} ${item.path}`);
  }
}
|
|
1153
|
+
/**
 * Handle `remnic bench runs <list|show|delete>`.
 *
 * `list` and `show` delegate to `showBenchPackageResults`; `delete` removes
 * the referenced stored runs and exits with status 1 when any reference could
 * not be found. Any other sub-action is reported as a usage error.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `resultsDir`,
 *   `runAction`, `benchmarks`, `all` and `json`.
 * @returns {Promise<void>}
 */
async function manageBenchRuns(parsed) {
  const storeDir = parsed.resultsDir ?? resolveBenchOutputDir();

  switch (parsed.runAction) {
    case "list": {
      if (parsed.benchmarks.length > 0 || parsed.all) {
        console.error(
          "ERROR: runs list does not accept benchmark names or --all. Usage: remnic bench runs list [--results-dir <path>] [--json]"
        );
        process.exit(1);
      }
      // Reuse the `results` listing by clearing any positional references.
      const listingArgs = { ...parsed, action: "results", benchmarks: [] };
      await showBenchPackageResults(listingArgs);
      return;
    }

    case "show": {
      if (parsed.benchmarks.length !== 1 || parsed.all) {
        console.error(
          "ERROR: runs show requires exactly one stored result reference. Usage: remnic bench runs show <run> [--detail] [--results-dir <path>] [--json]"
        );
        process.exit(1);
      }
      await showBenchPackageResults(parsed);
      return;
    }

    case "delete": {
      if (parsed.benchmarks.length === 0 || parsed.all) {
        console.error(
          "ERROR: runs delete requires at least one stored result reference. Usage: remnic bench runs delete <run...> [--results-dir <path>] [--json]"
        );
        process.exit(1);
      }

      const outcome = await deleteBenchmarkResults(storeDir, parsed.benchmarks);
      if (parsed.json) {
        console.log(JSON.stringify(outcome, null, 2));
      } else {
        if (outcome.deleted.length === 0) {
          console.log("No benchmark runs were deleted.");
        } else {
          console.log("Deleted benchmark runs:");
          for (const removed of outcome.deleted) {
            console.log(` ${removed.id} ${removed.path}`);
          }
        }
        if (outcome.missing.length > 0) {
          console.log("Missing benchmark runs:");
          for (const unresolved of outcome.missing) {
            console.log(` ${unresolved}`);
          }
        }
      }

      // A partially successful delete still fails the command.
      if (outcome.missing.length > 0) {
        process.exit(1);
      }
      return;
    }

    default: {
      console.error("ERROR: runs requires a subcommand: list, show, or delete.");
      process.exit(1);
    }
  }
}
|
|
1209
|
+
/**
 * Handle `remnic bench providers discover`.
 *
 * Prints the providers returned by `discoverAllProviders`, either as JSON or
 * as a provider list with one line per model plus its known details.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `benchmarks` and `json`.
 * @returns {Promise<void>}
 */
async function discoverBenchProviders(parsed) {
  if (parsed.benchmarks.length > 0) {
    console.error(
      "ERROR: providers discover does not accept positional arguments. Usage: remnic bench providers discover [--json]"
    );
    process.exit(1);
  }

  const providers = await discoverAllProviders();
  if (parsed.json) {
    console.log(JSON.stringify(providers, null, 2));
    return;
  }
  if (providers.length === 0) {
    console.log("No local bench providers were discovered.");
    return;
  }

  console.log("Discovered bench providers:");
  for (const providerEntry of providers) {
    console.log(` ${providerEntry.provider}`);
    for (const model of providerEntry.models) {
      const capabilityList = model.capabilities.join(", ");

      // Collect only the attributes that carry a value, in a fixed order.
      const facts = [];
      if (model.contextLength > 0) {
        facts.push(`context=${model.contextLength}`);
      }
      if (model.parameterCount) {
        facts.push(`params=${model.parameterCount}`);
      }
      if (model.quantization) {
        facts.push(`quant=${model.quantization}`);
      }
      if (capabilityList.length > 0) {
        facts.push(`caps=${capabilityList}`);
      }

      const suffix = facts.length > 0 ? ` (${facts.join(", ")})` : "";
      console.log(` - ${model.id}${suffix}`);
    }
  }
}
|
|
1242
|
+
/**
 * Handle `remnic bench publish --target remnic-ai`.
 *
 * Builds a publish feed from the stored results, writes it to `parsed.output`
 * (or the default publish path for the target) and reports where it went.
 * Exits with status 1 on positional arguments, an unsupported target, or an
 * empty feed.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `benchmarks`,
 *   `target`, `resultsDir`, `output` and `json`.
 * @returns {Promise<void>}
 */
async function publishBenchPackageResults(parsed) {
  if (parsed.benchmarks.length > 0) {
    console.error(
      "ERROR: publish does not accept positional result references. Usage: remnic bench publish --target remnic-ai [--results-dir <path>] [--output <path>] [--json]"
    );
    process.exit(1);
  }
  if (parsed.target !== "remnic-ai") {
    console.error("ERROR: publish requires --target remnic-ai.");
    process.exit(1);
  }

  const storeDir = parsed.resultsDir ?? resolveBenchOutputDir();
  const feed = await buildBenchmarkPublishFeed(storeDir, parsed.target);
  const entryCount = feed.benchmarks.length;
  if (entryCount === 0) {
    console.error(
      `ERROR: no publishable benchmark results found in ${storeDir}. remnic-ai requires stored full runs for published benchmarks.`
    );
    process.exit(1);
  }

  const destination = parsed.output ?? defaultBenchmarkPublishPath(parsed.target);
  const writtenPath = await writeBenchmarkPublishFeed(feed, destination);

  if (!parsed.json) {
    console.log(
      `Published ${feed.benchmarks.length} benchmark entries for ${parsed.target} to ${writtenPath}`
    );
    return;
  }

  const report = {
    target: parsed.target,
    outputPath: writtenPath,
    benchmarkCount: feed.benchmarks.length,
    feed
  };
  console.log(JSON.stringify(report, null, 2));
}
|
|
422
1276
|
async function runBenchViaPackage(parsed, benchmarkId) {
|
|
423
1277
|
let benchModule;
|
|
424
1278
|
try {
|
|
425
|
-
benchModule = await import("./dist-
|
|
1279
|
+
benchModule = await import("./dist-7DCVQLUB.js");
|
|
426
1280
|
} catch {
|
|
427
1281
|
return false;
|
|
428
1282
|
}
|
|
@@ -430,6 +1284,11 @@ async function runBenchViaPackage(parsed, benchmarkId) {
|
|
|
430
1284
|
if (!definition?.runnerAvailable || !benchModule.runBenchmark || !benchModule.writeBenchmarkResult) {
|
|
431
1285
|
return false;
|
|
432
1286
|
}
|
|
1287
|
+
if (definition.meta?.category === "ingestion") {
|
|
1288
|
+
throw new Error(
|
|
1289
|
+
`Benchmark "${benchmarkId}" requires an ingestion adapter which is not yet available via the CLI. Run ingestion benchmarks programmatically by passing an ingestionAdapter to runBenchmark().`
|
|
1290
|
+
);
|
|
1291
|
+
}
|
|
433
1292
|
const createAdapter = parsed.quick ? benchModule.createLightweightAdapter : benchModule.createRemnicAdapter;
|
|
434
1293
|
if (!createAdapter) {
|
|
435
1294
|
return false;
|
|
@@ -442,7 +1301,7 @@ async function runBenchViaPackage(parsed, benchmarkId) {
|
|
|
442
1301
|
);
|
|
443
1302
|
if (!parsed.quick && !datasetDir) {
|
|
444
1303
|
throw new Error(
|
|
445
|
-
`full benchmark runs for "${benchmarkId}" require dataset files.
|
|
1304
|
+
`full benchmark runs for "${benchmarkId}" require dataset files. Run "remnic bench datasets download ${benchmarkId}" or pass --dataset-dir <path>.`
|
|
446
1305
|
);
|
|
447
1306
|
}
|
|
448
1307
|
const system = await createAdapter();
|
|
@@ -466,6 +1325,41 @@ async function runBenchViaPackage(parsed, benchmarkId) {
|
|
|
466
1325
|
await system.destroy();
|
|
467
1326
|
}
|
|
468
1327
|
}
|
|
1328
|
+
/**
 * Run a custom benchmark file (`parsed.custom`) through the bundled bench
 * runtime.
 *
 * @param {object} parsed - Parsed bench arguments. Reads `quick`, `custom`
 *   and `json`.
 * @returns {Promise<boolean>} `true` when the bench runtime handled the run;
 *   `false` when the runtime chunk cannot be imported or lacks the exports
 *   this path needs, so the caller can report that no runner is available.
 */
async function runCustomBenchViaPackage(parsed) {
  let runtime;
  try {
    runtime = await import("./dist-7DCVQLUB.js");
  } catch {
    // An unloadable runtime is reported as "not handled", not as a crash.
    return false;
  }

  const hasRunner = Boolean(runtime.runCustomBenchmarkFile);
  const hasWriter = Boolean(runtime.writeBenchmarkResult);
  if (!hasRunner || !hasWriter) {
    return false;
  }

  let createAdapter;
  if (parsed.quick) {
    createAdapter = runtime.createLightweightAdapter;
  } else {
    createAdapter = runtime.createRemnicAdapter;
  }
  if (!createAdapter) {
    return false;
  }

  const outputDir = resolveBenchOutputDir();
  const system = await createAdapter();
  try {
    const runOptions = {
      mode: parsed.quick ? "quick" : "full",
      outputDir,
      limit: parsed.quick ? 1 : void 0,
      adapterMode: parsed.quick ? "lightweight" : "direct",
      system
    };
    const outcome = await runtime.runCustomBenchmarkFile(parsed.custom, runOptions);
    const savedTo = await runtime.writeBenchmarkResult(outcome, outputDir);

    if (parsed.json) {
      console.log(JSON.stringify(outcome, null, 2));
    } else {
      printBenchPackageSummary(outcome, savedTo);
    }
    return true;
  } finally {
    // Always tear the adapter down, even when the run or the write throws.
    await system.destroy();
  }
}
|
|
469
1363
|
function resolveConfigPath(cliPath) {
|
|
470
1364
|
if (cliPath) return path2.resolve(cliPath);
|
|
471
1365
|
const envPath = readCompatEnv("REMNIC_CONFIG_PATH", "ENGRAM_CONFIG_PATH");
|
|
@@ -628,6 +1522,7 @@ async function cmdQuery(queryText, json, explain) {
|
|
|
628
1522
|
const config = parseConfig(remnicCfg);
|
|
629
1523
|
const orchestrator = new Orchestrator(config);
|
|
630
1524
|
await orchestrator.initialize();
|
|
1525
|
+
await orchestrator.deferredReady;
|
|
631
1526
|
const service = new EngramAccessService(orchestrator);
|
|
632
1527
|
if (explain) {
|
|
633
1528
|
const result2 = await runExplain(service, queryText);
|
|
@@ -808,6 +1703,7 @@ async function cmdEnrich(rest) {
|
|
|
808
1703
|
];
|
|
809
1704
|
const orchestrator2 = new Orchestrator(config);
|
|
810
1705
|
await orchestrator2.initialize();
|
|
1706
|
+
await orchestrator2.deferredReady;
|
|
811
1707
|
const searchBackend2 = orchestrator2.qmd;
|
|
812
1708
|
const searchFn2 = searchBackend2.isAvailable() ? async (query) => {
|
|
813
1709
|
const results2 = await searchBackend2.search(query, void 0, 10);
|
|
@@ -843,6 +1739,7 @@ Registered providers:`);
|
|
|
843
1739
|
}
|
|
844
1740
|
const orchestrator = new Orchestrator(config);
|
|
845
1741
|
await orchestrator.initialize();
|
|
1742
|
+
await orchestrator.deferredReady;
|
|
846
1743
|
const storage = await orchestrator.getStorage(config.defaultNamespace);
|
|
847
1744
|
const entityFiles = await storage.readAllEntityFiles();
|
|
848
1745
|
let targets = entityFiles;
|
|
@@ -1925,6 +2822,42 @@ async function cmdBench(rest) {
|
|
|
1925
2822
|
await cmdLegacyBenchmark(parsed.action, benchAction.args, parsed.json);
|
|
1926
2823
|
return;
|
|
1927
2824
|
}
|
|
2825
|
+
if (parsed.action === "compare") {
|
|
2826
|
+
await compareBenchPackageResults(parsed);
|
|
2827
|
+
return;
|
|
2828
|
+
}
|
|
2829
|
+
if (parsed.action === "results") {
|
|
2830
|
+
await showBenchPackageResults(parsed);
|
|
2831
|
+
return;
|
|
2832
|
+
}
|
|
2833
|
+
if (parsed.action === "baseline") {
|
|
2834
|
+
await manageBenchBaselines(parsed);
|
|
2835
|
+
return;
|
|
2836
|
+
}
|
|
2837
|
+
if (parsed.action === "export") {
|
|
2838
|
+
await exportBenchPackageResult(parsed);
|
|
2839
|
+
return;
|
|
2840
|
+
}
|
|
2841
|
+
if (parsed.action === "datasets") {
|
|
2842
|
+
await manageBenchDatasets(parsed);
|
|
2843
|
+
return;
|
|
2844
|
+
}
|
|
2845
|
+
if (parsed.action === "runs") {
|
|
2846
|
+
await manageBenchRuns(parsed);
|
|
2847
|
+
return;
|
|
2848
|
+
}
|
|
2849
|
+
if (parsed.action === "publish") {
|
|
2850
|
+
await publishBenchPackageResults(parsed);
|
|
2851
|
+
return;
|
|
2852
|
+
}
|
|
2853
|
+
if (parsed.action === "ui") {
|
|
2854
|
+
await launchBenchUi(parsed.resultsDir ?? resolveBenchOutputDir());
|
|
2855
|
+
return;
|
|
2856
|
+
}
|
|
2857
|
+
if (parsed.action === "providers") {
|
|
2858
|
+
await discoverBenchProviders(parsed);
|
|
2859
|
+
return;
|
|
2860
|
+
}
|
|
1928
2861
|
if (parsed.action === "list") {
|
|
1929
2862
|
const catalog = await listBenchmarksFromPackage() ?? BENCHMARK_CATALOG;
|
|
1930
2863
|
if (parsed.json) {
|
|
@@ -1937,6 +2870,20 @@ async function cmdBench(rest) {
|
|
|
1937
2870
|
}
|
|
1938
2871
|
return;
|
|
1939
2872
|
}
|
|
2873
|
+
if (parsed.custom) {
|
|
2874
|
+
if (parsed.all || parsed.benchmarks.length > 0) {
|
|
2875
|
+
console.error("ERROR: --custom cannot be combined with benchmark names or --all.");
|
|
2876
|
+
process.exit(1);
|
|
2877
|
+
}
|
|
2878
|
+
const handledByPackage = await runCustomBenchViaPackage(parsed);
|
|
2879
|
+
if (!handledByPackage) {
|
|
2880
|
+
console.error(
|
|
2881
|
+
"Benchmark runner not found. Expected a phase-1 @remnic/bench runtime export for custom benchmarks."
|
|
2882
|
+
);
|
|
2883
|
+
process.exit(1);
|
|
2884
|
+
}
|
|
2885
|
+
return;
|
|
2886
|
+
}
|
|
1940
2887
|
const selectedBenchmarks = parsed.all ? await resolveAllBenchmarks() : parsed.benchmarks;
|
|
1941
2888
|
if (selectedBenchmarks.length === 0) {
|
|
1942
2889
|
console.error(
|
|
@@ -2789,6 +3736,180 @@ Usage:
|
|
|
2789
3736
|
break;
|
|
2790
3737
|
}
|
|
2791
3738
|
}
|
|
3739
|
+
/**
 * Read the value of a flag that must carry one whenever it is present.
 *
 * @param {string[]} args - CLI arguments to inspect.
 * @param {string} flag - Flag name, e.g. `--format`.
 * @returns {string|undefined} The flag's value, or `undefined` when the flag
 *   is absent.
 * @throws {Error} When the flag is present but `resolveFlagStrict` yields no
 *   value for it.
 */
function resolveRequiredValueFlag(args, flag) {
  if (hasFlag(args, flag)) {
    const resolved = resolveFlagStrict(args, flag);
    if (resolved !== void 0) {
      return resolved;
    }
    throw new Error(
      `${flag} requires a value. Provide it as \`${flag} <value>\`, not as a bare flag.`
    );
  }
  return void 0;
}
|
|
3749
|
+
/**
 * Parse the arguments of `remnic training:export` into an options object.
 *
 * @param {string[]} rest - CLI arguments following the command name.
 * @param {string} defaultMemoryDir - Memory directory used when `--memory-dir`
 *   is not supplied.
 * @returns {{
 *   format: string,
 *   output: string,
 *   memoryDir: string,
 *   since: (string|undefined),
 *   until: (string|undefined),
 *   minConfidence: (number|undefined),
 *   categories: (string[]|undefined),
 *   includeEntities: boolean,
 *   synthesize: boolean,
 *   maxPairsPerRecord: (number|undefined),
 *   privacySweep: boolean,
 *   dryRun: boolean
 * }} Parsed options. `output` is the empty string when no output path was
 *   given (only allowed together with `--dry-run`).
 * @throws {Error} When `--format` is missing, when `--output`/`--out` is
 *   missing without `--dry-run`, when a value flag is passed bare, or when
 *   `--min-confidence` / `--max-pairs-per-record` are out of range.
 */
function parseTrainingExportArgs(rest, defaultMemoryDir) {
  const format = resolveRequiredValueFlag(rest, "--format");
  if (!format) {
    throw new Error(
      "--format <name> is required. Run `remnic training:export --help` for the list of registered adapters."
    );
  }

  const dryRun = hasFlag(rest, "--dry-run");
  const outputRaw = resolveRequiredValueFlag(rest, "--output") ?? resolveRequiredValueFlag(rest, "--out");
  if (!outputRaw && !dryRun) {
    throw new Error(
      "--output <path> (or --out <path>) is required for training:export. Use --dry-run to print statistics without writing a file."
    );
  }
  const output = outputRaw ? expandTilde(outputRaw) : "";

  const memoryDirFlag = resolveRequiredValueFlag(rest, "--memory-dir");
  const memoryDir = expandTilde(memoryDirFlag ?? defaultMemoryDir);
  const since = resolveRequiredValueFlag(rest, "--since");
  const until = resolveRequiredValueFlag(rest, "--until");

  const minConfidenceRaw = resolveRequiredValueFlag(rest, "--min-confidence");
  let minConfidence;
  if (minConfidenceRaw !== void 0) {
    // Number("") and Number("   ") are 0, which would silently pass the range
    // check below; treat an empty or blank value as invalid instead.
    const isBlank = String(minConfidenceRaw).trim() === "";
    const n = isBlank ? Number.NaN : Number(minConfidenceRaw);
    if (!Number.isFinite(n) || n < 0 || n > 1) {
      throw new Error(
        `Invalid --min-confidence value "${minConfidenceRaw}": expected a number in [0, 1].`
      );
    }
    minConfidence = n;
  }

  const categoriesRaw = resolveRequiredValueFlag(rest, "--categories");
  // Comma-separated list; surrounding whitespace and empty items are dropped.
  const categories = categoriesRaw
    ? categoriesRaw.split(",").map((c) => c.trim()).filter((c) => c.length > 0)
    : void 0;

  const maxPairsRaw = resolveRequiredValueFlag(rest, "--max-pairs-per-record");
  let maxPairsPerRecord;
  if (maxPairsRaw !== void 0) {
    const n = Number(maxPairsRaw);
    if (!Number.isInteger(n) || n < 1) {
      throw new Error(
        `Invalid --max-pairs-per-record value "${maxPairsRaw}": expected a positive integer.`
      );
    }
    maxPairsPerRecord = n;
  }

  const includeEntities = hasFlag(rest, "--include-entities");
  const synthesize = hasFlag(rest, "--synthesize");
  // The privacy sweep is on unless it is explicitly disabled.
  const privacySweep = !hasFlag(rest, "--no-privacy-sweep");

  return {
    format,
    output,
    memoryDir,
    since,
    until,
    minConfidence,
    categories,
    includeEntities,
    synthesize,
    maxPairsPerRecord,
    privacySweep,
    dryRun
  };
}
|
|
3810
|
+
/**
 * Export memories as a fine-tuning dataset using a registered adapter.
 *
 * Pipeline: look up the adapter, validate the memory directory, convert
 * memories to records, optionally synthesize training pairs, optionally run
 * the PII sweep, then either print dry-run statistics or write the formatted
 * dataset to `args.output`.
 *
 * The dataset is written to a sibling temp file and renamed into place. If
 * the write or the rename fails, the temp file is removed before the original
 * error is rethrown, so a failed export leaves no stray `.tmp-*` file.
 *
 * @param {object} args - Options as produced by `parseTrainingExportArgs`.
 *   Reads `format`, `memoryDir`, `since`, `until`, `minConfidence`,
 *   `categories`, `includeEntities`, `synthesize`, `maxPairsPerRecord`,
 *   `privacySweep`, `dryRun` and `output`.
 * @param {{write: function(string): *}} [stdout=process.stdout] - Sink for
 *   progress and statistics output.
 * @returns {Promise<{recordsRead: number, recordsWritten: number,
 *   redactedCount: number, outputPath: (string|null)}>} Export statistics;
 *   `recordsWritten` is 0 and `outputPath` is `null` for a dry run.
 * @throws {Error} On an unknown format, a missing or non-directory
 *   `memoryDir`, a missing `output` outside dry-run mode, or any I/O failure.
 */
async function runTrainingExport(args, stdout = process.stdout) {
  ensureWecloneExportAdapterRegistered();
  const adapter = getTrainingExportAdapter2(args.format);
  if (!adapter) {
    const registered = listTrainingExportAdapters();
    const validList = registered.length > 0 ? `Valid formats: [${registered.join(", ")}]` : "No adapters are currently registered.";
    throw new Error(
      `Unknown training-export format "${args.format}". ${validList}`
    );
  }
  if (!fs.existsSync(args.memoryDir)) {
    throw new Error(
      `--memory-dir "${args.memoryDir}" does not exist. Provide the path to an existing memory directory.`
    );
  }
  if (!fs.statSync(args.memoryDir).isDirectory()) {
    throw new Error(
      `--memory-dir "${args.memoryDir}" is not a directory. Provide the path to a memory directory, not a file.`
    );
  }

  let since;
  if (args.since) since = parseStrictCliDate(args.since, "--since");
  let until;
  if (args.until) until = parseStrictCliDate(args.until, "--until");

  const convertOptions = {
    memoryDir: args.memoryDir,
    since,
    until,
    minConfidence: args.minConfidence,
    categories: args.categories,
    includeEntities: args.includeEntities
  };
  let records = await convertMemoriesToRecords(convertOptions);
  // Captured before synthesis/sweeping so the statistics report the raw input size.
  const recordsRead = records.length;
  if (args.synthesize) {
    records = synthesizeTrainingPairs(records, {
      maxPairsPerRecord: args.maxPairsPerRecord
    });
  }
  let redactedCount = 0;
  if (args.privacySweep) {
    const swept = sweepPii(records);
    records = swept.cleanRecords;
    redactedCount = swept.redactedCount;
  }

  if (args.dryRun) {
    stdout.write("Training export dry run\n");
    stdout.write(`Format: ${adapter.name}\n`);
    stdout.write(`Records read: ${recordsRead}\n`);
    stdout.write(`Records to write: ${records.length}\n`);
    if (args.privacySweep) {
      stdout.write(`Redacted records: ${redactedCount}\n`);
    }
    // Per-category counts, sorted by category name for stable output.
    const cats = /* @__PURE__ */ new Map();
    for (const r of records) {
      const c = r.category ?? "unknown";
      cats.set(c, (cats.get(c) ?? 0) + 1);
    }
    const sortedCats = [...cats.entries()].sort(
      (a, b) => a[0].localeCompare(b[0])
    );
    for (const [cat, count] of sortedCats) {
      stdout.write(` ${cat}: ${count}\n`);
    }
    return {
      recordsRead,
      recordsWritten: 0,
      redactedCount,
      outputPath: null
    };
  }

  if (!args.output) {
    throw new Error(
      "runTrainingExport: `output` is required when dryRun is false. Pass dryRun: true to skip file I/O."
    );
  }

  const formatted = adapter.formatRecords(records);
  const outDir = path2.dirname(args.output);
  fs.mkdirSync(outDir, { recursive: true });

  // Write to a temp file next to the target, then rename it into place.
  const tmpPath = `${args.output}.tmp-${process.pid}-${Date.now()}`;
  try {
    fs.writeFileSync(tmpPath, formatted, "utf-8");
    fs.renameSync(tmpPath, args.output);
  } catch (err) {
    // Best-effort cleanup: never let a cleanup failure mask the real error.
    try {
      fs.rmSync(tmpPath, { force: true });
    } catch {
      // Ignored on purpose; the original failure is rethrown below.
    }
    throw err;
  }

  stdout.write(
    `Exported ${records.length} records to ${args.output} (${adapter.name} format)\n`
  );
  if (args.privacySweep && redactedCount > 0) {
    stdout.write(`Privacy sweep redacted PII in ${redactedCount} record(s).\n`);
  }
  return {
    recordsRead,
    recordsWritten: records.length,
    redactedCount,
    outputPath: args.output
  };
}
|
|
2792
3913
|
async function main(argv = process.argv.slice(2)) {
|
|
2793
3914
|
const [command, ...rest] = argv;
|
|
2794
3915
|
if (command !== "migrate") {
|
|
@@ -3047,6 +4168,51 @@ Options:
|
|
|
3047
4168
|
await cmdExtensions(action, rest.slice(1));
|
|
3048
4169
|
break;
|
|
3049
4170
|
}
|
|
4171
|
+
case "training:export": {
|
|
4172
|
+
if (rest.includes("--help") || rest.includes("-h")) {
|
|
4173
|
+
console.log(`
|
|
4174
|
+
remnic training:export \u2014 Export Remnic memories as fine-tuning datasets (issue #459)
|
|
4175
|
+
|
|
4176
|
+
Usage:
|
|
4177
|
+
remnic training:export --format <name> --output <path> [options]
|
|
4178
|
+
|
|
4179
|
+
Required:
|
|
4180
|
+
--format <name> Registered adapter name (e.g. weclone)
|
|
4181
|
+
--output <path> | --out Path to write the dataset file
|
|
4182
|
+
|
|
4183
|
+
Filters:
|
|
4184
|
+
--memory-dir <path> Memory directory (defaults to resolved memoryDir)
|
|
4185
|
+
--since <YYYY-MM-DD[T...]> Only include memories created at or after this date
|
|
4186
|
+
--until <YYYY-MM-DD[T...]> Only include memories created before this date (exclusive)
|
|
4187
|
+
--min-confidence <0..1> Inclusive lower bound on memory confidence
|
|
4188
|
+
--categories <list> Comma-separated category filter (fact,preference,...)
|
|
4189
|
+
--include-entities Also read from entities/ (off by default)
|
|
4190
|
+
|
|
4191
|
+
Adapter options:
|
|
4192
|
+
--synthesize Generate conversational Q/A pairs (WeClone-optimised)
|
|
4193
|
+
--max-pairs-per-record <n> When --synthesize, max pairs emitted per memory
|
|
4194
|
+
--no-privacy-sweep Skip the final PII redaction pass (default: on)
|
|
4195
|
+
|
|
4196
|
+
Other:
|
|
4197
|
+
--dry-run Print statistics only; do not write the file
|
|
4198
|
+
`);
|
|
4199
|
+
break;
|
|
4200
|
+
}
|
|
4201
|
+
let parsed;
|
|
4202
|
+
try {
|
|
4203
|
+
parsed = parseTrainingExportArgs(rest, resolveMemoryDir());
|
|
4204
|
+
} catch (err) {
|
|
4205
|
+
console.error(err instanceof Error ? err.message : String(err));
|
|
4206
|
+
process.exit(1);
|
|
4207
|
+
}
|
|
4208
|
+
try {
|
|
4209
|
+
await runTrainingExport(parsed);
|
|
4210
|
+
} catch (err) {
|
|
4211
|
+
console.error(err instanceof Error ? err.message : String(err));
|
|
4212
|
+
process.exit(1);
|
|
4213
|
+
}
|
|
4214
|
+
break;
|
|
4215
|
+
}
|
|
3050
4216
|
case "openclaw": {
|
|
3051
4217
|
const subAction = rest[0] ?? "help";
|
|
3052
4218
|
if (subAction === "install") {
|
|
@@ -3103,9 +4269,9 @@ Usage:
|
|
|
3103
4269
|
remnic extensions <list|show|validate|reload> Manage memory extensions
|
|
3104
4270
|
remnic space <list|switch|create|delete|push|pull|share|promote|audit> Manage spaces
|
|
3105
4271
|
create accepts --parent <id> to set parent-child relationship
|
|
3106
|
-
remnic bench <list|run> [benchmark...] [--quick] [--all] [--dataset-dir <path>] [--json]
|
|
4272
|
+
remnic bench <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers> [benchmark...] [--quick] [--all] [--dataset-dir <path>] [--results-dir <path>] [--baselines-dir <path>] [--threshold <value>] [--detail] [--format <json|csv|html>] [--output <path>] [--target remnic-ai] [--json]
|
|
3107
4273
|
benchmark is kept as a compatibility alias. check/report remain under that alias.
|
|
3108
|
-
remnic benchmark <list|run|check|report> [queries...] [--explain] [--baseline=<path>] [--report=<path>]
|
|
4274
|
+
remnic benchmark <list|run|datasets|runs|compare|results|baseline|export|publish|ui|providers|check|report> [queries...] [--explain] [--baseline=<path>] [--report=<path>]
|
|
3109
4275
|
remnic briefing [--since <window>] [--focus <filter>] [--save] [--format markdown|json]
|
|
3110
4276
|
Daily context briefing. Windows: yesterday, today, NNh, NNd, NNw.
|
|
3111
4277
|
Focus: person:<name>, project:<name>, topic:<name>.
|
|
@@ -3126,6 +4292,9 @@ Usage:
|
|
|
3126
4292
|
remnic enrich --dry-run Preview what would be enriched
|
|
3127
4293
|
remnic enrich audit Show recent enrichment audit log
|
|
3128
4294
|
remnic enrich providers List registered providers and their status
|
|
4295
|
+
remnic training:export --format <name> --output <path> [options]
|
|
4296
|
+
Export memories as a fine-tuning dataset (issue #459). Run
|
|
4297
|
+
'remnic training:export --help' for the full option list.
|
|
3129
4298
|
|
|
3130
4299
|
Options:
|
|
3131
4300
|
--json Output in JSON format
|
|
@@ -3152,7 +4321,9 @@ export {
|
|
|
3152
4321
|
main,
|
|
3153
4322
|
parseBenchArgs,
|
|
3154
4323
|
parseConnectorConfig,
|
|
4324
|
+
parseTrainingExportArgs,
|
|
3155
4325
|
resolveFlag,
|
|
4326
|
+
runTrainingExport,
|
|
3156
4327
|
stripConfigArgv,
|
|
3157
4328
|
stripResolveFlags
|
|
3158
4329
|
};
|