labgate 0.5.30 → 0.5.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/dist/cli.js +616 -0
- package/dist/cli.js.map +1 -1
- package/dist/lib/config.d.ts +11 -0
- package/dist/lib/config.js +44 -0
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/container.d.ts +22 -3
- package/dist/lib/container.js +373 -67
- package/dist/lib/container.js.map +1 -1
- package/dist/lib/display-mcp.d.ts +10 -0
- package/dist/lib/display-mcp.js +160 -0
- package/dist/lib/display-mcp.js.map +1 -0
- package/dist/lib/display-store.d.ts +24 -0
- package/dist/lib/display-store.js +150 -0
- package/dist/lib/display-store.js.map +1 -0
- package/dist/lib/explorer-autopilot.d.ts +16 -0
- package/dist/lib/explorer-autopilot.js +573 -0
- package/dist/lib/explorer-autopilot.js.map +1 -0
- package/dist/lib/explorer-claude.d.ts +16 -0
- package/dist/lib/explorer-claude.js +361 -0
- package/dist/lib/explorer-claude.js.map +1 -0
- package/dist/lib/explorer-compare.d.ts +9 -0
- package/dist/lib/explorer-compare.js +190 -0
- package/dist/lib/explorer-compare.js.map +1 -0
- package/dist/lib/explorer-eval.d.ts +23 -0
- package/dist/lib/explorer-eval.js +161 -0
- package/dist/lib/explorer-eval.js.map +1 -0
- package/dist/lib/explorer-gc.d.ts +11 -0
- package/dist/lib/explorer-gc.js +304 -0
- package/dist/lib/explorer-gc.js.map +1 -0
- package/dist/lib/explorer-git.d.ts +14 -0
- package/dist/lib/explorer-git.js +136 -0
- package/dist/lib/explorer-git.js.map +1 -0
- package/dist/lib/explorer-lock.d.ts +5 -0
- package/dist/lib/explorer-lock.js +100 -0
- package/dist/lib/explorer-lock.js.map +1 -0
- package/dist/lib/explorer-mcp.d.ts +11 -0
- package/dist/lib/explorer-mcp.js +611 -0
- package/dist/lib/explorer-mcp.js.map +1 -0
- package/dist/lib/explorer-retention.d.ts +4 -0
- package/dist/lib/explorer-retention.js +58 -0
- package/dist/lib/explorer-retention.js.map +1 -0
- package/dist/lib/explorer-store.d.ts +77 -0
- package/dist/lib/explorer-store.js +950 -0
- package/dist/lib/explorer-store.js.map +1 -0
- package/dist/lib/explorer-types.d.ts +161 -0
- package/dist/lib/explorer-types.js +3 -0
- package/dist/lib/explorer-types.js.map +1 -0
- package/dist/lib/explorer.d.ts +31 -0
- package/dist/lib/explorer.js +247 -0
- package/dist/lib/explorer.js.map +1 -0
- package/dist/lib/results-mcp.d.ts +2 -2
- package/dist/lib/results-mcp.js +26 -4
- package/dist/lib/results-mcp.js.map +1 -1
- package/dist/lib/results-store.d.ts +1 -0
- package/dist/lib/results-store.js +87 -3
- package/dist/lib/results-store.js.map +1 -1
- package/dist/lib/runtime.d.ts +6 -0
- package/dist/lib/runtime.js +46 -19
- package/dist/lib/runtime.js.map +1 -1
- package/dist/lib/test/integration-harness.js +1 -1
- package/dist/lib/test/integration-harness.js.map +1 -1
- package/dist/lib/ui.d.ts +1 -0
- package/dist/lib/ui.html +11231 -4370
- package/dist/lib/ui.js +2564 -277
- package/dist/lib/ui.js.map +1 -1
- package/dist/lib/web-terminal.d.ts +13 -0
- package/dist/lib/web-terminal.js +118 -15
- package/dist/lib/web-terminal.js.map +1 -1
- package/dist/mcp-bundles/display-mcp.bundle.mjs +30209 -0
- package/dist/mcp-bundles/explorer-mcp.bundle.mjs +40044 -0
- package/dist/mcp-bundles/results-mcp.bundle.mjs +100 -7
- package/package.json +4 -3
- package/templates/tsp-lab/API_CONTRACT.md +20 -0
- package/templates/tsp-lab/EVAL.md +20 -0
- package/templates/tsp-lab/PROBLEM.md +18 -0
- package/templates/tsp-lab/data/generate_instances.py +51 -0
- package/templates/tsp-lab/data/instances.jsonl +12 -0
- package/templates/tsp-lab/eval.py +148 -0
- package/templates/tsp-lab/solver.py +88 -0
- package/templates/tsp-lab/stub-patches/enable_two_opt.patch +14 -0
package/dist/cli.js
CHANGED
|
@@ -37,6 +37,7 @@ const commander_1 = require("commander");
|
|
|
37
37
|
const path_1 = require("path");
|
|
38
38
|
const fs_1 = require("fs");
|
|
39
39
|
const os_1 = require("os");
|
|
40
|
+
const child_process_1 = require("child_process");
|
|
40
41
|
const net_1 = require("net");
|
|
41
42
|
const readline_1 = require("readline");
|
|
42
43
|
const config_js_1 = require("./lib/config.js");
|
|
@@ -378,11 +379,13 @@ program
|
|
|
378
379
|
process.exit(1);
|
|
379
380
|
}
|
|
380
381
|
const { startUI } = await import('./lib/ui.js');
|
|
382
|
+
const prewarmImageOnStartup = process.env.LABGATE_UI_PREWARM_ON_START === '1';
|
|
381
383
|
const server = startUI({
|
|
382
384
|
port: Number.isFinite(parsedPort) ? parsedPort : undefined,
|
|
383
385
|
socketPath: hasSocket ? socketInput : undefined,
|
|
384
386
|
listenAddress: hasListenAddress ? listenAddressInput : undefined,
|
|
385
387
|
standalone: true,
|
|
388
|
+
prewarmImageOnStartup,
|
|
386
389
|
});
|
|
387
390
|
// In standalone mode, keep the process alive
|
|
388
391
|
server.ref();
|
|
@@ -395,6 +398,86 @@ program
|
|
|
395
398
|
const { listSessions } = await import('./lib/container.js');
|
|
396
399
|
await listSessions();
|
|
397
400
|
});
|
|
401
|
+
// ── labgate continue [id-or-name] ─────────────────────────
|
|
402
|
+
// `labgate continue [idOrName]`: attach this terminal to a tmux-backed web
// terminal session recorded for the local host.
program
    .command('continue')
    .description('Attach to a running web terminal session (id, prefix, or custom name)')
    .argument('[idOrName]', 'Web terminal session id, id prefix, or custom name')
    .option('--latest', 'Attach to the most recent runnable local web terminal session')
    .action(async (idOrName, opts) => {
        const { getTmuxBinary, hasTmuxSession, listWebTerminalRecords, resolveWebTerminalRecord } = await import('./lib/web-terminal.js');
        // Write every line to stderr, then terminate with exit status 1.
        const abort = (...lines) => {
            for (const line of lines) {
                console.error(line);
            }
            process.exit(1);
        };
        const selector = (idOrName || '').trim();
        const thisNode = (0, os_1.hostname)();
        const isLocal = (record) => record.node === thisNode;
        const hasTty = !!(process.stdin.isTTY && process.stdout.isTTY);
        // Without a selector, --latest, or a TTY there is nothing to choose from.
        if (!(selector || opts.latest || hasTty)) {
            abort('No session id provided in non-interactive mode. Pass a session id/name or use `--latest`.');
        }
        let target = null;
        if (selector) {
            const resolved = resolveWebTerminalRecord(selector);
            const localMatches = resolved.matches.filter(isLocal);
            const exact = resolved.record;
            if (exact && isLocal(exact)) {
                target = exact;
            }
            else if (localMatches.length === 1) {
                [target] = localMatches;
            }
            else if (localMatches.length > 1) {
                abort(`Ambiguous session selector "${selector}". Matches:`, ...localMatches.map((match) => ` - ${match.id}${match.name ? ` (${match.name})` : ''}`));
            }
            else if (exact) {
                // The resolved record exists but belongs to another node.
                abort(`Session "${selector}" was found on node "${exact.node}". Run \`labgate continue\` on that node.`);
            }
            else {
                abort(`Session "${selector}" not found.`);
            }
        }
        if (target) {
            // An explicitly selected session must still have a live tmux session.
            if (!(await hasTmuxSession(target.tmuxSession))) {
                abort(`Session "${target.id}" is not running.`);
            }
        }
        else {
            // No selector: take the first local record (in listWebTerminalRecords
            // order) whose tmux session is still alive.
            for (const record of listWebTerminalRecords().filter(isLocal)) {
                if (await hasTmuxSession(record.tmuxSession)) {
                    target = record;
                    break;
                }
            }
            if (!target) {
                abort('No runnable local web terminal session found. Start one in `labgate ui` first.');
            }
        }
        console.error('Attaching terminal. Tip: press Ctrl+b then d to detach and keep the session running for Web UI reconnect. ' +
            'Ctrl+C sends SIGINT to the active process.');
        const tmuxBin = await getTmuxBinary();
        const { error, status, signal } = (0, child_process_1.spawnSync)(tmuxBin, ['attach-session', '-d', '-t', target.tmuxSession], { stdio: 'inherit' });
        if (error) {
            abort(`Failed to run tmux attach-session: ${error.message}`);
        }
        // Mirror tmux's own exit status when it exited normally.
        if (status !== null) {
            process.exit(status);
        }
        // Killed by a signal: use the shell convention 128 + signal number.
        if (signal) {
            const signalNumber = os_1.constants.signals[signal];
            process.exit(typeof signalNumber === 'number' ? 128 + signalNumber : 1);
        }
        process.exit(1);
    });
|
|
398
481
|
// ── labgate stop <id> ─────────────────────────────────────
|
|
399
482
|
program
|
|
400
483
|
.command('stop')
|
|
@@ -702,6 +785,539 @@ datasetCmd
|
|
|
702
785
|
console.log(` Path: ${hostPath}`);
|
|
703
786
|
console.log(` Scanned: ${stats.scanned_at}`);
|
|
704
787
|
});
|
|
788
|
+
// ── labgate explore ─────────────────────────────────────
|
|
789
|
+
// Parent command that groups every `labgate explore <subcommand>`.
const exploreCmd = program.command('explore').description('Solution Explorer commands');
|
|
792
|
+
// `labgate explore create`: validate the CLI options, create the experiment,
// and (unless --skip-baseline) run one baseline evaluation against the clone.
exploreCmd
    .command('create')
    .description('Create a new Solution Explorer experiment from an existing git repository')
    .requiredOption('--name <name>', 'Experiment name')
    .requiredOption('--repo <path>', 'Source repository path to clone for isolated exploration')
    .requiredOption('--eval <command>', 'Evaluation command (must print JSON as last stdout line)')
    .option('--base-ref <ref>', 'Base git ref (default: HEAD)', 'HEAD')
    .option('--timeout <sec>', 'Evaluation timeout seconds', '120')
    .option('--epsilon <value>', 'Exploration epsilon (0..1)', '0.15')
    .option('--top-n <count>', 'Top-N pool for epsilon exploration', '5')
    .option('--max-runs <count>', 'Optional max run budget')
    .option('--agent-mode <mode>', 'Agent mode: stub|claude_headless', 'stub')
    .option('--stub-patch <path>', 'Patch file path for stub agent mode')
    .option('--claude-resume-session <id>', 'Optional Claude session id to resume in claude_headless mode')
    .option('--claude-timeout <sec>', 'Optional Claude headless timeout seconds (60..14400)')
    .option('--keep-worktrees', 'Retention policy: keep run worktrees instead of pruning')
    .option('--artifacts-policy <mode>', 'Retention artifacts mode: all|minimal|none', 'minimal')
    .option('--keep-last <count>', 'Retention keep_last_n', '50')
    .option('--keep-failed-last <count>', 'Retention keep_failed_last_n', '20')
    .option('--keep-best', 'Retention keep best run protected', true)
    .option('--no-keep-best', 'Retention do not specially protect best run')
    .option('--prune-after-days <days>', 'Retention prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Retention max_delete_runs', '200')
    .option('--skip-baseline', 'Skip baseline evaluation check after experiment creation')
    .action(async (opts) => {
        // Report a usage error and exit with status 1. process.exit() does not
        // throw, so these failures never reach the catch block below.
        const fail = (message) => {
            console.error(`Error: ${message}`);
            process.exit(1);
        };
        // Parse a whole base-10 integer. Unlike a bare parseInt(), inputs such as
        // "10.9" or "30s" yield NaN instead of being silently truncated.
        const toInteger = (raw) => {
            const text = String(raw).trim();
            return /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
        };
        // Return the parsed integer, or fail with `message` when it is not an
        // integer within [min, max].
        const integerOption = (raw, min, max, message) => {
            const value = toInteger(raw);
            if (!Number.isSafeInteger(value) || value < min || value > max) {
                fail(message);
            }
            return value;
        };
        try {
            // Validation runs in the same order as the option list, so the first
            // invalid flag is the one that gets reported.
            const timeout = integerOption(opts.timeout, 5, 86400, '--timeout must be an integer between 5 and 86400.');
            // Number('') is 0, so treat an empty value as invalid explicitly.
            const epsilonText = String(opts.epsilon).trim();
            const epsilon = epsilonText === '' ? Number.NaN : Number(epsilonText);
            if (!Number.isFinite(epsilon) || epsilon < 0 || epsilon > 1) {
                fail('--epsilon must be a number between 0 and 1.');
            }
            const topN = integerOption(opts.topN, 1, 100, '--top-n must be an integer between 1 and 100.');
            const maxRuns = opts.maxRuns === undefined
                ? undefined
                : integerOption(opts.maxRuns, 1, Infinity, '--max-runs must be an integer >= 1.');
            // "claude" is accepted as shorthand for "claude_headless".
            const agentModeRaw = String(opts.agentMode || 'stub').trim().toLowerCase();
            const agentMode = agentModeRaw === 'claude' ? 'claude_headless' : agentModeRaw;
            if (agentMode !== 'stub' && agentMode !== 'claude_headless') {
                fail('--agent-mode must be one of: stub, claude_headless.');
            }
            const claudeTimeout = opts.claudeTimeout === undefined
                ? undefined
                : integerOption(opts.claudeTimeout, 60, 4 * 60 * 60, '--claude-timeout must be an integer between 60 and 14400.');
            const artifactsPolicy = String(opts.artifactsPolicy || 'minimal').trim().toLowerCase();
            if (!['all', 'minimal', 'none'].includes(artifactsPolicy)) {
                fail('--artifacts-policy must be one of: all, minimal, none.');
            }
            const keepLast = integerOption(opts.keepLast, 0, Infinity, '--keep-last must be an integer >= 0.');
            const keepFailedLast = integerOption(opts.keepFailedLast, 0, Infinity, '--keep-failed-last must be an integer >= 0.');
            const maxDeleteRuns = integerOption(opts.maxDeleteRuns, 1, Infinity, '--max-delete-runs must be an integer >= 1.');
            const pruneAfterDays = opts.pruneAfterDays === undefined
                ? undefined
                : integerOption(opts.pruneAfterDays, 1, Infinity, '--prune-after-days must be an integer >= 1.');
            // Optional policy fields are only set when the user supplied them.
            const policy = {
                epsilon,
                top_n: topN,
                agent_mode: agentMode,
            };
            if (maxRuns !== undefined) {
                policy.max_runs = maxRuns;
            }
            if (opts.stubPatch) {
                policy.stub_patch_file = opts.stubPatch;
            }
            if (agentMode === 'claude_headless') {
                const resumeId = String(opts.claudeResumeSession || '').trim();
                if (resumeId) {
                    policy.claude_resume_session_id = resumeId;
                }
                if (claudeTimeout !== undefined) {
                    policy.claude_timeout_sec = claudeTimeout;
                }
            }
            const retention = {
                keep_worktrees: !!opts.keepWorktrees,
                artifacts: artifactsPolicy,
                keep_last_n: keepLast,
                keep_best: opts.keepBest !== false,
                keep_failed_last_n: keepFailedLast,
                ...(pruneAfterDays !== undefined ? { prune_artifacts_after_days: pruneAfterDays } : {}),
                max_delete_runs: maxDeleteRuns,
            };
            const { createExplorerExperiment } = await import('./lib/explorer.js');
            const { runEvaluation } = await import('./lib/explorer-eval.js');
            const { getExplorerArtifactDir } = await import('./lib/config.js');
            const { ExplorerStore } = await import('./lib/explorer-store.js');
            const experiment = createExplorerExperiment({
                name: opts.name,
                source_repo_path: opts.repo,
                eval_command: opts.eval,
                base_ref: opts.baseRef,
                eval_timeout_sec: timeout,
                policy,
                retention,
            });
            console.log(`Experiment created: ${experiment.id}`);
            console.log(`Name: ${experiment.name}`);
            console.log(`Repo clone: ${experiment.repo_path}`);
            console.log(`Base SHA: ${experiment.base_ref}`);
            if (!opts.skipBaseline) {
                const baselineArtifactDir = getExplorerArtifactDir(experiment.id, 'baseline');
                const baseline = runEvaluation({
                    worktree_path: experiment.repo_path,
                    eval_command: experiment.eval_command,
                    timeout_sec: experiment.eval_timeout_sec,
                    artifact_dir: baselineArtifactDir,
                });
                // Record the baseline outcome as a 'note' event on the experiment.
                const store = new ExplorerStore();
                try {
                    store.createEvent(experiment.id, 'note', {
                        message: 'baseline evaluation',
                        status: baseline.status,
                        score: baseline.score ?? null,
                        artifact_dir: baselineArtifactDir,
                        error: baseline.error || null,
                    });
                }
                finally {
                    store.close();
                }
                if (baseline.status === 'eval_ok' && Number.isFinite(baseline.score)) {
                    console.log(`Baseline score: ${baseline.score.toFixed(6)}`);
                    console.log(`Baseline artifacts: ${baselineArtifactDir}`);
                }
                else {
                    // A failed baseline is reported on stderr only; the exit
                    // status is left untouched.
                    console.error(`Baseline eval status: ${baseline.status}`);
                    if (baseline.error) {
                        console.error(`Baseline eval error: ${baseline.error}`);
                    }
                    console.error(`Baseline logs: ${baseline.stdout_path}, ${baseline.stderr_path}`);
                }
            }
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
966
|
+
// `labgate explore list`: print one line per stored experiment.
exploreCmd
    .command('list')
    .description('List existing Solution Explorer experiments')
    .option('-n, --limit <count>', 'Max experiments to list', '50')
    .option('--offset <count>', 'Offset for pagination', '0')
    .action(async (opts) => {
        try {
            // Fall back to the defaults for unparsable or out-of-range paging
            // values, matching the `> 0` fallback used by `explore status` and
            // `explore leaderboard`.
            const limitParsed = parseInt(opts.limit, 10);
            const offsetParsed = parseInt(opts.offset, 10);
            const limit = Number.isFinite(limitParsed) && limitParsed > 0 ? limitParsed : 50;
            const offset = Number.isFinite(offsetParsed) && offsetParsed >= 0 ? offsetParsed : 0;
            const { ExplorerStore } = await import('./lib/explorer-store.js');
            const store = new ExplorerStore();
            try {
                const rows = store.listExperiments(limit, offset);
                if (rows.length === 0) {
                    console.log('No experiments found.');
                    return;
                }
                for (const row of rows) {
                    console.log(`${row.id} ${row.status.padEnd(8)} ${row.name} base=${row.base_ref.slice(0, 12)} created=${row.created_at}`);
                }
            }
            finally {
                // Runs on the early return above as well.
                store.close();
            }
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
996
|
+
// `labgate explore status <experimentId>`: print a summary of the experiment,
// its best run, and its most recent runs.
exploreCmd
    .command('status')
    .description('Show experiment status, best run, and recent runs')
    .argument('<experimentId>', 'Experiment id')
    .option('-n, --limit <count>', 'How many recent runs to show', '10')
    .action(async (experimentId, opts) => {
        try {
            const limitParsed = parseInt(opts.limit, 10);
            const limit = Number.isFinite(limitParsed) && limitParsed > 0 ? limitParsed : 10;
            const { ExplorerStore } = await import('./lib/explorer-store.js');
            const store = new ExplorerStore();
            // Read everything first and close the store before any exit path.
            // process.exit() inside a try block skips the `finally`, which
            // previously left the store open on the not-found path.
            let snapshot = null;
            try {
                const experiment = store.getExperiment(experimentId);
                if (experiment) {
                    snapshot = {
                        experiment,
                        best: store.getBestRun(experimentId),
                        count: store.getRunCount(experimentId),
                        active: store.hasActiveRun(experimentId),
                        recent: store.listRuns(experimentId, { limit, offset: 0 }),
                    };
                }
            }
            finally {
                store.close();
            }
            if (!snapshot) {
                console.error(`Experiment not found: ${experimentId}`);
                process.exit(1);
            }
            const { experiment, best, count, active, recent } = snapshot;
            console.log(`Experiment: ${experiment.id}`);
            console.log(`Name: ${experiment.name}`);
            console.log(`Status: ${experiment.status}`);
            console.log(`Runs: ${count}${active ? ' (active run in progress)' : ''}`);
            console.log(`Base SHA: ${experiment.base_ref}`);
            // `!= null` also covers an undefined score, matching the check used
            // for the recent-run rows below.
            if (best && best.score != null) {
                console.log(`Best run: ${best.id} score=${best.score.toFixed(6)} status=${best.status}`);
            }
            else {
                console.log('Best run: none yet');
            }
            console.log('');
            console.log('Recent runs:');
            if (recent.length === 0) {
                console.log(' (none)');
            }
            else {
                for (const run of recent) {
                    const score = run.score == null ? '-' : run.score.toFixed(6);
                    console.log(` ${run.id} ${run.status.padEnd(13)} score=${score} parent=${(run.parent_run_id || '-').slice(0, 16)}`);
                }
            }
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
1049
|
+
// `labgate explore pause <experimentId>`: set the experiment status to 'paused'.
exploreCmd
    .command('pause')
    .description('Pause an experiment (manual run steps become no-ops)')
    .argument('<experimentId>', 'Experiment id')
    .action(async (experimentId) => {
        // Errors thrown by the explorer library are reported the same way as in
        // the other `explore` subcommands rather than escaping as a rejection.
        try {
            const { setExperimentStatus } = await import('./lib/explorer.js');
            const ok = setExperimentStatus(experimentId, 'paused');
            if (!ok) {
                console.error(`Experiment not found: ${experimentId}`);
                process.exit(1);
            }
            console.log(`Paused: ${experimentId}`);
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
1062
|
+
// `labgate explore resume <experimentId>`: set the experiment status to 'active'.
exploreCmd
    .command('resume')
    .description('Resume an experiment')
    .argument('<experimentId>', 'Experiment id')
    .action(async (experimentId) => {
        // Errors thrown by the explorer library are reported the same way as in
        // the other `explore` subcommands rather than escaping as a rejection.
        try {
            const { setExperimentStatus } = await import('./lib/explorer.js');
            const ok = setExperimentStatus(experimentId, 'active');
            if (!ok) {
                console.error(`Experiment not found: ${experimentId}`);
                process.exit(1);
            }
            console.log(`Resumed: ${experimentId}`);
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
1075
|
+
// `labgate explore tick`: run a single exploration step and print its result
// as JSON. Exit status: 1 for a no-work result flagged is_error, 0 for any
// other no-work result, 2 when work was done but the status is not 'eval_ok'.
exploreCmd
    .command('tick')
    .description('Run one manual exploration step for an experiment')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
        try {
            const { runAutopilotTick } = await import('./lib/explorer.js');
            const outcome = runAutopilotTick(opts.experiment);
            console.log(JSON.stringify(outcome, null, 2));
            if (!outcome.did_work) {
                const exitCode = outcome.is_error === true ? 1 : 0;
                process.exit(exitCode);
            }
            const finishedBadly = outcome.status && outcome.status !== 'eval_ok';
            if (finishedBadly) {
                process.exit(2);
            }
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1096
|
+
// `labgate explore tree`: dump the experiment's run tree as pretty-printed JSON.
exploreCmd
    .command('tree')
    .description('Print run tree view as JSON')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--mode <mode>', 'Tree mode: best_path|full', 'best_path')
    .action(async (opts) => {
        try {
            // Anything other than exactly "full" (after trimming) selects best_path.
            const requested = String(opts.mode || 'best_path').trim();
            const mode = requested === 'full' ? 'full' : 'best_path';
            const { getExperimentTree } = await import('./lib/explorer.js');
            const tree = getExperimentTree(opts.experiment, mode);
            console.log(JSON.stringify(tree, null, 2));
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1113
|
+
// `labgate explore leaderboard`: print the top-K runs of an experiment as JSON.
exploreCmd
    .command('leaderboard')
    .description('Print top scoring runs for an experiment')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('-k, --top <count>', 'Top-K rows', '10')
    .action(async (opts) => {
        try {
            // Unparsable or non-positive counts fall back to 10.
            const requested = parseInt(opts.top, 10);
            let topK = 10;
            if (Number.isFinite(requested) && requested > 0) {
                topK = requested;
            }
            const { getExperimentLeaderboard } = await import('./lib/explorer.js');
            const rows = getExperimentLeaderboard(opts.experiment, topK);
            console.log(JSON.stringify(rows, null, 2));
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1131
|
+
// `labgate explore gc`: compute (and with --yes, apply) a pruning plan for an
// experiment, optionally overriding individual retention settings for this run.
exploreCmd
    .command('gc')
    .description('Prune old worktrees/artifacts according to retention policy')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--yes', 'Apply GC changes (default is dry-run)')
    .option('--prune-worktrees', 'Enable worktree pruning', true)
    .option('--no-prune-worktrees', 'Disable worktree pruning')
    .option('--prune-artifacts', 'Enable artifact pruning', true)
    .option('--no-prune-artifacts', 'Disable artifact pruning')
    .option('--artifacts <mode>', 'Override artifacts mode: all|minimal|none')
    .option('--keep-last <count>', 'Override keep_last_n')
    .option('--keep-failed-last <count>', 'Override keep_failed_last_n')
    .option('--keep-worktrees', 'Override keep_worktrees=true')
    .option('--keep-best', 'Override keep_best=true')
    .option('--no-keep-best', 'Override keep_best=false')
    .option('--after-days <days>', 'Override prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Override max_delete_runs')
    .action(async (opts) => {
        // Report a usage error and exit with status 1.
        const fail = (message) => {
            console.error(`Error: ${message}`);
            process.exit(1);
        };
        // Strictly parse an integer override and enforce its lower bound, so a
        // typo can never forward NaN or a negative count into a pruning run.
        // The bounds mirror the ones `explore create` applies to the same fields.
        const integerOverride = (raw, min, flag) => {
            const text = String(raw).trim();
            const value = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
            if (!Number.isSafeInteger(value) || value < min) {
                fail(`${flag} must be an integer >= ${min}.`);
            }
            return value;
        };
        try {
            // Only explicitly supplied flags become overrides.
            const retention = {};
            if (opts.artifacts !== undefined) {
                const mode = String(opts.artifacts).trim().toLowerCase();
                if (!['all', 'minimal', 'none'].includes(mode)) {
                    fail('--artifacts must be one of: all, minimal, none.');
                }
                retention.artifacts = mode;
            }
            if (opts.keepLast !== undefined) {
                retention.keep_last_n = integerOverride(opts.keepLast, 0, '--keep-last');
            }
            if (opts.keepFailedLast !== undefined) {
                retention.keep_failed_last_n = integerOverride(opts.keepFailedLast, 0, '--keep-failed-last');
            }
            if (opts.keepWorktrees === true) {
                retention.keep_worktrees = true;
            }
            if (opts.keepBest !== undefined) {
                retention.keep_best = opts.keepBest !== false;
            }
            if (opts.afterDays !== undefined) {
                retention.prune_artifacts_after_days = integerOverride(opts.afterDays, 1, '--after-days');
            }
            if (opts.maxDeleteRuns !== undefined) {
                retention.max_delete_runs = integerOverride(opts.maxDeleteRuns, 1, '--max-delete-runs');
            }
            const { runExperimentGc } = await import('./lib/explorer.js');
            const plan = runExperimentGc(opts.experiment, {
                // Dry-run unless --yes was passed.
                dry_run: opts.yes !== true,
                prune_worktrees: opts.pruneWorktrees !== false,
                prune_artifacts: opts.pruneArtifacts !== false,
                retention: Object.keys(retention).length > 0 ? retention : undefined,
            });
            console.log(JSON.stringify(plan, null, 2));
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
1179
|
+
// Parent command for `labgate explore retention <show|set>`.
const exploreRetentionCmd = exploreCmd.command('retention').description('Get or update retention policy for an experiment');
|
|
1182
|
+
// `labgate explore retention show`: print the experiment's retention policy as JSON.
exploreRetentionCmd
    .command('show')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
        try {
            const { getExperimentOverview } = await import('./lib/explorer.js');
            const overview = getExperimentOverview(opts.experiment);
            const found = Boolean(overview);
            if (!found) {
                console.error(`Experiment not found: ${opts.experiment}`);
                process.exit(1);
            }
            const policyJson = JSON.stringify(overview.retention_policy, null, 2);
            console.log(policyJson);
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1200
|
+
// `labgate explore retention set`: patch individual retention fields of an
// experiment, then print the resulting policy as JSON.
exploreRetentionCmd
    .command('set')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--keep-worktrees', 'Set keep_worktrees=true')
    .option('--no-keep-worktrees', 'Set keep_worktrees=false')
    .option('--artifacts <mode>', 'Set artifacts mode: all|minimal|none')
    .option('--keep-last <count>', 'Set keep_last_n')
    .option('--keep-failed-last <count>', 'Set keep_failed_last_n')
    .option('--keep-best', 'Set keep_best=true')
    .option('--no-keep-best', 'Set keep_best=false')
    .option('--after-days <days>', 'Set prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Set max_delete_runs')
    .action(async (opts) => {
        // Report a usage error and exit with status 1.
        const fail = (message) => {
            console.error(`Error: ${message}`);
            process.exit(1);
        };
        // Strictly parse an integer field and enforce its lower bound, so NaN,
        // negative counts, or values like "12abc" are never written into the
        // stored policy. The bounds mirror the ones `explore create` applies.
        const integerField = (raw, min, flag) => {
            const text = String(raw).trim();
            const value = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
            if (!Number.isSafeInteger(value) || value < min) {
                fail(`${flag} must be an integer >= ${min}.`);
            }
            return value;
        };
        try {
            // Only explicitly supplied flags end up in the patch.
            const patch = {};
            if (opts.keepWorktrees !== undefined) {
                patch.keep_worktrees = opts.keepWorktrees;
            }
            if (opts.artifacts !== undefined) {
                const mode = String(opts.artifacts).trim().toLowerCase();
                if (!['all', 'minimal', 'none'].includes(mode)) {
                    fail('--artifacts must be one of: all, minimal, none.');
                }
                patch.artifacts = mode;
            }
            if (opts.keepLast !== undefined) {
                patch.keep_last_n = integerField(opts.keepLast, 0, '--keep-last');
            }
            if (opts.keepFailedLast !== undefined) {
                patch.keep_failed_last_n = integerField(opts.keepFailedLast, 0, '--keep-failed-last');
            }
            if (opts.keepBest !== undefined) {
                patch.keep_best = opts.keepBest;
            }
            if (opts.afterDays !== undefined) {
                patch.prune_artifacts_after_days = integerField(opts.afterDays, 1, '--after-days');
            }
            if (opts.maxDeleteRuns !== undefined) {
                patch.max_delete_runs = integerField(opts.maxDeleteRuns, 1, '--max-delete-runs');
            }
            if (Object.keys(patch).length === 0) {
                fail('no retention fields provided.');
            }
            const { setExperimentRetentionPolicy, getExperimentOverview } = await import('./lib/explorer.js');
            const ok = setExperimentRetentionPolicy(opts.experiment, patch);
            if (!ok) {
                console.error(`Experiment not found: ${opts.experiment}`);
                process.exit(1);
            }
            // Re-read the overview so the printed policy is the stored one.
            const overview = getExperimentOverview(opts.experiment);
            console.log(JSON.stringify(overview?.retention_policy || {}, null, 2));
        }
        catch (err) {
            console.error(`Error: ${err?.message ?? String(err)}`);
            process.exit(1);
        }
    });
|
|
1247
|
+
// `labgate explore compare`: compare a run against the best run, its parent,
// or another run id, and print the comparison as JSON.
exploreCmd
    .command('compare')
    .description('Compare one run against best/parent/another run')
    .requiredOption('--experiment <id>', 'Experiment id')
    .requiredOption('--run <runId>', 'Selected run id')
    .option('--to <target>', 'Comparison target: best|parent|<runId>', 'best')
    .option('--diff', 'Include full patch artifact in compare output')
    .option('--stat', 'Show diff stats (included by default)', true)
    .action(async (opts) => {
        try {
            const { compareRun } = await import('./lib/explorer.js');
            // The patch body is only requested when --diff was passed.
            const request = {
                experiment_id: opts.experiment,
                run_id: opts.run,
                compare_to: opts.to,
                include_patch: opts.diff === true,
            };
            const comparison = compareRun(request);
            console.log(JSON.stringify(comparison, null, 2));
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1271
|
+
// `labgate explore overview`: print the aggregated experiment overview as JSON.
exploreCmd
    .command('overview')
    .description('Print aggregated experiment overview JSON for UI/status panels')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
        try {
            const { getExperimentOverview } = await import('./lib/explorer.js');
            const overview = getExperimentOverview(opts.experiment);
            const found = Boolean(overview);
            if (!found) {
                console.error(`Experiment not found: ${opts.experiment}`);
                process.exit(1);
            }
            const overviewJson = JSON.stringify(overview, null, 2);
            console.log(overviewJson);
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1290
|
+
// `labgate explore run`: print the details of a single run as JSON.
exploreCmd
    .command('run')
    .description('Print run details and artifact paths')
    .requiredOption('--id <runId>', 'Run id')
    .action(async (opts) => {
        try {
            const { getRunDetails } = await import('./lib/explorer.js');
            const details = getRunDetails(opts.id);
            const found = Boolean(details);
            if (!found) {
                console.error(`Run not found: ${opts.id}`);
                process.exit(1);
            }
            const detailsJson = JSON.stringify(details, null, 2);
            console.log(detailsJson);
        }
        catch (err) {
            const reason = err?.message ?? String(err);
            console.error(`Error: ${reason}`);
            process.exit(1);
        }
    });
|
|
1309
|
+
// `labgate explore mcp`: start the Solution Explorer MCP server. When --db is
// given, its absolute path is exported through LABGATE_EXPLORER_DB and also
// forwarded to the server as `--db <path>`.
exploreCmd
    .command('mcp')
    .description('Run the Solution Explorer MCP server over stdio')
    .option('--db <path>', 'Override explorer DB path')
    .action(async (opts) => {
        const dbOverride = opts.db;
        if (dbOverride) {
            process.env.LABGATE_EXPLORER_DB = (0, path_1.resolve)(dbOverride);
        }
        const { main } = await import('./lib/explorer-mcp.js');
        const serverArgs = [];
        if (dbOverride) {
            serverArgs.push('--db', (0, path_1.resolve)(dbOverride));
        }
        await main(serverArgs);
    });
|
|
705
1321
|
// ── labgate license ──────────────────────────────────────
|
|
706
1322
|
const licenseCmd = program
|
|
707
1323
|
.command('license')
|