labgate 0.5.30 → 0.5.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. package/README.md +48 -0
  2. package/dist/cli.js +616 -0
  3. package/dist/cli.js.map +1 -1
  4. package/dist/lib/config.d.ts +11 -0
  5. package/dist/lib/config.js +44 -0
  6. package/dist/lib/config.js.map +1 -1
  7. package/dist/lib/container.d.ts +22 -3
  8. package/dist/lib/container.js +373 -67
  9. package/dist/lib/container.js.map +1 -1
  10. package/dist/lib/display-mcp.d.ts +10 -0
  11. package/dist/lib/display-mcp.js +160 -0
  12. package/dist/lib/display-mcp.js.map +1 -0
  13. package/dist/lib/display-store.d.ts +24 -0
  14. package/dist/lib/display-store.js +150 -0
  15. package/dist/lib/display-store.js.map +1 -0
  16. package/dist/lib/explorer-autopilot.d.ts +16 -0
  17. package/dist/lib/explorer-autopilot.js +573 -0
  18. package/dist/lib/explorer-autopilot.js.map +1 -0
  19. package/dist/lib/explorer-claude.d.ts +16 -0
  20. package/dist/lib/explorer-claude.js +361 -0
  21. package/dist/lib/explorer-claude.js.map +1 -0
  22. package/dist/lib/explorer-compare.d.ts +9 -0
  23. package/dist/lib/explorer-compare.js +190 -0
  24. package/dist/lib/explorer-compare.js.map +1 -0
  25. package/dist/lib/explorer-eval.d.ts +23 -0
  26. package/dist/lib/explorer-eval.js +161 -0
  27. package/dist/lib/explorer-eval.js.map +1 -0
  28. package/dist/lib/explorer-gc.d.ts +11 -0
  29. package/dist/lib/explorer-gc.js +304 -0
  30. package/dist/lib/explorer-gc.js.map +1 -0
  31. package/dist/lib/explorer-git.d.ts +14 -0
  32. package/dist/lib/explorer-git.js +136 -0
  33. package/dist/lib/explorer-git.js.map +1 -0
  34. package/dist/lib/explorer-lock.d.ts +5 -0
  35. package/dist/lib/explorer-lock.js +100 -0
  36. package/dist/lib/explorer-lock.js.map +1 -0
  37. package/dist/lib/explorer-mcp.d.ts +11 -0
  38. package/dist/lib/explorer-mcp.js +611 -0
  39. package/dist/lib/explorer-mcp.js.map +1 -0
  40. package/dist/lib/explorer-retention.d.ts +4 -0
  41. package/dist/lib/explorer-retention.js +58 -0
  42. package/dist/lib/explorer-retention.js.map +1 -0
  43. package/dist/lib/explorer-store.d.ts +77 -0
  44. package/dist/lib/explorer-store.js +950 -0
  45. package/dist/lib/explorer-store.js.map +1 -0
  46. package/dist/lib/explorer-types.d.ts +161 -0
  47. package/dist/lib/explorer-types.js +3 -0
  48. package/dist/lib/explorer-types.js.map +1 -0
  49. package/dist/lib/explorer.d.ts +31 -0
  50. package/dist/lib/explorer.js +247 -0
  51. package/dist/lib/explorer.js.map +1 -0
  52. package/dist/lib/results-mcp.d.ts +2 -2
  53. package/dist/lib/results-mcp.js +26 -4
  54. package/dist/lib/results-mcp.js.map +1 -1
  55. package/dist/lib/results-store.d.ts +1 -0
  56. package/dist/lib/results-store.js +87 -3
  57. package/dist/lib/results-store.js.map +1 -1
  58. package/dist/lib/runtime.d.ts +6 -0
  59. package/dist/lib/runtime.js +46 -19
  60. package/dist/lib/runtime.js.map +1 -1
  61. package/dist/lib/test/integration-harness.js +1 -1
  62. package/dist/lib/test/integration-harness.js.map +1 -1
  63. package/dist/lib/ui.d.ts +1 -0
  64. package/dist/lib/ui.html +11231 -4370
  65. package/dist/lib/ui.js +2564 -277
  66. package/dist/lib/ui.js.map +1 -1
  67. package/dist/lib/web-terminal.d.ts +13 -0
  68. package/dist/lib/web-terminal.js +118 -15
  69. package/dist/lib/web-terminal.js.map +1 -1
  70. package/dist/mcp-bundles/display-mcp.bundle.mjs +30209 -0
  71. package/dist/mcp-bundles/explorer-mcp.bundle.mjs +40044 -0
  72. package/dist/mcp-bundles/results-mcp.bundle.mjs +100 -7
  73. package/package.json +4 -3
  74. package/templates/tsp-lab/API_CONTRACT.md +20 -0
  75. package/templates/tsp-lab/EVAL.md +20 -0
  76. package/templates/tsp-lab/PROBLEM.md +18 -0
  77. package/templates/tsp-lab/data/generate_instances.py +51 -0
  78. package/templates/tsp-lab/data/instances.jsonl +12 -0
  79. package/templates/tsp-lab/eval.py +148 -0
  80. package/templates/tsp-lab/solver.py +88 -0
  81. package/templates/tsp-lab/stub-patches/enable_two_opt.patch +14 -0
package/dist/cli.js CHANGED
@@ -37,6 +37,7 @@ const commander_1 = require("commander");
37
37
  const path_1 = require("path");
38
38
  const fs_1 = require("fs");
39
39
  const os_1 = require("os");
40
+ const child_process_1 = require("child_process");
40
41
  const net_1 = require("net");
41
42
  const readline_1 = require("readline");
42
43
  const config_js_1 = require("./lib/config.js");
@@ -378,11 +379,13 @@ program
378
379
  process.exit(1);
379
380
  }
380
381
  const { startUI } = await import('./lib/ui.js');
382
+ const prewarmImageOnStartup = process.env.LABGATE_UI_PREWARM_ON_START === '1';
381
383
  const server = startUI({
382
384
  port: Number.isFinite(parsedPort) ? parsedPort : undefined,
383
385
  socketPath: hasSocket ? socketInput : undefined,
384
386
  listenAddress: hasListenAddress ? listenAddressInput : undefined,
385
387
  standalone: true,
388
+ prewarmImageOnStartup,
386
389
  });
387
390
  // In standalone mode, keep the process alive
388
391
  server.ref();
@@ -395,6 +398,86 @@ program
395
398
  const { listSessions } = await import('./lib/container.js');
396
399
  await listSessions();
397
400
  });
401
// ── labgate continue [id-or-name] ─────────────────────────
// Attaches the current terminal to the tmux session of a web terminal record.
// Only records whose `node` equals this machine's hostname are attachable.
// With a selector, the record is resolved via resolveWebTerminalRecord();
// without one, the first local record that has a live tmux session is used.
// The process exits with tmux's exit status (128 + signal number when tmux
// was killed by a signal, 1 on any other failure).
program
    .command('continue')
    .description('Attach to a running web terminal session (id, prefix, or custom name)')
    .argument('[idOrName]', 'Web terminal session id, id prefix, or custom name')
    .option('--latest', 'Attach to the most recent runnable local web terminal session')
    .action(async (idOrName, opts) => {
    const terminals = await import('./lib/web-terminal.js');
    const { getTmuxBinary, hasTmuxSession, listWebTerminalRecords, resolveWebTerminalRecord } = terminals;
    const selector = (idOrName || '').trim();
    const thisNode = (0, os_1.hostname)();
    const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY);
    // Without a TTY there is nobody to pick a session for, so a selector or --latest is mandatory.
    if (!selector && !opts.latest && !interactive) {
        console.error('No session id provided in non-interactive mode. Pass a session id/name or use `--latest`.');
        process.exit(1);
    }
    let session = null;
    if (selector) {
        const lookup = resolveWebTerminalRecord(selector);
        const localCandidates = lookup.matches.filter((entry) => entry.node === thisNode);
        const exact = lookup.record;
        if (exact && exact.node === thisNode) {
            session = exact;
        }
        else if (localCandidates.length === 1) {
            [session] = localCandidates;
        }
        else if (localCandidates.length > 1) {
            console.error(`Ambiguous session selector "${selector}". Matches:`);
            localCandidates.forEach((entry) => {
                const label = entry.name ? ` (${entry.name})` : '';
                console.error(` - ${entry.id}${label}`);
            });
            process.exit(1);
        }
        else if (exact) {
            // The selector resolved, but to a record owned by a different node.
            console.error(`Session "${selector}" was found on node "${exact.node}". ` +
                `Run \`labgate continue\` on that node.`);
            process.exit(1);
        }
        else {
            console.error(`Session "${selector}" not found.`);
            process.exit(1);
        }
    }
    if (session) {
        // An explicitly selected record must still have a live tmux session.
        const alive = await hasTmuxSession(session.tmuxSession);
        if (!alive) {
            console.error(`Session "${session.id}" is not running.`);
            process.exit(1);
        }
    }
    else {
        // No selector: take the first local record (in store order) whose tmux session exists.
        const localRecords = listWebTerminalRecords().filter((entry) => entry.node === thisNode);
        for (const candidate of localRecords) {
            const alive = await hasTmuxSession(candidate.tmuxSession);
            if (alive) {
                session = candidate;
                break;
            }
        }
        if (!session) {
            console.error('No runnable local web terminal session found. Start one in `labgate ui` first.');
            process.exit(1);
        }
    }
    console.error('Attaching terminal. Tip: press Ctrl+b then d to detach and keep the session running for Web UI reconnect. ' +
        'Ctrl+C sends SIGINT to the active process.');
    const tmuxBin = await getTmuxBinary();
    const tmuxArgs = ['attach-session', '-d', '-t', session.tmuxSession];
    const { error, status, signal } = (0, child_process_1.spawnSync)(tmuxBin, tmuxArgs, {
        stdio: 'inherit',
    });
    if (error) {
        console.error(`Failed to run tmux attach-session: ${error.message}`);
        process.exit(1);
    }
    if (status !== null) {
        process.exit(status);
    }
    if (signal) {
        // Mirror the shell convention of 128 + signal number for signal deaths.
        const signalNum = os_1.constants.signals[signal];
        const exitCode = typeof signalNum === 'number' ? 128 + signalNum : 1;
        process.exit(exitCode);
    }
    process.exit(1);
});
398
481
  // ── labgate stop <id> ─────────────────────────────────────
399
482
  program
400
483
  .command('stop')
@@ -702,6 +785,539 @@ datasetCmd
702
785
  console.log(` Path: ${hostPath}`);
703
786
  console.log(` Scanned: ${stats.scanned_at}`);
704
787
  });
788
// ── labgate explore ─────────────────────────────────────
const exploreCmd = program
    .command('explore')
    .description('Solution Explorer commands');
// labgate explore create
// Validates every CLI option, builds the exploration policy and retention
// policy objects, creates the experiment via createExplorerExperiment(), and
// (unless --skip-baseline is given) runs one baseline evaluation whose result
// is recorded as a 'note' event. Any validation failure or thrown error
// prints a message to stderr and exits with code 1.
exploreCmd
    .command('create')
    .description('Create a new Solution Explorer experiment from an existing git repository')
    .requiredOption('--name <name>', 'Experiment name')
    .requiredOption('--repo <path>', 'Source repository path to clone for isolated exploration')
    .requiredOption('--eval <command>', 'Evaluation command (must print JSON as last stdout line)')
    .option('--base-ref <ref>', 'Base git ref (default: HEAD)', 'HEAD')
    .option('--timeout <sec>', 'Evaluation timeout seconds', '120')
    .option('--epsilon <value>', 'Exploration epsilon (0..1)', '0.15')
    .option('--top-n <count>', 'Top-N pool for epsilon exploration', '5')
    .option('--max-runs <count>', 'Optional max run budget')
    .option('--agent-mode <mode>', 'Agent mode: stub|claude_headless', 'stub')
    .option('--stub-patch <path>', 'Patch file path for stub agent mode')
    .option('--claude-resume-session <id>', 'Optional Claude session id to resume in claude_headless mode')
    .option('--claude-timeout <sec>', 'Optional Claude headless timeout seconds (60..14400)')
    .option('--keep-worktrees', 'Retention policy: keep run worktrees instead of pruning')
    .option('--artifacts-policy <mode>', 'Retention artifacts mode: all|minimal|none', 'minimal')
    .option('--keep-last <count>', 'Retention keep_last_n', '50')
    .option('--keep-failed-last <count>', 'Retention keep_failed_last_n', '20')
    .option('--keep-best', 'Retention keep best run protected', true)
    .option('--no-keep-best', 'Retention do not specially protect best run')
    .option('--prune-after-days <days>', 'Retention prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Retention max_delete_runs', '200')
    .option('--skip-baseline', 'Skip baseline evaluation check after experiment creation')
    .action(async (opts) => {
    try {
        // Strict integer parse: parseInt() alone would accept "1e3" (as 1),
        // "30s" or "5.9", contradicting the "must be an integer" messages.
        const parseStrictInt = (raw) => {
            const text = String(raw).trim();
            return /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
        };
        // Report a validation error and terminate with exit code 1.
        const fail = (message) => {
            console.error(message);
            process.exit(1);
        };
        // Parse an integer option and enforce an inclusive [min, max] range.
        const intInRange = (raw, min, max, message) => {
            const value = parseStrictInt(raw);
            if (!Number.isInteger(value) || value < min || value > max) {
                fail(message);
            }
            return value;
        };
        const timeout = intInRange(opts.timeout, 5, 86400, 'Error: --timeout must be an integer between 5 and 86400.');
        // Number('') is 0, so an empty value must be rejected explicitly.
        const epsilonText = String(opts.epsilon).trim();
        const epsilon = epsilonText === '' ? Number.NaN : Number(epsilonText);
        if (!Number.isFinite(epsilon) || epsilon < 0 || epsilon > 1) {
            fail('Error: --epsilon must be a number between 0 and 1.');
        }
        const topN = intInRange(opts.topN, 1, 100, 'Error: --top-n must be an integer between 1 and 100.');
        const maxRuns = opts.maxRuns !== undefined
            ? intInRange(opts.maxRuns, 1, Infinity, 'Error: --max-runs must be an integer >= 1.')
            : undefined;
        const agentModeRaw = String(opts.agentMode || 'stub').trim().toLowerCase();
        // 'claude' is accepted as shorthand for 'claude_headless'.
        const agentMode = agentModeRaw === 'claude' ? 'claude_headless' : agentModeRaw;
        if (agentMode !== 'stub' && agentMode !== 'claude_headless') {
            fail('Error: --agent-mode must be one of: stub, claude_headless.');
        }
        const claudeTimeout = opts.claudeTimeout !== undefined
            ? intInRange(opts.claudeTimeout, 60, 4 * 60 * 60, 'Error: --claude-timeout must be an integer between 60 and 14400.')
            : undefined;
        const artifactsPolicy = String(opts.artifactsPolicy || 'minimal').trim().toLowerCase();
        if (!['all', 'minimal', 'none'].includes(artifactsPolicy)) {
            fail('Error: --artifacts-policy must be one of: all, minimal, none.');
        }
        const keepLast = intInRange(opts.keepLast, 0, Infinity, 'Error: --keep-last must be an integer >= 0.');
        const keepFailedLast = intInRange(opts.keepFailedLast, 0, Infinity, 'Error: --keep-failed-last must be an integer >= 0.');
        const maxDeleteRuns = intInRange(opts.maxDeleteRuns, 1, Infinity, 'Error: --max-delete-runs must be an integer >= 1.');
        const pruneAfterDays = opts.pruneAfterDays !== undefined
            ? intInRange(opts.pruneAfterDays, 1, Infinity, 'Error: --prune-after-days must be an integer >= 1.')
            : undefined;
        const policy = {
            epsilon,
            top_n: topN,
            agent_mode: agentMode,
        };
        if (maxRuns !== undefined)
            policy.max_runs = maxRuns;
        if (opts.stubPatch)
            policy.stub_patch_file = opts.stubPatch;
        // Claude-specific settings are only recorded for the claude_headless agent.
        if (agentMode === 'claude_headless') {
            const resumeId = String(opts.claudeResumeSession || '').trim();
            if (resumeId)
                policy.claude_resume_session_id = resumeId;
            if (claudeTimeout !== undefined)
                policy.claude_timeout_sec = claudeTimeout;
        }
        const retention = {
            keep_worktrees: !!opts.keepWorktrees,
            artifacts: artifactsPolicy,
            keep_last_n: keepLast,
            keep_best: opts.keepBest !== false,
            keep_failed_last_n: keepFailedLast,
            ...(pruneAfterDays !== undefined ? { prune_artifacts_after_days: pruneAfterDays } : {}),
            max_delete_runs: maxDeleteRuns,
        };
        const { createExplorerExperiment } = await import('./lib/explorer.js');
        const { runEvaluation } = await import('./lib/explorer-eval.js');
        const { getExplorerArtifactDir } = await import('./lib/config.js');
        const { ExplorerStore } = await import('./lib/explorer-store.js');
        const experiment = createExplorerExperiment({
            name: opts.name,
            source_repo_path: opts.repo,
            eval_command: opts.eval,
            base_ref: opts.baseRef,
            eval_timeout_sec: timeout,
            policy,
            retention,
        });
        console.log(`Experiment created: ${experiment.id}`);
        console.log(`Name: ${experiment.name}`);
        console.log(`Repo clone: ${experiment.repo_path}`);
        console.log(`Base SHA: ${experiment.base_ref}`);
        if (!opts.skipBaseline) {
            // Run the eval command once against the fresh clone and log the outcome as an event.
            const baselineArtifactDir = getExplorerArtifactDir(experiment.id, 'baseline');
            const baseline = runEvaluation({
                worktree_path: experiment.repo_path,
                eval_command: experiment.eval_command,
                timeout_sec: experiment.eval_timeout_sec,
                artifact_dir: baselineArtifactDir,
            });
            const store = new ExplorerStore();
            try {
                store.createEvent(experiment.id, 'note', {
                    message: 'baseline evaluation',
                    status: baseline.status,
                    score: baseline.score ?? null,
                    artifact_dir: baselineArtifactDir,
                    error: baseline.error || null,
                });
            }
            finally {
                store.close();
            }
            if (baseline.status === 'eval_ok' && Number.isFinite(baseline.score)) {
                console.log(`Baseline score: ${baseline.score.toFixed(6)}`);
                console.log(`Baseline artifacts: ${baselineArtifactDir}`);
            }
            else {
                // A failed baseline is reported but does not change the exit code.
                console.error(`Baseline eval status: ${baseline.status}`);
                if (baseline.error)
                    console.error(`Baseline eval error: ${baseline.error}`);
                console.error(`Baseline logs: ${baseline.stdout_path}, ${baseline.stderr_path}`);
            }
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
966
// labgate explore list
// Prints one line per experiment returned by ExplorerStore.listExperiments().
// A non-numeric or non-positive --limit falls back to 50 and a non-numeric or
// negative --offset falls back to 0, matching how `status` and `leaderboard`
// treat their count options.
exploreCmd
    .command('list')
    .description('List existing Solution Explorer experiments')
    .option('-n, --limit <count>', 'Max experiments to list', '50')
    .option('--offset <count>', 'Offset for pagination', '0')
    .action(async (opts) => {
    try {
        const limitParsed = parseInt(opts.limit, 10);
        const offsetParsed = parseInt(opts.offset, 10);
        // Never forward negative pagination values to the store.
        const limit = Number.isFinite(limitParsed) && limitParsed > 0 ? limitParsed : 50;
        const offset = Number.isFinite(offsetParsed) && offsetParsed >= 0 ? offsetParsed : 0;
        const { ExplorerStore } = await import('./lib/explorer-store.js');
        const store = new ExplorerStore();
        try {
            const rows = store.listExperiments(limit, offset);
            if (rows.length === 0) {
                console.log('No experiments found.');
                return;
            }
            for (const row of rows) {
                console.log(`${row.id} ${row.status.padEnd(8)} ${row.name} base=${row.base_ref.slice(0, 12)} created=${row.created_at}`);
            }
        }
        finally {
            store.close();
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
996
// labgate explore status <experimentId>
// Prints the experiment header (id, name, status, run count, base SHA), the
// best run if it has a score, and the most recent runs (default 10).
// Exits with code 1 when the experiment does not exist or a store call throws.
exploreCmd
    .command('status')
    .description('Show experiment status, best run, and recent runs')
    .argument('<experimentId>', 'Experiment id')
    .option('-n, --limit <count>', 'How many recent runs to show', '10')
    .action(async (experimentId, opts) => {
    try {
        const limitParsed = parseInt(opts.limit, 10);
        const limit = Number.isFinite(limitParsed) && limitParsed > 0 ? limitParsed : 10;
        const { ExplorerStore } = await import('./lib/explorer-store.js');
        const store = new ExplorerStore();
        let found = false;
        try {
            const experiment = store.getExperiment(experimentId);
            if (experiment) {
                found = true;
                const best = store.getBestRun(experimentId);
                const count = store.getRunCount(experimentId);
                const active = store.hasActiveRun(experimentId);
                const recent = store.listRuns(experimentId, { limit, offset: 0 });
                console.log(`Experiment: ${experiment.id}`);
                console.log(`Name: ${experiment.name}`);
                console.log(`Status: ${experiment.status}`);
                console.log(`Runs: ${count}${active ? ' (active run in progress)' : ''}`);
                console.log(`Base SHA: ${experiment.base_ref}`);
                // `!= null` also rejects undefined, the same test the run loop below uses.
                if (best && best.score != null) {
                    console.log(`Best run: ${best.id} score=${best.score.toFixed(6)} status=${best.status}`);
                }
                else {
                    console.log('Best run: none yet');
                }
                console.log('');
                console.log('Recent runs:');
                if (recent.length === 0) {
                    console.log(' (none)');
                }
                else {
                    for (const run of recent) {
                        const score = run.score == null ? '-' : run.score.toFixed(6);
                        console.log(` ${run.id} ${run.status.padEnd(13)} score=${score} parent=${(run.parent_run_id || '-').slice(0, 16)}`);
                    }
                }
            }
        }
        finally {
            store.close();
        }
        // Exit only after the finally block has run: process.exit() terminates
        // immediately and would otherwise skip store.close().
        if (!found) {
            console.error(`Experiment not found: ${experimentId}`);
            process.exit(1);
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1049
// labgate explore pause <experimentId>
// Sets the experiment status to 'paused' via setExperimentStatus().
// Exits with code 1 when the call reports the experiment as missing or throws.
exploreCmd
    .command('pause')
    .description('Pause an experiment (manual run steps become no-ops)')
    .argument('<experimentId>', 'Experiment id')
    .action(async (experimentId) => {
    // Wrapped in try/catch like the other explore subcommands so a thrown
    // error is reported as a one-line message instead of an unhandled rejection.
    try {
        const { setExperimentStatus } = await import('./lib/explorer.js');
        const ok = setExperimentStatus(experimentId, 'paused');
        if (!ok) {
            console.error(`Experiment not found: ${experimentId}`);
            process.exit(1);
        }
        console.log(`Paused: ${experimentId}`);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1062
// labgate explore resume <experimentId>
// Sets the experiment status back to 'active' via setExperimentStatus().
// Exits with code 1 when the call reports the experiment as missing or throws.
exploreCmd
    .command('resume')
    .description('Resume an experiment')
    .argument('<experimentId>', 'Experiment id')
    .action(async (experimentId) => {
    // Wrapped in try/catch like the other explore subcommands so a thrown
    // error is reported as a one-line message instead of an unhandled rejection.
    try {
        const { setExperimentStatus } = await import('./lib/explorer.js');
        const ok = setExperimentStatus(experimentId, 'active');
        if (!ok) {
            console.error(`Experiment not found: ${experimentId}`);
            process.exit(1);
        }
        console.log(`Resumed: ${experimentId}`);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1075
// labgate explore tick --experiment <id>
// Runs a single runAutopilotTick() step and prints its result as JSON.
// Exit codes: when no work was done, 1 if the result is flagged is_error and
// 0 otherwise; 2 when work was done but the reported status is not 'eval_ok';
// 1 when the tick throws.
exploreCmd
    .command('tick')
    .description('Run one manual exploration step for an experiment')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
    try {
        const explorer = await import('./lib/explorer.js');
        const { runAutopilotTick } = explorer;
        const outcome = runAutopilotTick(opts.experiment);
        const rendered = JSON.stringify(outcome, null, 2);
        console.log(rendered);
        if (!outcome.did_work) {
            const idleCode = outcome.is_error === true ? 1 : 0;
            process.exit(idleCode);
        }
        const finishedBadly = Boolean(outcome.status) && outcome.status !== 'eval_ok';
        if (finishedBadly) {
            process.exit(2);
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1096
// labgate explore tree --experiment <id> [--mode best_path|full]
// Prints getExperimentTree() output as JSON. Any --mode value other than
// 'full' (after trimming) is treated as 'best_path'.
exploreCmd
    .command('tree')
    .description('Print run tree view as JSON')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--mode <mode>', 'Tree mode: best_path|full', 'best_path')
    .action(async (opts) => {
    try {
        const requestedMode = String(opts.mode || 'best_path').trim();
        let mode = 'best_path';
        if (requestedMode === 'full') {
            mode = 'full';
        }
        const explorer = await import('./lib/explorer.js');
        const { getExperimentTree } = explorer;
        const tree = getExperimentTree(opts.experiment, mode);
        const rendered = JSON.stringify(tree, null, 2);
        console.log(rendered);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1113
// labgate explore leaderboard --experiment <id> [-k <count>]
// Prints getExperimentLeaderboard() output as JSON. A non-numeric or
// non-positive --top value falls back to 10.
exploreCmd
    .command('leaderboard')
    .description('Print top scoring runs for an experiment')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('-k, --top <count>', 'Top-K rows', '10')
    .action(async (opts) => {
    try {
        let topK = 10;
        const requested = parseInt(opts.top, 10);
        if (Number.isFinite(requested) && requested > 0) {
            topK = requested;
        }
        const explorer = await import('./lib/explorer.js');
        const { getExperimentLeaderboard } = explorer;
        const rows = getExperimentLeaderboard(opts.experiment, topK);
        const rendered = JSON.stringify(rows, null, 2);
        console.log(rendered);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1131
// labgate explore gc --experiment <id>
// Builds an optional retention override from the CLI flags and passes it to
// runExperimentGc(), then prints the returned plan as JSON. The run is a dry
// run unless --yes is given. Numeric overrides are validated here so that a
// typo such as `--keep-last abc` is rejected instead of sending NaN into the
// retention override.
exploreCmd
    .command('gc')
    .description('Prune old worktrees/artifacts according to retention policy')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--yes', 'Apply GC changes (default is dry-run)')
    .option('--prune-worktrees', 'Enable worktree pruning', true)
    .option('--no-prune-worktrees', 'Disable worktree pruning')
    .option('--prune-artifacts', 'Enable artifact pruning', true)
    .option('--no-prune-artifacts', 'Disable artifact pruning')
    .option('--artifacts <mode>', 'Override artifacts mode: all|minimal|none')
    .option('--keep-last <count>', 'Override keep_last_n')
    .option('--keep-failed-last <count>', 'Override keep_failed_last_n')
    .option('--keep-worktrees', 'Override keep_worktrees=true')
    .option('--keep-best', 'Override keep_best=true')
    .option('--no-keep-best', 'Override keep_best=false')
    .option('--after-days <days>', 'Override prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Override max_delete_runs')
    .action(async (opts) => {
    try {
        // Parse a whole-number option; print an error and exit(1) when the
        // value is not an integer or is below `min`.
        const readIntOption = (raw, flag, min) => {
            const text = String(raw).trim();
            const value = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
            if (!Number.isInteger(value) || value < min) {
                console.error(`Error: ${flag} must be an integer >= ${min}.`);
                process.exit(1);
            }
            return value;
        };
        const retention = {};
        if (opts.artifacts !== undefined)
            retention.artifacts = opts.artifacts;
        if (opts.keepLast !== undefined)
            retention.keep_last_n = readIntOption(opts.keepLast, '--keep-last', 0);
        if (opts.keepFailedLast !== undefined)
            retention.keep_failed_last_n = readIntOption(opts.keepFailedLast, '--keep-failed-last', 0);
        if (opts.keepWorktrees === true)
            retention.keep_worktrees = true;
        // keepBest stays undefined unless --keep-best or --no-keep-best was passed.
        if (opts.keepBest !== undefined)
            retention.keep_best = opts.keepBest !== false;
        if (opts.afterDays !== undefined)
            retention.prune_artifacts_after_days = readIntOption(opts.afterDays, '--after-days', 1);
        if (opts.maxDeleteRuns !== undefined)
            retention.max_delete_runs = readIntOption(opts.maxDeleteRuns, '--max-delete-runs', 1);
        const { runExperimentGc } = await import('./lib/explorer.js');
        const plan = runExperimentGc(opts.experiment, {
            dry_run: opts.yes !== true,
            prune_worktrees: opts.pruneWorktrees !== false,
            prune_artifacts: opts.pruneArtifacts !== false,
            // Only send an override object when at least one field was set.
            retention: Object.keys(retention).length > 0 ? retention : undefined,
        });
        console.log(JSON.stringify(plan, null, 2));
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1179
// labgate explore retention: parent command for the retention subcommands.
const exploreRetentionCmd = exploreCmd
    .command('retention')
    .description('Get or update retention policy for an experiment');
// labgate explore retention show --experiment <id>
// Prints the retention_policy field of getExperimentOverview() as JSON.
// Exits with code 1 when the overview is missing or the lookup throws.
exploreRetentionCmd
    .command('show')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
    try {
        const explorer = await import('./lib/explorer.js');
        const { getExperimentOverview } = explorer;
        const summary = getExperimentOverview(opts.experiment);
        if (!summary) {
            console.error(`Experiment not found: ${opts.experiment}`);
            process.exit(1);
        }
        const rendered = JSON.stringify(summary.retention_policy, null, 2);
        console.log(rendered);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1200
// labgate explore retention set --experiment <id> [fields...]
// Collects the retention fields given on the command line into a patch,
// applies it with setExperimentRetentionPolicy(), and prints the resulting
// policy as JSON. Numeric fields are validated before the patch is applied so
// an unparsable value can never be written as NaN. Exits with code 1 when no
// field is given, a value is invalid, or the experiment is missing.
exploreRetentionCmd
    .command('set')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--keep-worktrees', 'Set keep_worktrees=true')
    .option('--no-keep-worktrees', 'Set keep_worktrees=false')
    .option('--artifacts <mode>', 'Set artifacts mode: all|minimal|none')
    .option('--keep-last <count>', 'Set keep_last_n')
    .option('--keep-failed-last <count>', 'Set keep_failed_last_n')
    .option('--keep-best', 'Set keep_best=true')
    .option('--no-keep-best', 'Set keep_best=false')
    .option('--after-days <days>', 'Set prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Set max_delete_runs')
    .action(async (opts) => {
    try {
        // Parse a whole-number option; print an error and exit(1) when the
        // value is not an integer or is below `min`.
        const readIntOption = (raw, flag, min) => {
            const text = String(raw).trim();
            const value = /^[+-]?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
            if (!Number.isInteger(value) || value < min) {
                console.error(`Error: ${flag} must be an integer >= ${min}.`);
                process.exit(1);
            }
            return value;
        };
        const patch = {};
        if (opts.keepWorktrees !== undefined)
            patch.keep_worktrees = opts.keepWorktrees;
        if (opts.artifacts !== undefined)
            patch.artifacts = opts.artifacts;
        if (opts.keepLast !== undefined)
            patch.keep_last_n = readIntOption(opts.keepLast, '--keep-last', 0);
        if (opts.keepFailedLast !== undefined)
            patch.keep_failed_last_n = readIntOption(opts.keepFailedLast, '--keep-failed-last', 0);
        if (opts.keepBest !== undefined)
            patch.keep_best = opts.keepBest;
        if (opts.afterDays !== undefined)
            patch.prune_artifacts_after_days = readIntOption(opts.afterDays, '--after-days', 1);
        if (opts.maxDeleteRuns !== undefined)
            patch.max_delete_runs = readIntOption(opts.maxDeleteRuns, '--max-delete-runs', 1);
        if (Object.keys(patch).length === 0) {
            console.error('Error: no retention fields provided.');
            process.exit(1);
        }
        const { setExperimentRetentionPolicy, getExperimentOverview } = await import('./lib/explorer.js');
        const ok = setExperimentRetentionPolicy(opts.experiment, patch);
        if (!ok) {
            console.error(`Experiment not found: ${opts.experiment}`);
            process.exit(1);
        }
        // Re-read the overview so the printed policy is the stored one.
        const overview = getExperimentOverview(opts.experiment);
        console.log(JSON.stringify(overview?.retention_policy || {}, null, 2));
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1247
// labgate explore compare --experiment <id> --run <runId> [--to <target>] [--diff]
// Forwards the selection to compareRun() and prints its result as JSON.
// The patch is requested only when --diff is passed; the --stat flag is
// declared but not read by this handler.
exploreCmd
    .command('compare')
    .description('Compare one run against best/parent/another run')
    .requiredOption('--experiment <id>', 'Experiment id')
    .requiredOption('--run <runId>', 'Selected run id')
    .option('--to <target>', 'Comparison target: best|parent|<runId>', 'best')
    .option('--diff', 'Include full patch artifact in compare output')
    .option('--stat', 'Show diff stats (included by default)', true)
    .action(async (opts) => {
    try {
        const explorer = await import('./lib/explorer.js');
        const { compareRun } = explorer;
        const request = {
            experiment_id: opts.experiment,
            run_id: opts.run,
            compare_to: opts.to,
            include_patch: opts.diff === true,
        };
        const comparison = compareRun(request);
        const rendered = JSON.stringify(comparison, null, 2);
        console.log(rendered);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1271
// labgate explore overview --experiment <id>
// Prints the whole getExperimentOverview() result as JSON.
// Exits with code 1 when the overview is missing or the lookup throws.
exploreCmd
    .command('overview')
    .description('Print aggregated experiment overview JSON for UI/status panels')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
    try {
        const explorer = await import('./lib/explorer.js');
        const { getExperimentOverview } = explorer;
        const summary = getExperimentOverview(opts.experiment);
        if (!summary) {
            console.error(`Experiment not found: ${opts.experiment}`);
            process.exit(1);
        }
        const rendered = JSON.stringify(summary, null, 2);
        console.log(rendered);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1290
// labgate explore run --id <runId>
// Prints the getRunDetails() result for one run as JSON.
// Exits with code 1 when the run is missing or the lookup throws.
exploreCmd
    .command('run')
    .description('Print run details and artifact paths')
    .requiredOption('--id <runId>', 'Run id')
    .action(async (opts) => {
    try {
        const explorer = await import('./lib/explorer.js');
        const { getRunDetails } = explorer;
        const runInfo = getRunDetails(opts.id);
        if (!runInfo) {
            console.error(`Run not found: ${opts.id}`);
            process.exit(1);
        }
        const rendered = JSON.stringify(runInfo, null, 2);
        console.log(rendered);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
1309
// labgate explore mcp [--db <path>]
// Starts the explorer MCP server by calling main() from explorer-mcp.js.
// When --db is given, its absolute path is exported as LABGATE_EXPLORER_DB
// before the module is imported and is also passed on as `--db <path>`.
exploreCmd
    .command('mcp')
    .description('Run the Solution Explorer MCP server over stdio')
    .option('--db <path>', 'Override explorer DB path')
    .action(async (opts) => {
    const hasDbOverride = Boolean(opts.db);
    if (hasDbOverride) {
        process.env.LABGATE_EXPLORER_DB = (0, path_1.resolve)(opts.db);
    }
    const { main } = await import('./lib/explorer-mcp.js');
    const serverArgs = [];
    if (hasDbOverride) {
        serverArgs.push('--db', (0, path_1.resolve)(opts.db));
    }
    await main(serverArgs);
});
705
1321
  // ── labgate license ──────────────────────────────────────
706
1322
  const licenseCmd = program
707
1323
  .command('license')