labgate 0.5.31 → 0.5.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/dist/cli.js +533 -0
- package/dist/cli.js.map +1 -1
- package/dist/lib/config.d.ts +11 -0
- package/dist/lib/config.js +44 -0
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/container.d.ts +3 -3
- package/dist/lib/container.js +117 -8
- package/dist/lib/container.js.map +1 -1
- package/dist/lib/display-mcp.d.ts +10 -0
- package/dist/lib/display-mcp.js +160 -0
- package/dist/lib/display-mcp.js.map +1 -0
- package/dist/lib/display-store.d.ts +24 -0
- package/dist/lib/display-store.js +150 -0
- package/dist/lib/display-store.js.map +1 -0
- package/dist/lib/explorer-autopilot.d.ts +16 -0
- package/dist/lib/explorer-autopilot.js +573 -0
- package/dist/lib/explorer-autopilot.js.map +1 -0
- package/dist/lib/explorer-claude.d.ts +16 -0
- package/dist/lib/explorer-claude.js +361 -0
- package/dist/lib/explorer-claude.js.map +1 -0
- package/dist/lib/explorer-compare.d.ts +9 -0
- package/dist/lib/explorer-compare.js +190 -0
- package/dist/lib/explorer-compare.js.map +1 -0
- package/dist/lib/explorer-eval.d.ts +23 -0
- package/dist/lib/explorer-eval.js +161 -0
- package/dist/lib/explorer-eval.js.map +1 -0
- package/dist/lib/explorer-gc.d.ts +11 -0
- package/dist/lib/explorer-gc.js +304 -0
- package/dist/lib/explorer-gc.js.map +1 -0
- package/dist/lib/explorer-git.d.ts +14 -0
- package/dist/lib/explorer-git.js +136 -0
- package/dist/lib/explorer-git.js.map +1 -0
- package/dist/lib/explorer-lock.d.ts +5 -0
- package/dist/lib/explorer-lock.js +100 -0
- package/dist/lib/explorer-lock.js.map +1 -0
- package/dist/lib/explorer-mcp.d.ts +11 -0
- package/dist/lib/explorer-mcp.js +611 -0
- package/dist/lib/explorer-mcp.js.map +1 -0
- package/dist/lib/explorer-retention.d.ts +4 -0
- package/dist/lib/explorer-retention.js +58 -0
- package/dist/lib/explorer-retention.js.map +1 -0
- package/dist/lib/explorer-store.d.ts +77 -0
- package/dist/lib/explorer-store.js +950 -0
- package/dist/lib/explorer-store.js.map +1 -0
- package/dist/lib/explorer-types.d.ts +161 -0
- package/dist/lib/explorer-types.js +3 -0
- package/dist/lib/explorer-types.js.map +1 -0
- package/dist/lib/explorer.d.ts +31 -0
- package/dist/lib/explorer.js +247 -0
- package/dist/lib/explorer.js.map +1 -0
- package/dist/lib/results-store.js +37 -3
- package/dist/lib/results-store.js.map +1 -1
- package/dist/lib/test/integration-harness.js +1 -1
- package/dist/lib/test/integration-harness.js.map +1 -1
- package/dist/lib/ui.html +4800 -1997
- package/dist/lib/ui.js +893 -30
- package/dist/lib/ui.js.map +1 -1
- package/dist/mcp-bundles/display-mcp.bundle.mjs +30209 -0
- package/dist/mcp-bundles/explorer-mcp.bundle.mjs +40044 -0
- package/dist/mcp-bundles/results-mcp.bundle.mjs +30 -4
- package/package.json +3 -2
- package/templates/tsp-lab/API_CONTRACT.md +20 -0
- package/templates/tsp-lab/EVAL.md +20 -0
- package/templates/tsp-lab/PROBLEM.md +18 -0
- package/templates/tsp-lab/data/generate_instances.py +51 -0
- package/templates/tsp-lab/data/instances.jsonl +12 -0
- package/templates/tsp-lab/eval.py +148 -0
- package/templates/tsp-lab/solver.py +88 -0
- package/templates/tsp-lab/stub-patches/enable_two_opt.patch +14 -0
package/README.md
CHANGED
|
@@ -94,6 +94,54 @@ labgate policy validate [file] # validate poli
|
|
|
94
94
|
labgate logs [-n 20] # view recent audit events
|
|
95
95
|
labgate logs --follow # stream new audit events
|
|
96
96
|
labgate init [--force] # create/reset config
|
|
97
|
+
labgate explore create --name <name> --repo <path> --eval "<command>" # create Solution Explorer experiment
|
|
98
|
+
labgate explore tick --experiment <id> # run one autopilot tick (cron-friendly)
|
|
99
|
+
labgate explore overview --experiment <id> # aggregated status/counts/best/latest
|
|
100
|
+
labgate explore run --id <run-id> # run metadata + artifact paths
|
|
101
|
+
labgate explore gc --experiment <id> # retention-based prune (dry-run by default)
|
|
102
|
+
labgate explore compare --experiment <id> --run <run-id> --to best # score+diff compare
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Solution Explorer (MVP backend)
|
|
106
|
+
|
|
107
|
+
LabGate now includes an early **Solution Explorer** backend for reproducible
|
|
108
|
+
variant search workflows:
|
|
109
|
+
|
|
110
|
+
- Isolated experiment repo clones under `~/.labgate/explorer/repos/`
|
|
111
|
+
- Git worktree-per-run execution
|
|
112
|
+
- Deterministic eval contract parsing (`score` JSON on last stdout line)
|
|
113
|
+
- Per-run artifacts (`eval.json`, logs, diff, summary)
|
|
114
|
+
- Cron-safe autopilot tick lock
|
|
115
|
+
|
|
116
|
+
Starter template:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
templates/tsp-lab/
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Example flow:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Create experiment from a local repo
|
|
126
|
+
labgate explore create \
|
|
127
|
+
--name "TSP baseline" \
|
|
128
|
+
--repo /path/to/tsp-lab \
|
|
129
|
+
--eval "python3 eval.py" \
|
|
130
|
+
--agent-mode stub \
|
|
131
|
+
--stub-patch stub-patches/enable_two_opt.patch
|
|
132
|
+
|
|
133
|
+
# Trigger one run
|
|
134
|
+
labgate explore tick --experiment <experiment-id>
|
|
135
|
+
|
|
136
|
+
# Inspect tree and leaderboard
|
|
137
|
+
labgate explore tree --experiment <experiment-id> --mode best_path
|
|
138
|
+
labgate explore leaderboard --experiment <experiment-id> --top 5
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Cron example (every 5 minutes):
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
*/5 * * * * /usr/bin/env labgate explore tick --experiment <experiment-id>
|
|
97
145
|
```
|
|
98
146
|
|
|
99
147
|
### Options
|
package/dist/cli.js
CHANGED
|
@@ -785,6 +785,539 @@ datasetCmd
|
|
|
785
785
|
console.log(` Path: ${hostPath}`);
|
|
786
786
|
console.log(` Scanned: ${stats.scanned_at}`);
|
|
787
787
|
});
|
|
788
|
+
// ── labgate explore ─────────────────────────────────────
// Parent `explore` command; the Solution Explorer subcommands are registered on it.
const exploreCmd = program.command('explore').description('Solution Explorer commands');
|
|
792
|
+
/**
 * `labgate explore create` — validate the CLI options, create a Solution
 * Explorer experiment, and (unless --skip-baseline is given) run one baseline
 * evaluation in the experiment's repo clone and record it as a `note` event.
 *
 * Any validation failure or thrown error is printed as `Error: <message>` on
 * stderr and the process exits with code 1.
 */
exploreCmd
    .command('create')
    .description('Create a new Solution Explorer experiment from an existing git repository')
    .requiredOption('--name <name>', 'Experiment name')
    .requiredOption('--repo <path>', 'Source repository path to clone for isolated exploration')
    .requiredOption('--eval <command>', 'Evaluation command (must print JSON as last stdout line)')
    .option('--base-ref <ref>', 'Base git ref (default: HEAD)', 'HEAD')
    .option('--timeout <sec>', 'Evaluation timeout seconds', '120')
    .option('--epsilon <value>', 'Exploration epsilon (0..1)', '0.15')
    .option('--top-n <count>', 'Top-N pool for epsilon exploration', '5')
    .option('--max-runs <count>', 'Optional max run budget')
    .option('--agent-mode <mode>', 'Agent mode: stub|claude_headless', 'stub')
    .option('--stub-patch <path>', 'Patch file path for stub agent mode')
    .option('--claude-resume-session <id>', 'Optional Claude session id to resume in claude_headless mode')
    .option('--claude-timeout <sec>', 'Optional Claude headless timeout seconds (60..14400)')
    .option('--keep-worktrees', 'Retention policy: keep run worktrees instead of pruning')
    .option('--artifacts-policy <mode>', 'Retention artifacts mode: all|minimal|none', 'minimal')
    .option('--keep-last <count>', 'Retention keep_last_n', '50')
    .option('--keep-failed-last <count>', 'Retention keep_failed_last_n', '20')
    .option('--keep-best', 'Retention keep best run protected', true)
    .option('--no-keep-best', 'Retention do not specially protect best run')
    .option('--prune-after-days <days>', 'Retention prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Retention max_delete_runs', '200')
    .option('--skip-baseline', 'Skip baseline evaluation check after experiment creation')
    .action(async (opts) => {
    /**
     * Parse `raw` as a whole base-10 integer within [min, max].
     * Unlike bare parseInt, trailing garbage ("12abc") and fractions ("5.7")
     * are rejected, so the "must be an integer" messages are accurate.
     * Throws Error(message) on failure; the catch below prints it.
     */
    const intOption = (raw, message, min, max = Number.MAX_SAFE_INTEGER) => {
        const text = String(raw).trim();
        const value = /^[+-]?\d+$/.test(text) ? Number(text) : NaN;
        if (!Number.isSafeInteger(value) || value < min || value > max) {
            throw new Error(message);
        }
        return value;
    };
    /** Same as intOption, but leaves an option that was not supplied as undefined. */
    const optionalIntOption = (raw, message, min, max) => (raw === undefined ? undefined : intOption(raw, message, min, max));
    try {
        // ── Validate options (same order and messages as before) ──
        const timeout = intOption(opts.timeout, '--timeout must be an integer between 5 and 86400.', 5, 86400);
        // Number('') is 0, so an empty value must be rejected explicitly.
        const epsilonText = String(opts.epsilon).trim();
        const epsilon = epsilonText === '' ? NaN : Number(epsilonText);
        if (!Number.isFinite(epsilon) || epsilon < 0 || epsilon > 1) {
            throw new Error('--epsilon must be a number between 0 and 1.');
        }
        const topN = intOption(opts.topN, '--top-n must be an integer between 1 and 100.', 1, 100);
        const maxRuns = optionalIntOption(opts.maxRuns, '--max-runs must be an integer >= 1.', 1);
        // "claude" is accepted as shorthand for "claude_headless".
        const agentModeRaw = String(opts.agentMode || 'stub').trim().toLowerCase();
        const agentMode = agentModeRaw === 'claude' ? 'claude_headless' : agentModeRaw;
        if (agentMode !== 'stub' && agentMode !== 'claude_headless') {
            throw new Error('--agent-mode must be one of: stub, claude_headless.');
        }
        const claudeTimeout = optionalIntOption(opts.claudeTimeout, '--claude-timeout must be an integer between 60 and 14400.', 60, 4 * 60 * 60);
        const artifactsPolicy = String(opts.artifactsPolicy || 'minimal').trim().toLowerCase();
        if (!['all', 'minimal', 'none'].includes(artifactsPolicy)) {
            throw new Error('--artifacts-policy must be one of: all, minimal, none.');
        }
        const keepLast = intOption(opts.keepLast, '--keep-last must be an integer >= 0.', 0);
        const keepFailedLast = intOption(opts.keepFailedLast, '--keep-failed-last must be an integer >= 0.', 0);
        const maxDeleteRuns = intOption(opts.maxDeleteRuns, '--max-delete-runs must be an integer >= 1.', 1);
        const pruneAfterDays = optionalIntOption(opts.pruneAfterDays, '--prune-after-days must be an integer >= 1.', 1);
        // ── Assemble search policy and retention settings ──
        const policy = {
            epsilon,
            top_n: topN,
            agent_mode: agentMode,
        };
        if (maxRuns !== undefined)
            policy.max_runs = maxRuns;
        if (opts.stubPatch)
            policy.stub_patch_file = opts.stubPatch;
        // Claude-specific settings are only stored in claude_headless mode.
        if (agentMode === 'claude_headless') {
            const resumeId = String(opts.claudeResumeSession || '').trim();
            if (resumeId)
                policy.claude_resume_session_id = resumeId;
            if (claudeTimeout !== undefined)
                policy.claude_timeout_sec = claudeTimeout;
        }
        const retention = {
            keep_worktrees: !!opts.keepWorktrees,
            artifacts: artifactsPolicy,
            keep_last_n: keepLast,
            keep_best: opts.keepBest !== false,
            keep_failed_last_n: keepFailedLast,
            ...(pruneAfterDays !== undefined ? { prune_artifacts_after_days: pruneAfterDays } : {}),
            max_delete_runs: maxDeleteRuns,
        };
        // ── Create the experiment ──
        const { createExplorerExperiment } = await import('./lib/explorer.js');
        const { runEvaluation } = await import('./lib/explorer-eval.js');
        const { getExplorerArtifactDir } = await import('./lib/config.js');
        const { ExplorerStore } = await import('./lib/explorer-store.js');
        const experiment = createExplorerExperiment({
            name: opts.name,
            source_repo_path: opts.repo,
            eval_command: opts.eval,
            base_ref: opts.baseRef,
            eval_timeout_sec: timeout,
            policy,
            retention,
        });
        console.log(`Experiment created: ${experiment.id}`);
        console.log(`Name: ${experiment.name}`);
        console.log(`Repo clone: ${experiment.repo_path}`);
        console.log(`Base SHA: ${experiment.base_ref}`);
        // ── Baseline evaluation (skipped with --skip-baseline) ──
        if (!opts.skipBaseline) {
            const baselineArtifactDir = getExplorerArtifactDir(experiment.id, 'baseline');
            const baseline = runEvaluation({
                worktree_path: experiment.repo_path,
                eval_command: experiment.eval_command,
                timeout_sec: experiment.eval_timeout_sec,
                artifact_dir: baselineArtifactDir,
            });
            // Record the baseline outcome as a note event, whatever its status.
            const store = new ExplorerStore();
            try {
                store.createEvent(experiment.id, 'note', {
                    message: 'baseline evaluation',
                    status: baseline.status,
                    score: baseline.score ?? null,
                    artifact_dir: baselineArtifactDir,
                    error: baseline.error || null,
                });
            }
            finally {
                store.close();
            }
            if (baseline.status === 'eval_ok' && Number.isFinite(baseline.score)) {
                console.log(`Baseline score: ${baseline.score.toFixed(6)}`);
                console.log(`Baseline artifacts: ${baselineArtifactDir}`);
            }
            else {
                // A failed baseline is reported on stderr; no exit code is set here.
                console.error(`Baseline eval status: ${baseline.status}`);
                if (baseline.error)
                    console.error(`Baseline eval error: ${baseline.error}`);
                console.error(`Baseline logs: ${baseline.stdout_path}, ${baseline.stderr_path}`);
            }
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
966
|
+
/**
 * `labgate explore list` — print one line per experiment (id, status, name,
 * truncated base ref, creation time), paginated by --limit / --offset.
 */
exploreCmd
    .command('list')
    .description('List existing Solution Explorer experiments')
    .option('-n, --limit <count>', 'Max experiments to list', '50')
    .option('--offset <count>', 'Offset for pagination', '0')
    .action(async (opts) => {
    try {
        // Unparsable, non-positive limits and negative offsets fall back to the
        // defaults, matching how `status` and `leaderboard` treat their counts.
        const limitParsed = parseInt(opts.limit, 10);
        const offsetParsed = parseInt(opts.offset, 10);
        const limit = Number.isFinite(limitParsed) && limitParsed > 0 ? limitParsed : 50;
        const offset = Number.isFinite(offsetParsed) && offsetParsed >= 0 ? offsetParsed : 0;
        const { ExplorerStore } = await import('./lib/explorer-store.js');
        const store = new ExplorerStore();
        try {
            const rows = store.listExperiments(limit, offset);
            if (rows.length === 0) {
                console.log('No experiments found.');
                return;
            }
            for (const row of rows) {
                console.log(`${row.id} ${row.status.padEnd(8)} ${row.name} base=${row.base_ref.slice(0, 12)} created=${row.created_at}`);
            }
        }
        finally {
            // Runs on the early return above as well.
            store.close();
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
996
|
+
/**
 * `labgate explore status <experimentId>` — print experiment metadata, the
 * best run (if any has a score), and the most recent runs.
 *
 * Exits with code 1 when the experiment does not exist or an error is thrown.
 */
exploreCmd
    .command('status')
    .description('Show experiment status, best run, and recent runs')
    .argument('<experimentId>', 'Experiment id')
    .option('-n, --limit <count>', 'How many recent runs to show', '10')
    .action(async (experimentId, opts) => {
    try {
        // Invalid or non-positive limits fall back to 10.
        const limitParsed = parseInt(opts.limit, 10);
        const limit = Number.isFinite(limitParsed) && limitParsed > 0 ? limitParsed : 10;
        const { ExplorerStore } = await import('./lib/explorer-store.js');
        const store = new ExplorerStore();
        try {
            const experiment = store.getExperiment(experimentId);
            if (!experiment) {
                console.error(`Experiment not found: ${experimentId}`);
                // Set the exit code and return instead of calling process.exit():
                // process.exit() terminates immediately and would skip the
                // finally block, leaving the store unclosed.
                process.exitCode = 1;
                return;
            }
            const best = store.getBestRun(experimentId);
            const count = store.getRunCount(experimentId);
            const active = store.hasActiveRun(experimentId);
            const recent = store.listRuns(experimentId, { limit, offset: 0 });
            console.log(`Experiment: ${experiment.id}`);
            console.log(`Name: ${experiment.name}`);
            console.log(`Status: ${experiment.status}`);
            console.log(`Runs: ${count}${active ? ' (active run in progress)' : ''}`);
            console.log(`Base SHA: ${experiment.base_ref}`);
            // `!= null` covers both null and undefined, the same check the
            // recent-runs loop below uses before calling toFixed().
            if (best && best.score != null) {
                console.log(`Best run: ${best.id} score=${best.score.toFixed(6)} status=${best.status}`);
            }
            else {
                console.log('Best run: none yet');
            }
            console.log('');
            console.log('Recent runs:');
            if (recent.length === 0) {
                console.log(' (none)');
            }
            else {
                for (const run of recent) {
                    const score = run.score == null ? '-' : run.score.toFixed(6);
                    console.log(` ${run.id} ${run.status.padEnd(13)} score=${score} parent=${(run.parent_run_id || '-').slice(0, 16)}`);
                }
            }
        }
        finally {
            store.close();
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1049
|
+
/**
 * `labgate explore pause <experimentId>` — set the experiment status to
 * 'paused'. Exits with code 1 when the experiment is not found or the
 * status update throws.
 */
exploreCmd
    .command('pause')
    .description('Pause an experiment (manual run steps become no-ops)')
    .argument('<experimentId>', 'Experiment id')
    .action(async (experimentId) => {
    // try/catch mirrors the other explore subcommands so a failing import or
    // store error is reported as "Error: …" instead of an unhandled rejection.
    try {
        const { setExperimentStatus } = await import('./lib/explorer.js');
        const ok = setExperimentStatus(experimentId, 'paused');
        if (!ok) {
            console.error(`Experiment not found: ${experimentId}`);
            process.exit(1);
        }
        console.log(`Paused: ${experimentId}`);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1062
|
+
/**
 * `labgate explore resume <experimentId>` — set the experiment status back to
 * 'active'. Exits with code 1 when the experiment is not found or the
 * status update throws.
 */
exploreCmd
    .command('resume')
    .description('Resume an experiment')
    .argument('<experimentId>', 'Experiment id')
    .action(async (experimentId) => {
    // try/catch mirrors the other explore subcommands so a failing import or
    // store error is reported as "Error: …" instead of an unhandled rejection.
    try {
        const { setExperimentStatus } = await import('./lib/explorer.js');
        const ok = setExperimentStatus(experimentId, 'active');
        if (!ok) {
            console.error(`Experiment not found: ${experimentId}`);
            process.exit(1);
        }
        console.log(`Resumed: ${experimentId}`);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1075
|
+
/**
 * `labgate explore tick --experiment <id>` — run a single exploration step and
 * print its result as JSON.
 *
 * Exit codes: when no work was done, 1 if the result is flagged is_error and
 * 0 otherwise; 2 when a run finished with a status other than 'eval_ok';
 * 1 when an error is thrown.
 */
exploreCmd
    .command('tick')
    .description('Run one manual exploration step for an experiment')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
    try {
        const { runAutopilotTick: tickOnce } = await import('./lib/explorer.js');
        const outcome = tickOnce(opts.experiment);
        console.log(JSON.stringify(outcome, null, 2));
        if (!outcome.did_work) {
            // Nothing ran: only an explicit error flag makes this a failure.
            const exitCode = outcome.is_error === true ? 1 : 0;
            process.exit(exitCode);
        }
        const finishedBadly = Boolean(outcome.status) && outcome.status !== 'eval_ok';
        if (finishedBadly) {
            process.exit(2);
        }
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1096
|
+
/**
 * `labgate explore tree --experiment <id> [--mode best_path|full]` — print the
 * run tree as JSON. Any mode value other than exactly "full" (after trimming)
 * is treated as "best_path".
 */
exploreCmd
    .command('tree')
    .description('Print run tree view as JSON')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--mode <mode>', 'Tree mode: best_path|full', 'best_path')
    .action(async (opts) => {
    try {
        const requestedMode = String(opts.mode || 'best_path').trim();
        let mode = 'best_path';
        if (requestedMode === 'full') {
            mode = 'full';
        }
        const { getExperimentTree: loadTree } = await import('./lib/explorer.js');
        const treeJson = JSON.stringify(loadTree(opts.experiment, mode), null, 2);
        console.log(treeJson);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1113
|
+
/**
 * `labgate explore leaderboard --experiment <id> [-k N]` — print the top
 * scoring runs as JSON. An unparsable or non-positive --top falls back to 10.
 */
exploreCmd
    .command('leaderboard')
    .description('Print top scoring runs for an experiment')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('-k, --top <count>', 'Top-K rows', '10')
    .action(async (opts) => {
    try {
        let topK = parseInt(opts.top, 10);
        if (!Number.isFinite(topK) || topK <= 0) {
            topK = 10;
        }
        const { getExperimentLeaderboard: loadLeaderboard } = await import('./lib/explorer.js');
        const leaderboard = loadLeaderboard(opts.experiment, topK);
        console.log(JSON.stringify(leaderboard, null, 2));
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1131
|
+
/**
 * `labgate explore gc --experiment <id>` — prune worktrees/artifacts according
 * to the retention policy and print the resulting plan as JSON.
 *
 * Runs as a dry-run unless --yes is given. Retention fields supplied on the
 * command line are passed as one-off overrides; they are validated here with
 * the same bounds `explore create` applies, so a typo such as
 * `--keep-last abc` is rejected instead of reaching the GC as NaN.
 */
exploreCmd
    .command('gc')
    .description('Prune old worktrees/artifacts according to retention policy')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--yes', 'Apply GC changes (default is dry-run)')
    .option('--prune-worktrees', 'Enable worktree pruning', true)
    .option('--no-prune-worktrees', 'Disable worktree pruning')
    .option('--prune-artifacts', 'Enable artifact pruning', true)
    .option('--no-prune-artifacts', 'Disable artifact pruning')
    .option('--artifacts <mode>', 'Override artifacts mode: all|minimal|none')
    .option('--keep-last <count>', 'Override keep_last_n')
    .option('--keep-failed-last <count>', 'Override keep_failed_last_n')
    .option('--keep-worktrees', 'Override keep_worktrees=true')
    .option('--keep-best', 'Override keep_best=true')
    .option('--no-keep-best', 'Override keep_best=false')
    .option('--after-days <days>', 'Override prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Override max_delete_runs')
    .action(async (opts) => {
    try {
        // Parse a whole non-negative integer >= min; throws into the catch below.
        const countOption = (raw, flag, min) => {
            const text = String(raw).trim();
            const value = /^\d+$/.test(text) ? Number(text) : NaN;
            if (!Number.isSafeInteger(value) || value < min) {
                throw new Error(`${flag} must be an integer >= ${min}.`);
            }
            return value;
        };
        // Only fields the user actually supplied become overrides.
        const retention = {};
        if (opts.artifacts !== undefined) {
            const artifactsMode = String(opts.artifacts).trim().toLowerCase();
            if (!['all', 'minimal', 'none'].includes(artifactsMode)) {
                throw new Error('--artifacts must be one of: all, minimal, none.');
            }
            retention.artifacts = artifactsMode;
        }
        if (opts.keepLast !== undefined)
            retention.keep_last_n = countOption(opts.keepLast, '--keep-last', 0);
        if (opts.keepFailedLast !== undefined)
            retention.keep_failed_last_n = countOption(opts.keepFailedLast, '--keep-failed-last', 0);
        if (opts.keepWorktrees === true)
            retention.keep_worktrees = true;
        if (opts.keepBest !== undefined)
            retention.keep_best = opts.keepBest !== false;
        if (opts.afterDays !== undefined)
            retention.prune_artifacts_after_days = countOption(opts.afterDays, '--after-days', 1);
        if (opts.maxDeleteRuns !== undefined)
            retention.max_delete_runs = countOption(opts.maxDeleteRuns, '--max-delete-runs', 1);
        const { runExperimentGc } = await import('./lib/explorer.js');
        const plan = runExperimentGc(opts.experiment, {
            // Destructive only when --yes was passed explicitly.
            dry_run: opts.yes !== true,
            prune_worktrees: opts.pruneWorktrees !== false,
            prune_artifacts: opts.pruneArtifacts !== false,
            retention: Object.keys(retention).length > 0 ? retention : undefined,
        });
        console.log(JSON.stringify(plan, null, 2));
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1179
|
+
// Parent `explore retention` command; its `show` and `set` subcommands hang off it.
const exploreRetentionCmd = exploreCmd.command('retention').description('Get or update retention policy for an experiment');
|
|
1182
|
+
/**
 * `labgate explore retention show --experiment <id>` — print the experiment's
 * retention policy (taken from its overview) as JSON. Exits with code 1 when
 * the experiment is not found.
 */
exploreRetentionCmd
    .command('show')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
    try {
        const { getExperimentOverview: loadOverview } = await import('./lib/explorer.js');
        const summary = loadOverview(opts.experiment);
        if (!summary) {
            console.error(`Experiment not found: ${opts.experiment}`);
            process.exit(1);
        }
        const policyJson = JSON.stringify(summary.retention_policy, null, 2);
        console.log(policyJson);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1200
|
+
/**
 * `labgate explore retention set --experiment <id> [fields…]` — update the
 * given retention fields for the experiment and print the resulting policy.
 *
 * At least one field must be supplied. Numeric fields and the artifacts mode
 * are validated with the same bounds `explore create` applies, so invalid
 * input (for example `--keep-last abc`, previously parsed to NaN) is rejected
 * before it is handed to setExperimentRetentionPolicy.
 */
exploreRetentionCmd
    .command('set')
    .requiredOption('--experiment <id>', 'Experiment id')
    .option('--keep-worktrees', 'Set keep_worktrees=true')
    .option('--no-keep-worktrees', 'Set keep_worktrees=false')
    .option('--artifacts <mode>', 'Set artifacts mode: all|minimal|none')
    .option('--keep-last <count>', 'Set keep_last_n')
    .option('--keep-failed-last <count>', 'Set keep_failed_last_n')
    .option('--keep-best', 'Set keep_best=true')
    .option('--no-keep-best', 'Set keep_best=false')
    .option('--after-days <days>', 'Set prune_artifacts_after_days')
    .option('--max-delete-runs <count>', 'Set max_delete_runs')
    .action(async (opts) => {
    try {
        // Parse a whole non-negative integer >= min; throws into the catch below.
        const countOption = (raw, flag, min) => {
            const text = String(raw).trim();
            const value = /^\d+$/.test(text) ? Number(text) : NaN;
            if (!Number.isSafeInteger(value) || value < min) {
                throw new Error(`${flag} must be an integer >= ${min}.`);
            }
            return value;
        };
        // Only fields the user actually supplied end up in the patch.
        const patch = {};
        if (opts.keepWorktrees !== undefined)
            patch.keep_worktrees = opts.keepWorktrees;
        if (opts.artifacts !== undefined) {
            const artifactsMode = String(opts.artifacts).trim().toLowerCase();
            if (!['all', 'minimal', 'none'].includes(artifactsMode)) {
                throw new Error('--artifacts must be one of: all, minimal, none.');
            }
            patch.artifacts = artifactsMode;
        }
        if (opts.keepLast !== undefined)
            patch.keep_last_n = countOption(opts.keepLast, '--keep-last', 0);
        if (opts.keepFailedLast !== undefined)
            patch.keep_failed_last_n = countOption(opts.keepFailedLast, '--keep-failed-last', 0);
        if (opts.keepBest !== undefined)
            patch.keep_best = opts.keepBest;
        if (opts.afterDays !== undefined)
            patch.prune_artifacts_after_days = countOption(opts.afterDays, '--after-days', 1);
        if (opts.maxDeleteRuns !== undefined)
            patch.max_delete_runs = countOption(opts.maxDeleteRuns, '--max-delete-runs', 1);
        if (Object.keys(patch).length === 0) {
            console.error('Error: no retention fields provided.');
            process.exit(1);
        }
        const { setExperimentRetentionPolicy, getExperimentOverview } = await import('./lib/explorer.js');
        const ok = setExperimentRetentionPolicy(opts.experiment, patch);
        if (!ok) {
            console.error(`Experiment not found: ${opts.experiment}`);
            process.exit(1);
        }
        // Re-read the overview so the printed policy reflects what was stored.
        const overview = getExperimentOverview(opts.experiment);
        console.log(JSON.stringify(overview?.retention_policy || {}, null, 2));
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1247
|
+
/**
 * `labgate explore compare --experiment <id> --run <runId> [--to …] [--diff]`
 * — compare one run against a target and print the result as JSON.
 *
 * The full patch is requested only when --diff is passed. The --stat flag is
 * declared but its value is not read by this action.
 */
exploreCmd
    .command('compare')
    .description('Compare one run against best/parent/another run')
    .requiredOption('--experiment <id>', 'Experiment id')
    .requiredOption('--run <runId>', 'Selected run id')
    .option('--to <target>', 'Comparison target: best|parent|<runId>', 'best')
    .option('--diff', 'Include full patch artifact in compare output')
    .option('--stat', 'Show diff stats (included by default)', true)
    .action(async (opts) => {
    try {
        const { compareRun: runComparison } = await import('./lib/explorer.js');
        const request = {
            experiment_id: opts.experiment,
            run_id: opts.run,
            compare_to: opts.to,
            include_patch: opts.diff === true,
        };
        const comparison = runComparison(request);
        console.log(JSON.stringify(comparison, null, 2));
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1271
|
+
/**
 * `labgate explore overview --experiment <id>` — print the aggregated
 * experiment overview as JSON. Exits with code 1 when the experiment is not
 * found.
 */
exploreCmd
    .command('overview')
    .description('Print aggregated experiment overview JSON for UI/status panels')
    .requiredOption('--experiment <id>', 'Experiment id')
    .action(async (opts) => {
    try {
        const { getExperimentOverview: loadOverview } = await import('./lib/explorer.js');
        const summary = loadOverview(opts.experiment);
        if (!summary) {
            console.error(`Experiment not found: ${opts.experiment}`);
            process.exit(1);
        }
        const summaryJson = JSON.stringify(summary, null, 2);
        console.log(summaryJson);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1290
|
+
/**
 * `labgate explore run --id <runId>` — print the details returned for a run as
 * JSON. Exits with code 1 when the run is not found.
 */
exploreCmd
    .command('run')
    .description('Print run details and artifact paths')
    .requiredOption('--id <runId>', 'Run id')
    .action(async (opts) => {
    try {
        const { getRunDetails: loadRun } = await import('./lib/explorer.js');
        const runInfo = loadRun(opts.id);
        if (!runInfo) {
            console.error(`Run not found: ${opts.id}`);
            process.exit(1);
        }
        const runJson = JSON.stringify(runInfo, null, 2);
        console.log(runJson);
    }
    catch (err) {
        console.error(`Error: ${err?.message ?? String(err)}`);
        process.exit(1);
    }
});
|
|
1309
|
+
/**
 * `labgate explore mcp [--db <path>]` — start the Solution Explorer MCP server
 * over stdio. When --db is given, its resolved absolute path is exported as
 * LABGATE_EXPLORER_DB before the server module is loaded and is also passed
 * to the server as `--db <path>`.
 */
exploreCmd
    .command('mcp')
    .description('Run the Solution Explorer MCP server over stdio')
    .option('--db <path>', 'Override explorer DB path')
    .action(async (opts) => {
    // Resolve once and reuse for both the env var and the argument list.
    const dbPath = opts.db ? (0, path_1.resolve)(opts.db) : undefined;
    if (dbPath !== undefined) {
        process.env.LABGATE_EXPLORER_DB = dbPath;
    }
    const { main: startServer } = await import('./lib/explorer-mcp.js');
    const serverArgs = dbPath !== undefined ? ['--db', dbPath] : [];
    await startServer(serverArgs);
});
|
|
788
1321
|
// ── labgate license ──────────────────────────────────────
|
|
789
1322
|
const licenseCmd = program
|
|
790
1323
|
.command('license')
|