opencode-autoresearch 3.13.2 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/.opencode-plugin/plugin.json +1 -1
  2. package/INSTALL.md +52 -2
  3. package/README.md +5 -3
  4. package/VERSION +1 -1
  5. package/dist/cli.js +540 -32
  6. package/dist/cli.js.map +1 -1
  7. package/dist/compaction.d.ts +17 -0
  8. package/dist/compaction.d.ts.map +1 -0
  9. package/dist/compaction.js +175 -0
  10. package/dist/compaction.js.map +1 -0
  11. package/dist/constants.d.ts +1 -1
  12. package/dist/constants.js +1 -1
  13. package/dist/error-categories.d.ts +12 -0
  14. package/dist/error-categories.d.ts.map +1 -0
  15. package/dist/error-categories.js +137 -0
  16. package/dist/error-categories.js.map +1 -0
  17. package/dist/evidence.d.ts +24 -0
  18. package/dist/evidence.d.ts.map +1 -0
  19. package/dist/evidence.js +82 -0
  20. package/dist/evidence.js.map +1 -0
  21. package/dist/helpers.d.ts +13 -0
  22. package/dist/helpers.d.ts.map +1 -1
  23. package/dist/helpers.js +40 -0
  24. package/dist/helpers.js.map +1 -1
  25. package/dist/index.d.ts +1 -1
  26. package/dist/leaderboard.d.ts +27 -0
  27. package/dist/leaderboard.d.ts.map +1 -0
  28. package/dist/leaderboard.js +195 -0
  29. package/dist/leaderboard.js.map +1 -0
  30. package/dist/memory-manager.d.ts.map +1 -1
  31. package/dist/memory-manager.js +8 -2
  32. package/dist/memory-manager.js.map +1 -1
  33. package/dist/metric-comparator.d.ts +15 -0
  34. package/dist/metric-comparator.d.ts.map +1 -0
  35. package/dist/metric-comparator.js +58 -0
  36. package/dist/metric-comparator.js.map +1 -0
  37. package/dist/run-manager.d.ts.map +1 -1
  38. package/dist/run-manager.js +18 -11
  39. package/dist/run-manager.js.map +1 -1
  40. package/dist/serialize.d.ts +8 -0
  41. package/dist/serialize.d.ts.map +1 -0
  42. package/dist/serialize.js +50 -0
  43. package/dist/serialize.js.map +1 -0
  44. package/dist/strategy-pack.d.ts +31 -0
  45. package/dist/strategy-pack.d.ts.map +1 -0
  46. package/dist/strategy-pack.js +90 -0
  47. package/dist/strategy-pack.js.map +1 -0
  48. package/dist/subagent-pool.d.ts.map +1 -1
  49. package/dist/subagent-pool.js +22 -14
  50. package/dist/subagent-pool.js.map +1 -1
  51. package/dist/task-queue.d.ts +36 -0
  52. package/dist/task-queue.d.ts.map +1 -0
  53. package/dist/task-queue.js +65 -0
  54. package/dist/task-queue.js.map +1 -0
  55. package/dist/types.d.ts +2 -1
  56. package/dist/types.d.ts.map +1 -1
  57. package/dist/verifier-parser.d.ts.map +1 -1
  58. package/dist/verifier-parser.js +3 -1
  59. package/dist/verifier-parser.js.map +1 -1
  60. package/dist/whats-new.d.ts +12 -0
  61. package/dist/whats-new.d.ts.map +1 -0
  62. package/dist/whats-new.js +106 -0
  63. package/dist/whats-new.js.map +1 -0
  64. package/dist/worker.d.ts +11 -0
  65. package/dist/worker.d.ts.map +1 -0
  66. package/dist/worker.js +75 -0
  67. package/dist/worker.js.map +1 -0
  68. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -1,12 +1,27 @@
1
1
  #!/usr/bin/env node
2
- import { closeSync, existsSync, fstatSync, openSync, readFileSync, readSync, readdirSync } from "fs";
2
+ import { closeSync, constants as fsConstants, existsSync, fstatSync, lstatSync, openSync, readFileSync, readSync, readdirSync } from "fs";
3
3
  import { resolve } from "path";
4
4
  import { execSync } from "child_process";
5
5
  import { MAX_DRAFTS } from "./constants.js";
6
- import { printJson, resolveRepo, parseRunState, parsePositiveInt, sanitizeForTerminal, getInstalledPackagePath, getInstalledPackageInfo, readUpdateCache, getGlobalNpmPrefix, readGoalDoc, atomicWriteTextInRepo } from "./helpers.js";
6
+ import { printJson, printJsonEnvelope, resolveRepo, parseRunState, parsePositiveInt, sanitizeForTerminal, getInstalledPackagePath, getInstalledPackageInfo, readUpdateCache, getGlobalNpmPrefix, readGoalDoc, atomicWriteTextInRepo } from "./helpers.js";
7
7
  const VERSION_FLAGS = ["--version", "-v"];
8
8
  const HELP_FLAGS = ["--help", "-h", "help"];
9
9
  const BRANCH_POLICIES = ["best", "roulette", "diverse"];
10
+ const shouldSkipUpdateCheck = (args) => {
11
+ if (args.length > 0 && VERSION_FLAGS.includes(args[0])) {
12
+ return { skip: true, reason: "version_flag" };
13
+ }
14
+ if (args.length > 0 && HELP_FLAGS.includes(args[0])) {
15
+ return { skip: true, reason: "help_flag" };
16
+ }
17
+ if (process.env.AUTORESEARCH_NO_UPDATE === "1") {
18
+ return { skip: true, reason: "env_opt_out" };
19
+ }
20
+ if (process.env.CI === "true" || process.env.CI === "1") {
21
+ return { skip: true, reason: "ci_environment" };
22
+ }
23
+ return { skip: false, reason: null };
24
+ };
10
25
  const usage = () => {
11
26
  console.error("Usage: autoresearch <command> [options]");
12
27
  console.error("");
@@ -22,6 +37,7 @@ const usage = () => {
22
37
  console.error(" score Run the configured scorer and show normalized output");
23
38
  console.error(" digest Generate re-entry digest for operator handoff");
24
39
  console.error(" config Show runtime configuration");
40
+ console.error(" contract Print runtime contract schemas");
25
41
  console.error(" summary Aggregate stats across runs");
26
42
  console.error(" suggest Suggest next goal from memory");
27
43
  console.error(" launch Launch a background run");
@@ -29,6 +45,9 @@ const usage = () => {
29
45
  console.error(" stop Request a background run stop");
30
46
  console.error(" resume Resume a background run");
31
47
  console.error(" record Record an experiment result");
48
+ console.error(" queue Manage background task queue");
49
+ console.error(" pack Export and inspect strategy packs");
50
+ console.error(" leaderboard Show local leaderboard across runs");
32
51
  console.error(" doctor Verify package installation and version");
33
52
  console.error(" help Show this help");
34
53
  console.error("");
@@ -55,6 +74,7 @@ const usage = () => {
55
74
  console.error(" --duration Wall-clock cap (e.g., 5h or 300m)");
56
75
  console.error(` --num-drafts Number of parallel drafts (default: 1, max: ${MAX_DRAFTS})`);
57
76
  console.error(" --branch-policy Branch selection policy: best, roulette, diverse");
77
+ console.error(' --branch-policy-overrides JSON object mapping draft IDs to policies (e.g. {"draft-0":"diverse"})');
58
78
  console.error(" --max-debug-depth Max debug experiment depth before stop");
59
79
  console.error(" --branch-failure-budget Per-branch failure budget before stop");
60
80
  console.error(" --json Output raw JSON (default: human-readable)");
@@ -84,7 +104,13 @@ const parseArgs = (args) => {
84
104
  const result = {};
85
105
  for (let i = 0; i < args.length; i++) {
86
106
  if (args[i].startsWith("--")) {
87
- const key = args[i].slice(2);
107
+ const longArg = args[i];
108
+ const equalsIndex = longArg.indexOf("=");
109
+ if (equalsIndex > 2) {
110
+ result[longArg.slice(2, equalsIndex)] = longArg.slice(equalsIndex + 1);
111
+ continue;
112
+ }
113
+ const key = longArg.slice(2);
88
114
  if (i + 1 < args.length && !args[i + 1].startsWith("--") && !args[i + 1].startsWith("-")) {
89
115
  result[key] = args[++i];
90
116
  }
@@ -183,6 +209,39 @@ const formatTimestamp = (ts) => {
183
209
  return ts;
184
210
  }
185
211
  };
212
+ const MAX_SCORE_HISTORY_BYTES = 10 * 1024 * 1024;
213
+ const assertRegularBoundedFile = (filePath) => {
214
+ const linkStats = lstatSync(filePath);
215
+ if (linkStats.isSymbolicLink()) {
216
+ throw new Error(`Refusing to read score history symlink: ${filePath}`);
217
+ }
218
+ if (!linkStats.isFile()) {
219
+ throw new Error(`Refusing to read non-regular score history file: ${filePath}`);
220
+ }
221
+ if (linkStats.size > MAX_SCORE_HISTORY_BYTES) {
222
+ throw new Error(`Score history is too large to read safely (${linkStats.size} bytes; max ${MAX_SCORE_HISTORY_BYTES} bytes): ${filePath}`);
223
+ }
224
+ };
225
+ const readScoreHistoryFile = (filePath) => {
226
+ assertRegularBoundedFile(filePath);
227
+ if (typeof fsConstants.O_NOFOLLOW !== "number") {
228
+ throw new Error(`Refusing to read score history because this platform does not support O_NOFOLLOW: ${filePath}`);
229
+ }
230
+ const fd = openSync(filePath, fsConstants.O_RDONLY | fsConstants.O_NOFOLLOW);
231
+ try {
232
+ const fileStats = fstatSync(fd);
233
+ if (!fileStats.isFile()) {
234
+ throw new Error(`Refusing to read non-regular score history file: ${filePath}`);
235
+ }
236
+ if (fileStats.size > MAX_SCORE_HISTORY_BYTES) {
237
+ throw new Error(`Score history is too large to read safely (${fileStats.size} bytes; max ${MAX_SCORE_HISTORY_BYTES} bytes): ${filePath}`);
238
+ }
239
+ return readFileSync(fd, "utf-8");
240
+ }
241
+ finally {
242
+ closeSync(fd);
243
+ }
244
+ };
186
245
  const readTailLines = (filePath, limit) => {
187
246
  if (limit <= 0)
188
247
  return [];
@@ -245,6 +304,41 @@ const normalizeBranchPolicy = (value) => {
245
304
  return value;
246
305
  throw new Error(`Invalid branch policy: ${value}. Expected one of: ${BRANCH_POLICIES.join(", ")}`);
247
306
  };
307
+ const PROTO_POISON_KEYS = new Set(["__proto__", "constructor", "prototype"]);
308
+ const normalizeOverrideBranchPolicy = (branchId, value) => {
309
+ const trimmed = value.trim();
310
+ if (trimmed === "") {
311
+ throw new Error(`Invalid branch policy override for ${branchId}: value must not be empty`);
312
+ }
313
+ if (BRANCH_POLICIES.includes(trimmed))
314
+ return trimmed;
315
+ throw new Error(`Invalid branch policy override for ${branchId}: "${trimmed}" is not one of: ${BRANCH_POLICIES.join(", ")}`);
316
+ };
317
+ const parseBranchPolicyOverrides = (value) => {
318
+ if (value == null || value === "")
319
+ return undefined;
320
+ let parsed;
321
+ try {
322
+ parsed = JSON.parse(value);
323
+ }
324
+ catch {
325
+ throw new Error("Invalid branch policy overrides: expected a JSON object mapping draft IDs to branch policies");
326
+ }
327
+ if (parsed == null || Array.isArray(parsed) || typeof parsed !== "object") {
328
+ throw new Error("Invalid branch policy overrides: expected a JSON object mapping draft IDs to branch policies");
329
+ }
330
+ const overrides = Object.create(null);
331
+ for (const [branchId, branchPolicy] of Object.entries(parsed)) {
332
+ if (PROTO_POISON_KEYS.has(branchId)) {
333
+ throw new Error(`Invalid branch policy override key: "${branchId}" is not a valid draft ID`);
334
+ }
335
+ if (typeof branchPolicy !== "string") {
336
+ throw new Error(`Invalid branch policy override for ${branchId}: expected a string policy`);
337
+ }
338
+ overrides[branchId] = normalizeOverrideBranchPolicy(branchId, branchPolicy);
339
+ }
340
+ return overrides;
341
+ };
248
342
  const main = async () => {
249
343
  const args = process.argv.slice(2);
250
344
  // Handle standalone flags
@@ -298,7 +392,7 @@ const main = async () => {
298
392
  stop_condition: grouped["stop-condition"],
299
393
  rollback_strategy: grouped["rollback-strategy"],
300
394
  };
301
- printJson(buildSetupSummary(grouped.repo, config));
395
+ printJsonEnvelope("wizard", buildSetupSummary(grouped.repo, config));
302
396
  break;
303
397
  }
304
398
  case "init": {
@@ -335,6 +429,7 @@ const main = async () => {
335
429
  baseline: grouped.baseline,
336
430
  num_drafts: parsePositiveInt(grouped["num-drafts"], "num_drafts", { max: MAX_DRAFTS }) ?? 1,
337
431
  branch_selection_policy: normalizeBranchPolicy(grouped["branch-policy"]),
432
+ branch_policy_overrides: parseBranchPolicyOverrides(grouped["branch-policy-overrides"]),
338
433
  outcome_metric: grouped["outcome-metric"],
339
434
  outcome_direction: grouped["outcome-direction"],
340
435
  instrument_metric: grouped["instrument-metric"],
@@ -350,7 +445,7 @@ const main = async () => {
350
445
  const { buildSupervisorSnapshot } = await import("./run-manager.js");
351
446
  const snapshot = await buildSupervisorSnapshot(grouped.repo, grouped["results-path"], grouped["state-path"]);
352
447
  if (useJson) {
353
- printJson(snapshot);
448
+ printJsonEnvelope("status", snapshot);
354
449
  }
355
450
  else {
356
451
  const s = snapshot;
@@ -397,7 +492,7 @@ const main = async () => {
397
492
  const lastIter = s.last_iteration;
398
493
  const flags = s.flags;
399
494
  if (useJson) {
400
- printJson(snapshot);
495
+ printJsonEnvelope("explain", snapshot);
401
496
  break;
402
497
  }
403
498
  const statusEmoji = {
@@ -466,7 +561,7 @@ const main = async () => {
466
561
  }
467
562
  return obj;
468
563
  });
469
- printJson({ count: records.length, records: parsed });
564
+ printJsonEnvelope("history", { count: records.length, records: parsed });
470
565
  break;
471
566
  }
472
567
  for (const r of records) {
@@ -493,7 +588,7 @@ const main = async () => {
493
588
  const limit = parsePositiveInt(grouped.limit, "limit") ?? 10;
494
589
  const showTopComponents = grouped["top-components"] === "true";
495
590
  if (showTopComponents) {
496
- const allLines = readFileSync(scoreHistoryPath, "utf-8")
591
+ const allLines = readScoreHistoryFile(scoreHistoryPath)
497
592
  .split("\n")
498
593
  .map((l) => l.trim())
499
594
  .filter(Boolean);
@@ -512,7 +607,7 @@ const main = async () => {
512
607
  const { rankComponents } = await import("./score-parser.js");
513
608
  const ranking = rankComponents(allParsed);
514
609
  if (useJson) {
515
- printJson({ count: allParsed.length, scores: allParsed.slice(-limit), ranking });
610
+ printJsonEnvelope("scores", { count: allParsed.length, scores: allParsed.slice(-limit), ranking });
516
611
  break;
517
612
  }
518
613
  console.log("Component Rankings:");
@@ -548,7 +643,7 @@ const main = async () => {
548
643
  return null;
549
644
  }
550
645
  }).filter(Boolean);
551
- printJson({ count: parsed.length, scores: parsed });
646
+ printJsonEnvelope("scores", { count: parsed.length, scores: parsed });
552
647
  break;
553
648
  }
554
649
  console.log("Score History (latest " + Math.min(limit, records.length) + "):");
@@ -651,7 +746,7 @@ const main = async () => {
651
746
  const normalized = scored.score / scored.max;
652
747
  const percent = (normalized * 100).toFixed(1) + "%";
653
748
  if (useJson) {
654
- printJson({
749
+ printJsonEnvelope("score", {
655
750
  score: scored.score,
656
751
  max: scored.max,
657
752
  normalized,
@@ -687,7 +782,7 @@ const main = async () => {
687
782
  }
688
783
  const state = parseRunState(readJsonFile(statePath));
689
784
  if (useJson) {
690
- printJson({
785
+ printJsonEnvelope("config", {
691
786
  goal: state.goal,
692
787
  mode: state.mode,
693
788
  metric: state.metric,
@@ -719,6 +814,148 @@ const main = async () => {
719
814
  console.log(` Pool: ${state.subagent_pool ? "configured" : "none"}`);
720
815
  break;
721
816
  }
817
+ case "contract": {
818
+ const schemas = {
819
+ schema_version: "1.0.0",
820
+ description: "Auto Research runtime contract schemas",
821
+ state: {
822
+ type: "object",
823
+ required: ["schema_version", "run_id", "created_at", "updated_at", "status", "mode", "operating_mode", "goal", "scope", "metric", "verify", "label_requirements", "artifact_paths", "stats", "flags"],
824
+ properties: {
825
+ schema_version: { type: "number", description: "State schema version" },
826
+ run_id: { type: "string", description: "Unique run identifier" },
827
+ created_at: { type: "string", format: "date-time", description: "Run creation timestamp" },
828
+ updated_at: { type: "string", format: "date-time", description: "Last update timestamp" },
829
+ status: { type: "string", enum: ["initialized", "running", "stopping", "stopped", "completed", "needs_human"], description: "Run status" },
830
+ mode: { type: "string", enum: ["foreground", "background"], description: "Execution mode" },
831
+ operating_mode: { type: "string", enum: ["converge", "continuous", "supervised"], description: "Operating mode" },
832
+ goal: { type: "string", description: "Run goal description" },
833
+ scope: { type: "string", description: "Target scope" },
834
+ metric: {
835
+ type: "object",
836
+ required: ["name", "direction"],
837
+ properties: {
838
+ name: { type: "string" },
839
+ direction: { type: "string", enum: ["higher", "lower"] },
840
+ baseline: { type: "string" },
841
+ best: { type: "string" },
842
+ latest: { type: "string" },
843
+ },
844
+ },
845
+ instrument_metric: { type: "object", description: "Optional secondary metric" },
846
+ verify: { type: "string", description: "Verification command" },
847
+ guard: { type: "string", description: "Guard command" },
848
+ scorer: { type: "string", description: "Scorer command" },
849
+ iterations_cap: { type: "number", description: "Maximum iterations" },
850
+ duration: { type: "string", description: "Duration limit" },
851
+ duration_seconds: { type: "number" },
852
+ deadline_at: { type: "string", format: "date-time" },
853
+ label_requirements: {
854
+ type: "object",
855
+ required: ["keep", "stop"],
856
+ properties: {
857
+ keep: { type: "array", items: { type: "string" } },
858
+ stop: { type: "array", items: { type: "string" } },
859
+ },
860
+ },
861
+ artifact_paths: {
862
+ type: "object",
863
+ required: ["results", "state"],
864
+ properties: {
865
+ results: { type: "string" },
866
+ state: { type: "string" },
867
+ },
868
+ },
869
+ stats: {
870
+ type: "object",
871
+ required: ["total_iterations", "kept", "discarded", "needs_human"],
872
+ properties: {
873
+ total_iterations: { type: "number" },
874
+ kept: { type: "number" },
875
+ discarded: { type: "number" },
876
+ needs_human: { type: "number" },
877
+ consecutive_discards: { type: "number" },
878
+ best_iteration: { type: "number" },
879
+ debug_depth: { type: "number" },
880
+ },
881
+ },
882
+ flags: {
883
+ type: "object",
884
+ required: ["stop_requested", "needs_human", "background_active", "stop_ready"],
885
+ properties: {
886
+ stop_requested: { type: "boolean" },
887
+ needs_human: { type: "boolean" },
888
+ background_active: { type: "boolean" },
889
+ stop_ready: { type: "boolean" },
890
+ },
891
+ },
892
+ last_iteration: {
893
+ type: "object",
894
+ properties: {
895
+ iteration: { type: "number" },
896
+ decision: { type: "string", enum: ["keep", "discard", "needs_human"] },
897
+ metric_value: { type: "string" },
898
+ change_summary: { type: "string" },
899
+ labels: { type: "array", items: { type: "string" } },
900
+ timestamp: { type: "string", format: "date-time" },
901
+ },
902
+ },
903
+ draft_pool: { type: "object", description: "Draft pool configuration" },
904
+ lineage: { type: "object", description: "Experiment lineage" },
905
+ budget_exhausted: { type: "boolean" },
906
+ budget_blocker_reason: { type: "string" },
907
+ },
908
+ },
909
+ result_row: {
910
+ type: "object",
911
+ description: "Single iteration result row in TSV format",
912
+ properties: {
913
+ iteration: { type: "number" },
914
+ decision: { type: "string" },
915
+ metric_value: { type: "string" },
916
+ verify_status: { type: "string" },
917
+ guard_status: { type: "string" },
918
+ change_summary: { type: "string" },
919
+ labels: { type: "array", items: { type: "string" } },
920
+ timestamp: { type: "string" },
921
+ note: { type: "string" },
922
+ },
923
+ },
924
+ goal_doc: {
925
+ type: "object",
926
+ required: ["goal", "metric", "direction", "verify"],
927
+ properties: {
928
+ goal: { type: "string" },
929
+ metric: { type: "string" },
930
+ direction: { type: "string", enum: ["higher", "lower"] },
931
+ verify: { type: "string" },
932
+ guard: { type: "string" },
933
+ constraints: { type: "string" },
934
+ file_map: { type: "string" },
935
+ stop_conditions: { type: "string" },
936
+ },
937
+ },
938
+ };
939
+ if (useJson) {
940
+ printJsonEnvelope("contract", schemas);
941
+ break;
942
+ }
943
+ console.log("Auto Research Contract Schemas");
944
+ console.log("==============================");
945
+ console.log("");
946
+ console.log("State Schema:");
947
+ console.log(` Version: ${schemas.state.properties.schema_version.type}`);
948
+ console.log(` Required: ${schemas.state.required.join(", ")}`);
949
+ console.log("");
950
+ console.log("Result Row Schema:");
951
+ console.log(` Properties: ${Object.keys(schemas.result_row.properties).join(", ")}`);
952
+ console.log("");
953
+ console.log("Goal Doc Schema:");
954
+ console.log(` Required: ${schemas.goal_doc.required.join(", ")}`);
955
+ console.log("");
956
+ console.log("Use --json for full machine-readable schema output.");
957
+ break;
958
+ }
722
959
  case "summary": {
723
960
  const { resolvePath } = await import("./helpers.js");
724
961
  const { RESULTS_DEFAULT } = await import("./constants.js");
@@ -746,7 +983,7 @@ const main = async () => {
746
983
  runIds.add(iterTags[0]);
747
984
  }
748
985
  if (useJson) {
749
- printJson({
986
+ printJsonEnvelope("summary", {
750
987
  total_records: records.length,
751
988
  total_kept: totalKept,
752
989
  total_discarded: totalDiscarded,
@@ -789,7 +1026,7 @@ const main = async () => {
789
1026
  if (!grouped.verify)
790
1027
  errors.push("Missing required: --verify");
791
1028
  if (useJson) {
792
- printJson({ valid: errors.length === 0, errors });
1029
+ printJsonEnvelope("validate", { valid: errors.length === 0, errors });
793
1030
  return errors.length > 0 ? 1 : 0;
794
1031
  }
795
1032
  if (errors.length === 0) {
@@ -827,7 +1064,7 @@ const main = async () => {
827
1064
  results = resultLines.slice(1).filter(Boolean);
828
1065
  }
829
1066
  if (useJson) {
830
- printJson({ state, results_count: results.length });
1067
+ printJsonEnvelope("report", { state, results_count: results.length });
831
1068
  break;
832
1069
  }
833
1070
  console.log(`# Auto Research Report`);
@@ -861,6 +1098,40 @@ const main = async () => {
861
1098
  }
862
1099
  }
863
1100
  }
1101
+ // Milestone Progress
1102
+ console.log(`\n## Milestone Progress`);
1103
+ if (state.stats) {
1104
+ const s = state.stats;
1105
+ const total = s.total_iterations;
1106
+ const successRate = total > 0 ? ((s.kept / total) * 100).toFixed(1) : "0";
1107
+ console.log(`- **Progress:** ${formatMarkdownField(s.kept)} kept / ${formatMarkdownField(total)} total iterations (${formatMarkdownField(successRate)}% success rate)`);
1108
+ if (state.iterations_cap) {
1109
+ const progressPct = ((total / state.iterations_cap) * 100).toFixed(1);
1110
+ console.log(`- **Cap:** ${formatMarkdownField(total)} / ${formatMarkdownField(state.iterations_cap)} iterations (${formatMarkdownField(progressPct)}% of cap)`);
1111
+ }
1112
+ if (state.created_at) {
1113
+ const startedAtMs = Date.parse(state.created_at);
1114
+ const endedAtMs = state.updated_at ? Date.parse(state.updated_at) : Date.now();
1115
+ if (!Number.isNaN(startedAtMs) && !Number.isNaN(endedAtMs) && endedAtMs >= startedAtMs) {
1116
+ const elapsedMin = Math.round((endedAtMs - startedAtMs) / 1000 / 60);
1117
+ console.log(`- **Elapsed:** ${formatMarkdownField(elapsedMin)} minutes`);
1118
+ }
1119
+ }
1120
+ // Next candidate
1121
+ if (state.last_iteration && state.last_iteration.decision === "keep") {
1122
+ console.log(`- **Next candidate:** Iteration ${formatMarkdownField(state.last_iteration.iteration)} (kept)`);
1123
+ }
1124
+ else if (s.best_iteration) {
1125
+ console.log(`- **Best candidate:** Iteration ${formatMarkdownField(s.best_iteration)}`);
1126
+ }
1127
+ }
1128
+ // Artifact pointers
1129
+ console.log(`\n## Artifacts`);
1130
+ console.log(`- State: ${formatMarkdownField(state.artifact_paths?.state || ".autoresearch/state.json")}`);
1131
+ console.log(`- Results: ${formatMarkdownField(state.artifact_paths?.results || "autoresearch-results.tsv")}`);
1132
+ if (grouped.repo) {
1133
+ console.log(`- Repository: ${formatMarkdownField(grouped.repo)}`);
1134
+ }
864
1135
  // Failed branches information
865
1136
  if (state.draft_pool && state.draft_pool.active_drafts) {
866
1137
  const failedBranches = state.draft_pool.active_drafts.filter(draft => draft.status === "discarded");
@@ -920,6 +1191,37 @@ const main = async () => {
920
1191
  break;
921
1192
  }
922
1193
  case "suggest": {
1194
+ const evidenceGated = grouped["evidence"] === "true";
1195
+ if (evidenceGated) {
1196
+ const { generateIssueCandidate } = await import("./evidence.js");
1197
+ const candidate = generateIssueCandidate(grouped.repo, grouped.goal, grouped.metric, grouped.verify, grouped["score-history-path"]);
1198
+ if (!candidate) {
1199
+ if (useJson) {
1200
+ printJsonEnvelope("suggest", { candidates: [], reason: "insufficient_evidence" });
1201
+ }
1202
+ else {
1203
+ console.log("No evidence-gated issue candidates found.");
1204
+ console.log("Insufficient failure clusters or score history not available.");
1205
+ }
1206
+ break;
1207
+ }
1208
+ if (useJson) {
1209
+ printJsonEnvelope("suggest", { candidates: [candidate], evidence_gated: true });
1210
+ }
1211
+ else {
1212
+ console.log(`Evidence-Gated Issue Candidate:`);
1213
+ console.log(` Title: ${candidate.title}`);
1214
+ console.log(` Goal: ${candidate.goal}`);
1215
+ console.log(` Metric: ${candidate.metric}`);
1216
+ console.log(` Evidence: ${candidate.evidence.total_discards} discards in ${candidate.evidence.total_runs} cluster(s)`);
1217
+ console.log(``);
1218
+ console.log(` Suggested command:`);
1219
+ console.log(` ${candidate.suggest_command}`);
1220
+ console.log(``);
1221
+ console.log(`Review before opening. This candidate is NOT auto-submitted.`);
1222
+ }
1223
+ break;
1224
+ }
923
1225
  const { resolvePath } = await import("./helpers.js");
924
1226
  const { MEMORY_DEFAULT } = await import("./constants.js");
925
1227
  const memoryPath = resolvePath(grouped.repo, grouped["memory-path"], MEMORY_DEFAULT);
@@ -928,10 +1230,10 @@ const main = async () => {
928
1230
  break;
929
1231
  }
930
1232
  const memory = readFileSync(memoryPath, "utf-8");
931
- const patterns = memory.match(/### Pattern: [^\n]+/g) ?? [];
1233
+ const patterns = memory.match(/^### Pattern: [^\n]+/gm) ?? [];
932
1234
  const suggestions = patterns.map(parseMemoryPatternHeading);
933
1235
  if (useJson) {
934
- printJson({ patterns_found: suggestions.length, suggestions });
1236
+ printJsonEnvelope("suggest", { patterns_found: suggestions.length, suggestions });
935
1237
  break;
936
1238
  }
937
1239
  console.log("Memory Patterns — candidate next goals:");
@@ -974,7 +1276,7 @@ const main = async () => {
974
1276
  },
975
1277
  };
976
1278
  if (format === "json") {
977
- console.log(JSON.stringify(exportData, null, 2));
1279
+ printJsonEnvelope("export", exportData);
978
1280
  }
979
1281
  else if (format === "md" || format === "markdown") {
980
1282
  console.log(`# Auto Research Export`);
@@ -1001,7 +1303,7 @@ const main = async () => {
1001
1303
  case "completion": {
1002
1304
  const shell = grouped.shell || "bash";
1003
1305
  const commands = ["init", "goal", "wizard", "status", "explain", "history", "config", "summary", "suggest", "launch", "complete", "stop", "resume", "record", "doctor", "export", "completion", "help"];
1004
- const options = ["--repo", "--goal", "--metric", "--direction", "--verify", "--guard", "--mode", "--scope", "--iterations", "--duration", "--num-drafts", "--branch-policy", "--json", "--results-path", "--state-path", "--fresh-start", "--memory-path", "--format", "--shell", "--goal-path", "--template"];
1306
+ const options = ["--repo", "--goal", "--metric", "--direction", "--verify", "--guard", "--mode", "--scope", "--iterations", "--duration", "--num-drafts", "--branch-policy", "--branch-policy-overrides", "--json", "--results-path", "--state-path", "--fresh-start", "--memory-path", "--format", "--shell", "--goal-path", "--template"];
1005
1307
  if (shell === "bash" || shell === "zsh") {
1006
1308
  console.log(`# Auto Research CLI completion for ${shell}`);
1007
1309
  console.log(`_autoresearch() {`);
@@ -1056,6 +1358,7 @@ const main = async () => {
1056
1358
  baseline: grouped.baseline,
1057
1359
  num_drafts: parsePositiveInt(grouped["num-drafts"], "num_drafts", { max: MAX_DRAFTS }) ?? 1,
1058
1360
  branch_selection_policy: normalizeBranchPolicy(grouped["branch-policy"]),
1361
+ branch_policy_overrides: parseBranchPolicyOverrides(grouped["branch-policy-overrides"]),
1059
1362
  outcome_metric: grouped["outcome-metric"],
1060
1363
  outcome_direction: grouped["outcome-direction"],
1061
1364
  instrument_metric: grouped["instrument-metric"],
@@ -1071,7 +1374,7 @@ const main = async () => {
1071
1374
  const { writeFileSync } = await import("fs");
1072
1375
  const state = await initializeRun(grouped.repo, grouped["results-path"], grouped["state-path"], config, grouped["fresh-start"] === "true");
1073
1376
  writeFileSync(launchPath, JSON.stringify({ run_id: state.run_id, goal: state.goal, mode: "background" }, null, 2) + "\n", "utf-8");
1074
- printJson({ status: "launched", run_id: state.run_id, launch_path: launchPath });
1377
+ printJsonEnvelope("launch", { status: "launched", run_id: state.run_id, launch_path: launchPath });
1075
1378
  break;
1076
1379
  }
1077
1380
  case "complete": {
@@ -1081,7 +1384,7 @@ const main = async () => {
1081
1384
  }
1082
1385
  const { completeRun } = await import("./run-manager.js");
1083
1386
  const state = await completeRun(grouped.repo, grouped["state-path"]);
1084
- printJson({ status: "completed", run_id: state.run_id });
1387
+ printJsonEnvelope("complete", { status: "completed", run_id: state.run_id });
1085
1388
  break;
1086
1389
  }
1087
1390
  case "stop": {
@@ -1091,7 +1394,7 @@ const main = async () => {
1091
1394
  }
1092
1395
  const { setStopRequested } = await import("./run-manager.js");
1093
1396
  const state = await setStopRequested(grouped.repo, grouped["state-path"]);
1094
- printJson({ status: "stop_requested", run_id: state.run_id });
1397
+ printJsonEnvelope("stop", { status: "stop_requested", run_id: state.run_id });
1095
1398
  break;
1096
1399
  }
1097
1400
  case "resume": {
@@ -1101,7 +1404,7 @@ const main = async () => {
1101
1404
  }
1102
1405
  const { resumeBackgroundRun } = await import("./run-manager.js");
1103
1406
  const state = await resumeBackgroundRun(grouped.repo, grouped["state-path"]);
1104
- printJson({ status: "resumed", run_id: state.run_id });
1407
+ printJsonEnvelope("resume", { status: "resumed", run_id: state.run_id });
1105
1408
  break;
1106
1409
  }
1107
1410
  case "record": {
@@ -1144,7 +1447,7 @@ const main = async () => {
1144
1447
  }
1145
1448
  const { appendIteration } = await import("./run-manager.js");
1146
1449
  const state = await appendIteration(grouped.repo, grouped["results-path"], grouped["state-path"], grouped.decision, grouped["metric-value"], grouped["instrument-value"], normalizeResultStatus(vs, "verify_status"), normalizeResultStatus(gs, "guard_status"), grouped.hypothesis, grouped["change-summary"], grouped.labels ? (Array.isArray(grouped.labels) ? grouped.labels : [grouped.labels]) : undefined, grouped.note, iteration, undefined, scorerStatus, scoreComponents);
1147
- printJson(state);
1450
+ printJsonEnvelope("record", state);
1148
1451
  break;
1149
1452
  }
1150
1453
  case "digest": {
@@ -1155,7 +1458,7 @@ const main = async () => {
1155
1458
  const { buildRunDigest } = await import("./run-manager.js");
1156
1459
  const digest = await buildRunDigest(grouped.repo, grouped["results-path"], grouped["state-path"]);
1157
1460
  if (useJson) {
1158
- printJson(digest);
1461
+ printJsonEnvelope("digest", digest);
1159
1462
  }
1160
1463
  else {
1161
1464
  console.log(`# Auto Research Digest`);
@@ -1199,7 +1502,7 @@ const main = async () => {
1199
1502
  if (digest.flags && Object.keys(digest.flags).length > 0) {
1200
1503
  console.log(`\n## Flags`);
1201
1504
  for (const [key, value] of Object.entries(digest.flags)) {
1202
- console.log(`- ${key}: ${formatMarkdownField(value)}`);
1505
+ console.log(`- ${formatMarkdownField(key)}: ${formatMarkdownField(value)}`);
1203
1506
  }
1204
1507
  }
1205
1508
  }
@@ -1225,6 +1528,7 @@ const main = async () => {
1225
1528
  const installedPath = getInstalledPackagePath(PACKAGE_NAME);
1226
1529
  const installedInfo = installedPath ? getInstalledPackageInfo(PACKAGE_NAME) : null;
1227
1530
  const updateCache = readUpdateCache();
1531
+ const { skip: updateSkipped, reason: skipReason } = shouldSkipUpdateCheck(process.argv.slice(2));
1228
1532
  const updateStatus = {
1229
1533
  cache_exists: updateCache !== null,
1230
1534
  last_check: updateCache?.last_check || null,
@@ -1232,9 +1536,13 @@ const main = async () => {
1232
1536
  latest_version: updateCache?.latest_version || null,
1233
1537
  update_available: updateCache?.update_available || false,
1234
1538
  update_disabled: process.env.AUTORESEARCH_NO_UPDATE === "1",
1539
+ skipped: updateSkipped,
1540
+ skip_reason: skipReason,
1235
1541
  };
1236
1542
  if (useJson) {
1237
- printJson({
1543
+ const { getWhatsNew } = await import("./whats-new.js");
1544
+ const wn = getWhatsNew(base);
1545
+ printJsonEnvelope("doctor", {
1238
1546
  version: VERSION,
1239
1547
  skill_name: SKILL_NAME,
1240
1548
  runtime: `Node.js ${process.version}`,
@@ -1249,6 +1557,7 @@ const main = async () => {
1249
1557
  update: updateStatus,
1250
1558
  checks: checks,
1251
1559
  checks_passed: checks.filter((c) => !c.ok).length === 0,
1560
+ whats_new: wn ? { features: wn.features, fixes: wn.fixes } : null,
1252
1561
  });
1253
1562
  break;
1254
1563
  }
@@ -1269,7 +1578,10 @@ const main = async () => {
1269
1578
  }
1270
1579
  console.log("");
1271
1580
  console.log("Update:");
1272
- if (updateCache) {
1581
+ if (updateSkipped) {
1582
+ console.log(` Skipped: yes (${skipReason})`);
1583
+ }
1584
+ else if (updateCache) {
1273
1585
  console.log(` Last check: ${updateCache.last_check}`);
1274
1586
  console.log(` Current: ${updateCache.current_version}`);
1275
1587
  console.log(` Latest: ${updateCache.latest_version}`);
@@ -1294,6 +1606,20 @@ const main = async () => {
1294
1606
  return 1;
1295
1607
  }
1296
1608
  console.log(`\nAll ${checks.length} checks passed.`);
1609
+ const showWhatsNew = grouped["whats-new"] === "true";
1610
+ if (showWhatsNew || useJson) {
1611
+ const { getWhatsNew, formatWhatsNew } = await import("./whats-new.js");
1612
+ const wn = getWhatsNew(base);
1613
+ if (wn) {
1614
+ if (useJson) {
1615
+ // Already displayed in JSON envelope — add it for next re-entry
1616
+ }
1617
+ else {
1618
+ console.log("");
1619
+ console.log(formatWhatsNew(wn));
1620
+ }
1621
+ }
1622
+ }
1297
1623
  break;
1298
1624
  }
1299
1625
  case "goal": {
@@ -1336,7 +1662,7 @@ const main = async () => {
1336
1662
  }
1337
1663
  const doc = readGoalDoc(goalPath);
1338
1664
  if (useJson) {
1339
- printJson(doc);
1665
+ printJsonEnvelope("goal", doc);
1340
1666
  break;
1341
1667
  }
1342
1668
  console.log(`Goal: ${formatDisplayValue(doc.goal)}`);
@@ -1456,7 +1782,7 @@ const main = async () => {
1456
1782
  const result = buildGoalInitResult(goalPath, config, !hasRequiredFlags && isTTY);
1457
1783
  if (isGoalDryRun) {
1458
1784
  if (useGoalJson) {
1459
- printJson({ ...result, dry_run: true });
1785
+ printJsonEnvelope("goal", { ...result, dry_run: true });
1460
1786
  }
1461
1787
  else {
1462
1788
  console.log("[dry-run] Would write goal document to: " + goalPath);
@@ -1472,7 +1798,7 @@ const main = async () => {
1472
1798
  }
1473
1799
  atomicWriteTextInRepo(goalGrouped.repo, goalPath, document);
1474
1800
  if (useGoalJson) {
1475
- printJson(result);
1801
+ printJsonEnvelope("goal", result);
1476
1802
  }
1477
1803
  else {
1478
1804
  console.log(`✓ Goal definition written to ${goalPath}`);
@@ -1487,6 +1813,181 @@ const main = async () => {
1487
1813
  }
1488
1814
  break;
1489
1815
  }
1816
+ case "queue": {
1817
+ const subCmd = cmdArgs[0] || "list";
1818
+ if (subCmd === "help") {
1819
+ console.error("Usage: autoresearch queue <subcommand> [options]");
1820
+ console.error("");
1821
+ console.error("Subcommands:");
1822
+ console.error(" list List tasks in the queue (default)");
1823
+ console.error(" enqueue Enqueue a new task");
1824
+ console.error(" clean Remove completed and failed tasks");
1825
+ break;
1826
+ }
1827
+ if (subCmd === "enqueue") {
1828
+ if (!grouped.goal || !grouped.metric || !grouped.verify) {
1829
+ console.error("--goal, --metric, and --verify are required for enqueue");
1830
+ return 1;
1831
+ }
1832
+ const { enqueueTasks } = await import("./task-queue.js");
1833
+ const tasks = await enqueueTasks(grouped.repo, [{ goal: grouped.goal, metric: grouped.metric, verify: grouped.verify }]);
1834
+ if (useJson) {
1835
+ printJson({ enqueued: tasks });
1836
+ }
1837
+ else {
1838
+ for (const t of tasks) {
1839
+ console.log(`Enqueued: ${t.id} - ${t.goal}`);
1840
+ }
1841
+ }
1842
+ break;
1843
+ }
1844
+ if (subCmd === "clean") {
1845
+ const { listTasks, writeManifest, resolveQueuePath } = await import("./task-queue.js");
1846
+ const queuePath = resolveQueuePath(grouped.repo);
1847
+ const manifest = await listTasks(grouped.repo);
1848
+ const before = manifest.tasks.length;
1849
+ manifest.tasks = manifest.tasks.filter((t) => t.status === "pending" || t.status === "leased");
1850
+ manifest.updated_at = new Date().toISOString();
1851
+ const removed = before - manifest.tasks.length;
1852
+ await writeManifest(queuePath, manifest);
1853
+ if (useJson) {
1854
+ printJson({ removed });
1855
+ }
1856
+ else {
1857
+ console.log(`Cleaned ${removed} completed/failed tasks. ${manifest.tasks.length} remain.`);
1858
+ }
1859
+ break;
1860
+ }
1861
+ const { listTasks } = await import("./task-queue.js");
1862
+ const manifest = await listTasks(grouped.repo);
1863
+ if (useJson) {
1864
+ printJson(manifest);
1865
+ }
1866
+ else {
1867
+ if (manifest.tasks.length === 0) {
1868
+ console.log("No tasks in queue.");
1869
+ }
1870
+ else {
1871
+ console.log(`Task Queue (${manifest.tasks.length} tasks):`);
1872
+ for (const task of manifest.tasks) {
1873
+ const icon = task.status === "completed" ? "v" : task.status === "failed" ? "x" : task.status === "leased" ? ">" : "*";
1874
+ console.log(` ${icon} ${task.id} [${task.status}] ${task.goal}`);
1875
+ }
1876
+ }
1877
+ }
1878
+ break;
1879
+ }
1880
+ case "pack": {
1881
+ const subCmd = cmdArgs[0] || "help";
1882
+ if (subCmd === "help" || (subCmd !== "export" && subCmd !== "list" && subCmd !== "inspect")) {
1883
+ console.error("Usage: autoresearch pack <subcommand> [options]");
1884
+ console.error("Subcommands:");
1885
+ console.error(" export Export validated run as a strategy pack");
1886
+ console.error(" list List available strategy packs");
1887
+ console.error(" inspect View a specific strategy pack");
1888
+ break;
1889
+ }
1890
+ if (subCmd === "export") {
1891
+ const { exportPack } = await import("./strategy-pack.js");
1892
+ const result = exportPack(grouped.repo, grouped["state-path"], grouped["goal-path"]);
1893
+ if (!result) {
1894
+ console.error("No run state found. Complete a run first.");
1895
+ return 1;
1896
+ }
1897
+ if (useJson) {
1898
+ printJson({ exported: result.path, pack: result.pack });
1899
+ }
1900
+ else {
1901
+ console.log(`Strategy pack exported: ${result.path}`);
1902
+ console.log(` Goal: ${result.pack.goal}`);
1903
+ console.log(` Metric: ${result.pack.metric}`);
1904
+ console.log(` Success: ${result.pack.evidence.success_rate}`);
1905
+ }
1906
+ break;
1907
+ }
1908
+ if (subCmd === "list") {
1909
+ const { listPacks } = await import("./strategy-pack.js");
1910
+ const packs = listPacks(grouped.repo);
1911
+ if (useJson) {
1912
+ printJson({ packs });
1913
+ }
1914
+ else if (packs.length === 0) {
1915
+ console.log("No strategy packs found.");
1916
+ }
1917
+ else {
1918
+ console.log(`Strategy Packs (${packs.length}):`);
1919
+ for (const p of packs)
1920
+ console.log(` ${p.name}`);
1921
+ }
1922
+ break;
1923
+ }
1924
+ if (subCmd === "inspect") {
1925
+ const name = cmdArgs[1];
1926
+ if (!name) {
1927
+ console.error("Usage: autoresearch pack inspect <name>");
1928
+ return 1;
1929
+ }
1930
+ const { readPack } = await import("./strategy-pack.js");
1931
+ const content = readPack(grouped.repo, name);
1932
+ if (!content) {
1933
+ console.error(`Pack not found: ${name}`);
1934
+ return 1;
1935
+ }
1936
+ console.log(content);
1937
+ break;
1938
+ }
1939
+ break;
1940
+ }
1941
+ case "leaderboard": {
1942
+ const { generateLeaderboard, formatLeaderboardMarkdown, formatLeaderboardText } = await import("./leaderboard.js");
1943
+ const { resolveRepo } = await import("./helpers.js");
1944
+ const repo = resolveRepo(grouped.repo);
1945
+ const leaderboard = generateLeaderboard(repo);
1946
+ if (useJson) {
1947
+ printJson(leaderboard);
1948
+ break;
1949
+ }
1950
+ if (leaderboard.entries.length === 0) {
1951
+ console.log("No runs found. Complete some runs to see the leaderboard.");
1952
+ break;
1953
+ }
1954
+ if (grouped.format === "markdown") {
1955
+ console.log(formatLeaderboardMarkdown(leaderboard));
1956
+ }
1957
+ else {
1958
+ console.log(formatLeaderboardText(leaderboard));
1959
+ }
1960
+ break;
1961
+ }
1962
+ case "worker": {
1963
+ const { workerOnce } = await import("./worker.js");
1964
+ const once = grouped["once"] === "true";
1965
+ if (!once) {
1966
+ console.error("worker requires --once flag");
1967
+ console.error("Usage: autoresearch worker --once [--json] [--repo <path>]");
1968
+ return 1;
1969
+ }
1970
+ const result = workerOnce(grouped.repo, grouped["state-path"], grouped["results-path"]);
1971
+ if (useJson) {
1972
+ printJsonEnvelope("worker", result);
1973
+ }
1974
+ else {
1975
+ if (result.ready) {
1976
+ console.log(`✓ Ready for iteration ${result.iteration}`);
1977
+ console.log(` Run ID: ${result.run_id}`);
1978
+ console.log(` Status: ${result.status}`);
1979
+ console.log(` Goal: ${result.goal}`);
1980
+ if (result.metric)
1981
+ console.log(` Metric: ${result.metric}`);
1982
+ }
1983
+ else {
1984
+ console.log(`✗ Not ready: ${result.reason || "unknown"}`);
1985
+ console.log(` Run ID: ${result.run_id}`);
1986
+ console.log(` Iter: ${result.iteration}`);
1987
+ }
1988
+ }
1989
+ return result.ready ? 0 : 1;
1990
+ }
1490
1991
  default: {
1491
1992
  console.error(`Unknown command: ${cmd}`);
1492
1993
  console.error("Run 'autoresearch --help' for usage.");
@@ -1495,7 +1996,14 @@ const main = async () => {
1495
1996
  }
1496
1997
  }
1497
1998
  catch (exc) {
1498
- console.error(exc.message);
1999
+ const { categorizeError, formatStructuredError } = await import("./error-categories.js");
2000
+ const structured = categorizeError(exc);
2001
+ if (useJson) {
2002
+ console.error(formatStructuredError(structured, true));
2003
+ }
2004
+ else {
2005
+ console.error(formatStructuredError(structured, false));
2006
+ }
1499
2007
  return 2;
1500
2008
  }
1501
2009
  return 0;