majlis 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +2648 -1385
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -219,6 +219,36 @@ var init_migrations = __esm({
219
219
 
220
220
  ALTER TABLE dead_ends ADD COLUMN category TEXT DEFAULT 'structural'
221
221
  CHECK(category IN ('structural', 'procedural'));
222
+ `);
223
+ },
224
+ // Migration 005: v4 → v5 — Swarm tracking tables
225
+ (db) => {
226
+ db.exec(`
227
+ CREATE TABLE swarm_runs (
228
+ id INTEGER PRIMARY KEY,
229
+ goal TEXT NOT NULL,
230
+ parallel_count INTEGER NOT NULL,
231
+ status TEXT NOT NULL DEFAULT 'running'
232
+ CHECK(status IN ('running', 'completed', 'failed')),
233
+ total_cost_usd REAL DEFAULT 0,
234
+ best_experiment_slug TEXT,
235
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
236
+ completed_at DATETIME
237
+ );
238
+
239
+ CREATE TABLE swarm_members (
240
+ id INTEGER PRIMARY KEY,
241
+ swarm_run_id INTEGER REFERENCES swarm_runs(id),
242
+ experiment_slug TEXT NOT NULL,
243
+ worktree_path TEXT NOT NULL,
244
+ final_status TEXT,
245
+ overall_grade TEXT,
246
+ cost_usd REAL DEFAULT 0,
247
+ error TEXT,
248
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
249
+ );
250
+
251
+ CREATE INDEX idx_swarm_members_run ON swarm_members(swarm_run_id);
222
252
  `);
223
253
  }
224
254
  ];
@@ -273,6 +303,17 @@ function closeDb() {
273
303
  function resetDb() {
274
304
  _db = null;
275
305
  }
306
/**
 * Opens a fresh database connection for the Majlis project at `projectRoot`.
 *
 * The database lives at `<projectRoot>/.majlis/majlis.db`; the `.majlis`
 * directory is created when it does not exist yet. The connection is put into
 * WAL journal mode, foreign-key enforcement is switched on, and all pending
 * migrations are applied before the handle is returned.
 *
 * This function does not read or assign the module-level `_db` variable, so
 * every call produces an independent handle that the caller must close.
 *
 * @param {string} projectRoot - Root directory of the project.
 * @returns {object} The opened, migrated database handle.
 * @throws Rethrows any error raised while configuring or migrating the
 *   database; the handle is closed first so the file is not left open.
 */
function openDbAt(projectRoot) {
  const majlisDir = path.join(projectRoot, ".majlis");
  // `recursive: true` makes mkdirSync a no-op for an existing directory, so no
  // separate existence check (and its check-then-create race) is needed.
  fs.mkdirSync(majlisDir, { recursive: true });
  const db = new import_better_sqlite3.default(path.join(majlisDir, "majlis.db"));
  try {
    db.pragma("journal_mode = WAL");
    db.pragma("foreign_keys = ON");
    runMigrations(db);
  } catch (err) {
    // Do not leak an open handle when setup fails part-way through.
    db.close();
    throw err;
  }
  return db;
}
276
317
  var import_better_sqlite3, path, fs, _db;
277
318
  var init_connection = __esm({
278
319
  "src/db/connection.ts"() {
@@ -469,7 +510,8 @@ ${cmd.body}
469
510
  "verification",
470
511
  "reframes",
471
512
  "rihla",
472
- "synthesis"
513
+ "synthesis",
514
+ "diagnosis"
473
515
  ];
474
516
  for (const dir of docDirs) {
475
517
  mkdirSafe(path2.join(docsDir, dir));
@@ -575,6 +617,10 @@ Read as much code as you need to understand the problem. Reading is free \u2014
575
617
  as many turns as necessary on Read, Grep, and Glob to build full context before
576
618
  you touch anything.
577
619
 
620
+ Do NOT read raw data files (fixtures/, ground truth JSON/STL). The synthesis
621
+ has the relevant facts. Reading raw data wastes turns re-deriving what the
622
+ doubt/challenge/verify cycle already established.
623
+
578
624
  ## The Rule: ONE Change, Then Document
579
625
 
580
626
  You make ONE code change per cycle. Not two, not "one more quick fix." ONE.
@@ -738,6 +784,9 @@ If the builder claims improvement but the framework metrics show regression, fla
738
784
  - Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
739
785
  - Do NOT run exhaustive diagnostics on every claim.
740
786
 
787
+ Framework-captured metrics are ground truth \u2014 if they show regression, that
788
+ alone justifies a "rejected" grade. Do not re-derive from raw fixture data.
789
+
741
790
  Grade each component: sound / good / weak / rejected
742
791
  Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
743
792
 
@@ -817,6 +866,13 @@ the database export.
817
866
  The framework does NOT auto-save your output for these files.
818
867
  7. Review classification: new sub-types? resolved sub-types?
819
868
 
869
+ You may ONLY write to these three files:
870
+ - docs/synthesis/current.md
871
+ - docs/synthesis/fragility.md
872
+ - docs/synthesis/dead-ends.md
873
+
874
+ Do NOT modify MEMORY.md, .claude/, classification/, experiments/, or any other paths.
875
+
820
876
  You may NOT write code, make decisions, or run experiments.
821
877
 
822
878
  ## Structured Output Format
@@ -909,7 +965,74 @@ gate_decision:
909
965
  "stale_references": ["list of stale references found, if any"],
910
966
  "overlapping_dead_ends": [0]
911
967
  }
912
- -->`
968
+ -->`,
969
+ diagnostician: `---
970
+ name: diagnostician
971
+ model: opus
972
+ tools: [Read, Write, Bash, Glob, Grep, WebSearch]
973
+ ---
974
+ You are the Diagnostician. You perform deep project-wide analysis.
975
+
976
+ You have the highest turn budget of any agent. Use it for depth, not breadth.
977
+ Your job is pure insight \u2014 you do NOT fix code, you do NOT build, you do NOT
978
+ make decisions. You diagnose.
979
+
980
+ ## What You Receive
981
+ - Full database export: every experiment, decision, doubt, challenge, verification,
982
+ dead-end, metric, and compression across the entire project history
983
+ - Current synthesis, fragility map, and dead-end registry
984
+ - Full read access to the entire project codebase
985
+ - Bash access to run tests, profiling, git archaeology, and analysis scripts
986
+
987
+ ## What You Can Do
988
+ 1. **Read everything** \u2014 source code, docs, git history, test output
989
+ 2. **Run analysis** \u2014 execute tests, profilers, git log/blame/bisect, custom scripts
990
+ 3. **Write analysis scripts** \u2014 you may write scripts ONLY to \`.majlis/scripts/\`
991
+ 4. **Search externally** \u2014 WebSearch for patterns, known issues, relevant techniques
992
+
993
+ ## What You CANNOT Do
994
+ - Modify any project files outside \`.majlis/scripts/\`
995
+ - Make code changes, fixes, or patches
996
+ - Create experiments or make decisions
997
+ - Write to docs/, src/, or any other project directory
998
+
999
+ ## Your Approach
1000
+
1001
+ Phase 1: Orientation (turns 1-10)
1002
+ - Read the full database export in your context
1003
+ - Read synthesis, fragility, dead-ends
1004
+ - Identify patterns: recurring failures, unresolved doubts, evidence gaps
1005
+
1006
+ Phase 2: Deep Investigation (turns 11-40)
1007
+ - Read source code at critical points identified in Phase 1
1008
+ - Run targeted tests, profiling, git archaeology
1009
+ - Write and execute analysis scripts in .majlis/scripts/
1010
+ - Cross-reference findings across experiments
1011
+
1012
+ Phase 3: Synthesis (turns 41-60)
1013
+ - Compile findings into a diagnostic report
1014
+ - Identify root causes, not symptoms
1015
+ - Rank issues by structural impact
1016
+ - Suggest investigation directions (not fixes)
1017
+
1018
+ ## Output Format
1019
+ Produce a diagnostic report as markdown. At the end, include:
1020
+
1021
+ <!-- majlis-json
1022
+ {
1023
+ "diagnosis": {
1024
+ "root_causes": ["List of identified root causes"],
1025
+ "patterns": ["Recurring patterns across experiments"],
1026
+ "evidence_gaps": ["What we don't know but should"],
1027
+ "investigation_directions": ["Suggested directions for next experiments"]
1028
+ }
1029
+ }
1030
+ -->
1031
+
1032
+ ## Safety Reminders
1033
+ - You are READ-ONLY for project code. Write ONLY to .majlis/scripts/.
1034
+ - Focus on diagnosis, not fixing. Your value is insight, not implementation.
1035
+ - Trust the database export over docs/ files when they conflict.`
913
1036
  };
914
1037
  SLASH_COMMANDS = {
915
1038
  classify: {
@@ -969,6 +1092,14 @@ Produce a rihla document at docs/rihla/.`
969
1092
  If the CLI is not installed, review: original objective, current classification,
970
1093
  recent failures, dead-ends. Ask: is the classification serving the objective?
971
1094
  Would we decompose differently with what we now know?`
1095
+ },
1096
+ diagnose: {
1097
+ description: "Deep project-wide diagnostic analysis",
1098
+ body: `Run \`majlis diagnose $ARGUMENTS\` for deep diagnosis.
1099
+ If the CLI is not installed, perform a deep diagnostic analysis.
1100
+ Read docs/synthesis/current.md, fragility.md, dead-ends.md, and all experiments.
1101
+ Identify root causes, recurring patterns, evidence gaps, and investigation directions.
1102
+ Do NOT modify project code \u2014 analysis only.`
972
1103
  }
973
1104
  };
974
1105
  HOOKS_CONFIG = {
@@ -1567,6 +1698,29 @@ function insertFinding(db, experimentId, approach, source, relevance, contradict
1567
1698
  VALUES (?, ?, ?, ?, ?)
1568
1699
  `).run(experimentId, approach, source, relevance, contradictsCurrent ? 1 : 0);
1569
1700
  }
1701
/**
 * Inserts a new row into `swarm_runs` for the given goal.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {string} goal - Goal text stored in the `goal` column.
 * @param {number} parallelCount - Value stored in `parallel_count`.
 * @returns {{ id: number | bigint }} The row id reported by the insert.
 */
function createSwarmRun(db, goal, parallelCount) {
  const insertRun = db.prepare(`
    INSERT INTO swarm_runs (goal, parallel_count) VALUES (?, ?)
  `);
  const { lastInsertRowid } = insertRun.run(goal, parallelCount);
  return { id: lastInsertRowid };
}
1707
/**
 * Records the outcome of a swarm run and stamps `completed_at` with the
 * current time.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {number} id - Primary key of the `swarm_runs` row to update.
 * @param {string} status2 - New status value.
 * @param {number} [totalCostUsd] - Accumulated cost; stored as 0 when omitted.
 * @param {string | null} [bestSlug] - Slug of the best experiment; stored as
 *   NULL when omitted.
 * @returns {void}
 */
function updateSwarmRun(db, id, status2, totalCostUsd, bestSlug) {
  // better-sqlite3 refuses to bind `undefined`, so optional values are mapped
  // to what the columns hold by default (0 for the cost, NULL for the slug).
  const cost = totalCostUsd ?? 0;
  const best = bestSlug ?? null;
  db.prepare(`
    UPDATE swarm_runs SET status = ?, total_cost_usd = ?, best_experiment_slug = ?,
      completed_at = CURRENT_TIMESTAMP WHERE id = ?
  `).run(status2, cost, best, id);
}
1713
/**
 * Registers an experiment as a member of a swarm run by inserting a row into
 * `swarm_members`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {number} swarmRunId - Id of the owning `swarm_runs` row.
 * @param {string} slug - Experiment slug stored in `experiment_slug`.
 * @param {string} worktreePath - Path stored in `worktree_path`.
 * @returns {void}
 */
function addSwarmMember(db, swarmRunId, slug, worktreePath) {
  const insertMember = db.prepare(`
    INSERT INTO swarm_members (swarm_run_id, experiment_slug, worktree_path) VALUES (?, ?, ?)
  `);
  insertMember.run(swarmRunId, slug, worktreePath);
}
1718
/**
 * Stores the final result of one swarm member, identified by its run id and
 * experiment slug.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {number} swarmRunId - Id of the owning `swarm_runs` row.
 * @param {string} slug - Experiment slug of the member to update.
 * @param {string | null} [finalStatus] - Final status; NULL when omitted.
 * @param {string | null} [overallGrade] - Overall grade; NULL when omitted.
 * @param {number} [costUsd] - Cost of this member; 0 when omitted.
 * @param {string | null} [error] - Error text; NULL when omitted.
 * @returns {void}
 */
function updateSwarmMember(db, swarmRunId, slug, finalStatus, overallGrade, costUsd, error) {
  // A member that crashed typically has no grade and one that succeeded has no
  // error; better-sqlite3 refuses to bind `undefined`, so absent values are
  // normalised before they reach the statement.
  db.prepare(`
    UPDATE swarm_members SET final_status = ?, overall_grade = ?, cost_usd = ?, error = ?
    WHERE swarm_run_id = ? AND experiment_slug = ?
  `).run(
    finalStatus ?? null,
    overallGrade ?? null,
    costUsd ?? 0,
    error ?? null,
    swarmRunId,
    slug
  );
}
1570
1724
  function exportForCompressor(db, maxLength = 3e4) {
1571
1725
  const experiments = listAllExperiments(db);
1572
1726
  const sections = ["# Structured Data Export (from SQLite)\n"];
@@ -1629,6 +1783,79 @@ function exportForCompressor(db, maxLength = 3e4) {
1629
1783
  if (full.length > maxLength) {
1630
1784
  return full.slice(0, maxLength) + `
1631
1785
 
1786
+ [TRUNCATED \u2014 full export was ${full.length} chars]`;
1787
+ }
1788
+ return full;
1789
+ }
1790
+ function exportForDiagnostician(db, maxLength = 6e4) {
1791
+ const base = exportForCompressor(db, maxLength);
1792
+ const sections = [base];
1793
+ const metrics = db.prepare(`
1794
+ SELECT m.*, e.slug FROM metrics m
1795
+ JOIN experiments e ON m.experiment_id = e.id
1796
+ ORDER BY m.captured_at
1797
+ `).all();
1798
+ if (metrics.length > 0) {
1799
+ sections.push("\n## Metric History (all experiments)");
1800
+ for (const m of metrics) {
1801
+ sections.push(`- ${m.slug} [${m.phase}] ${m.fixture}/${m.metric_name}: ${m.metric_value}`);
1802
+ }
1803
+ }
1804
+ const sessions = db.prepare("SELECT * FROM sessions ORDER BY started_at").all();
1805
+ if (sessions.length > 0) {
1806
+ sections.push("\n## Session History");
1807
+ for (const s of sessions) {
1808
+ sections.push(`- #${s.id}: "${s.intent}" (${s.ended_at ? "ended" : "active"})`);
1809
+ if (s.accomplished) sections.push(` accomplished: ${s.accomplished}`);
1810
+ if (s.unfinished) sections.push(` unfinished: ${s.unfinished}`);
1811
+ if (s.new_fragility) sections.push(` fragility: ${s.new_fragility}`);
1812
+ }
1813
+ }
1814
+ const compressions = db.prepare("SELECT * FROM compressions ORDER BY created_at").all();
1815
+ if (compressions.length > 0) {
1816
+ sections.push("\n## Compression History");
1817
+ for (const c of compressions) {
1818
+ sections.push(`- #${c.id}: ${c.synthesis_size_before}B \u2192 ${c.synthesis_size_after}B (${c.session_count_since_last} sessions)`);
1819
+ }
1820
+ }
1821
+ try {
1822
+ const swarmRuns = db.prepare("SELECT * FROM swarm_runs ORDER BY created_at").all();
1823
+ if (swarmRuns.length > 0) {
1824
+ sections.push("\n## Swarm History");
1825
+ for (const sr of swarmRuns) {
1826
+ sections.push(`- #${sr.id}: "${sr.goal}" (${sr.status}, best: ${sr.best_experiment_slug ?? "none"})`);
1827
+ }
1828
+ }
1829
+ } catch {
1830
+ }
1831
+ const reframes = db.prepare(`
1832
+ SELECT r.*, e.slug FROM reframes r
1833
+ JOIN experiments e ON r.experiment_id = e.id
1834
+ ORDER BY r.created_at
1835
+ `).all();
1836
+ if (reframes.length > 0) {
1837
+ sections.push("\n## Reframe History");
1838
+ for (const r of reframes) {
1839
+ const decomp = String(r.decomposition ?? "").slice(0, 200);
1840
+ sections.push(`- ${r.slug}: ${decomp}`);
1841
+ if (r.recommendation) sections.push(` recommendation: ${String(r.recommendation).slice(0, 200)}`);
1842
+ }
1843
+ }
1844
+ const findings = db.prepare(`
1845
+ SELECT f.*, e.slug FROM findings f
1846
+ JOIN experiments e ON f.experiment_id = e.id
1847
+ ORDER BY f.created_at
1848
+ `).all();
1849
+ if (findings.length > 0) {
1850
+ sections.push("\n## Scout Findings");
1851
+ for (const f of findings) {
1852
+ sections.push(`- ${f.slug}: ${f.approach} (${f.source}) ${f.contradicts_current ? "[CONTRADICTS CURRENT]" : ""}`);
1853
+ }
1854
+ }
1855
+ const full = sections.join("\n");
1856
+ if (full.length > maxLength) {
1857
+ return full.slice(0, maxLength) + `
1858
+
1632
1859
  [TRUNCATED \u2014 full export was ${full.length} chars]`;
1633
1860
  }
1634
1861
  return full;
@@ -1805,1076 +2032,1238 @@ var init_status = __esm({
1805
2032
  }
1806
2033
  });
1807
2034
 
1808
- // src/metrics.ts
1809
- function compareMetrics(db, experimentId, config) {
1810
- const before = getMetricsByExperimentAndPhase(db, experimentId, "before");
1811
- const after = getMetricsByExperimentAndPhase(db, experimentId, "after");
1812
- const fixtures = new Set([...before, ...after].map((m) => m.fixture));
1813
- const trackedMetrics = Object.keys(config.metrics.tracked);
1814
- const comparisons = [];
1815
- for (const fixture of fixtures) {
1816
- for (const metric of trackedMetrics) {
1817
- const b = before.find((m) => m.fixture === fixture && m.metric_name === metric);
1818
- const a = after.find((m) => m.fixture === fixture && m.metric_name === metric);
1819
- if (b && a) {
1820
- const direction = config.metrics.tracked[metric]?.direction ?? "lower_is_better";
1821
- const regression = isRegression(b.metric_value, a.metric_value, direction);
1822
- comparisons.push({
1823
- fixture,
1824
- metric,
1825
- before: b.metric_value,
1826
- after: a.metric_value,
1827
- delta: a.metric_value - b.metric_value,
1828
- regression
1829
- });
1830
- }
1831
- }
1832
- }
1833
- return comparisons;
1834
- }
1835
- function isRegression(before, after, direction) {
1836
- switch (direction) {
1837
- case "lower_is_better":
1838
- return after > before;
1839
- case "higher_is_better":
1840
- return after < before;
1841
- case "closer_to_gt":
1842
- return false;
2035
+ // src/agents/types.ts
2036
+ function getExtractionSchema(role) {
2037
+ switch (role) {
2038
+ case "builder":
2039
+ return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}';
2040
+ case "critic":
2041
+ return '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}';
2042
+ case "adversary":
2043
+ return '{"challenges": [{"description": "string", "reasoning": "string"}]}';
2044
+ case "verifier":
2045
+ return '{"grades": [{"component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string"}], "doubt_resolutions": [{"doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive"}]}';
2046
+ case "gatekeeper":
2047
+ return '{"gate_decision": "approve|reject|flag", "reason": "string", "stale_references": ["string"], "overlapping_dead_ends": [0]}';
2048
+ case "reframer":
2049
+ return '{"reframe": {"decomposition": "string", "divergences": ["string"], "recommendation": "string"}}';
2050
+ case "scout":
2051
+ return '{"findings": [{"approach": "string", "source": "string", "relevance": "string", "contradicts_current": true}]}';
2052
+ case "compressor":
2053
+ return '{"compression_report": {"synthesis_delta": "string", "new_dead_ends": ["string"], "fragility_changes": ["string"]}}';
2054
+ case "diagnostician":
2055
+ return '{"diagnosis": {"root_causes": ["string"], "patterns": ["string"], "evidence_gaps": ["string"], "investigation_directions": ["string"]}}';
1843
2056
  default:
1844
- return false;
1845
- }
1846
- }
1847
- function parseMetricsOutput(jsonStr) {
1848
- const data = JSON.parse(jsonStr);
1849
- const results = [];
1850
- if (data.fixtures && typeof data.fixtures === "object") {
1851
- for (const [fixture, metrics] of Object.entries(data.fixtures)) {
1852
- for (const [metricName, metricValue] of Object.entries(metrics)) {
1853
- if (typeof metricValue === "number") {
1854
- results.push({ fixture, metric_name: metricName, metric_value: metricValue });
1855
- }
1856
- }
1857
- }
2057
+ return EXTRACTION_SCHEMA;
1858
2058
  }
1859
- return results;
1860
2059
  }
1861
- var init_metrics = __esm({
1862
- "src/metrics.ts"() {
2060
+ var EXTRACTION_SCHEMA, ROLE_REQUIRED_FIELDS;
2061
+ var init_types = __esm({
2062
+ "src/agents/types.ts"() {
1863
2063
  "use strict";
1864
- init_queries();
2064
+ EXTRACTION_SCHEMA = `{
2065
+ "decisions": [{ "description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string" }],
2066
+ "grades": [{ "component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string" }],
2067
+ "doubts": [{ "claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical" }],
2068
+ "guidance": "string (actionable builder guidance)",
2069
+ "doubt_resolutions": [{ "doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive" }]
2070
+ }`;
2071
+ ROLE_REQUIRED_FIELDS = {
2072
+ builder: ["decisions"],
2073
+ critic: ["doubts"],
2074
+ adversary: ["challenges"],
2075
+ verifier: ["grades"],
2076
+ gatekeeper: ["gate_decision"],
2077
+ reframer: ["reframe"],
2078
+ scout: ["findings"],
2079
+ compressor: ["compression_report"],
2080
+ diagnostician: ["diagnosis"]
2081
+ };
1865
2082
  }
1866
2083
  });
1867
2084
 
1868
- // src/commands/measure.ts
1869
- var measure_exports = {};
1870
- __export(measure_exports, {
1871
- baseline: () => baseline,
1872
- compare: () => compare,
1873
- measure: () => measure
1874
- });
1875
- async function baseline(args) {
1876
- await captureMetrics("before", args);
1877
- }
1878
- async function measure(args) {
1879
- await captureMetrics("after", args);
1880
- }
1881
- async function captureMetrics(phase, args) {
1882
- const root = findProjectRoot();
1883
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
1884
- const db = getDb(root);
1885
- const config = loadConfig(root);
1886
- const expIdStr = getFlagValue(args, "--experiment");
1887
- let exp;
1888
- if (expIdStr !== void 0) {
1889
- exp = getExperimentById(db, Number(expIdStr));
2085
+ // src/agents/parse.ts
2086
+ async function extractStructuredData(role, markdown) {
2087
+ const tier1 = extractMajlisJsonBlock(markdown);
2088
+ if (tier1) {
2089
+ const parsed = tryParseJson(tier1);
2090
+ if (parsed) return parsed;
2091
+ console.warn(`[majlis] Malformed JSON in <!-- majlis-json --> block for ${role}. Falling back.`);
1890
2092
  } else {
1891
- exp = getLatestExperiment(db);
1892
- }
1893
- if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
1894
- if (config.build.pre_measure) {
1895
- info(`Running pre-measure: ${config.build.pre_measure}`);
1896
- try {
1897
- (0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
1898
- } catch {
1899
- warn("Pre-measure command failed \u2014 continuing anyway.");
1900
- }
2093
+ console.warn(`[majlis] No <!-- majlis-json --> block found in ${role} output. Falling back.`);
1901
2094
  }
1902
- if (!config.metrics.command) {
1903
- throw new Error("No metrics.command configured in .majlis/config.json");
2095
+ const tier2 = extractViaPatterns(role, markdown);
2096
+ if (tier2 && hasData(tier2)) {
2097
+ console.warn(`[majlis] Used regex fallback for ${role}. Review extracted data.`);
2098
+ return tier2;
1904
2099
  }
1905
- info(`Running metrics: ${config.metrics.command}`);
1906
- let metricsOutput;
2100
+ console.warn(`[majlis] Regex fallback insufficient for ${role}. Using Haiku extraction.`);
2101
+ const tier3 = await extractViaHaiku(role, markdown);
2102
+ if (tier3) return tier3;
2103
+ console.error(
2104
+ `[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
2105
+ );
2106
+ return null;
2107
+ }
2108
/**
 * Returns the trimmed contents of the first `<!-- majlis-json ... -->` HTML
 * comment in `markdown`.
 *
 * The payload may start on the line after the marker or on the same line, so
 * both the multi-line form and a compact `<!-- majlis-json {...} -->` comment
 * are accepted. The text is returned as-is; parsing is left to the caller.
 *
 * @param {string} markdown - Agent output to search.
 * @returns {string | null} The raw block text (possibly empty), or `null` when
 *   no block is present or `markdown` is not a string.
 */
function extractMajlisJsonBlock(markdown) {
  if (typeof markdown !== "string") return null;
  // `\b` keeps markers such as "majlis-jsonish" from matching now that a
  // newline after the marker is no longer mandatory.
  const match = markdown.match(/<!--\s*majlis-json\b\s*([\s\S]*?)-->/);
  if (!match) return null;
  return match[1].trim();
}
2113
+ function tryParseJson(jsonStr) {
1907
2114
  try {
1908
- metricsOutput = (0, import_node_child_process.execSync)(config.metrics.command, {
1909
- cwd: root,
1910
- encoding: "utf-8",
1911
- stdio: ["pipe", "pipe", "pipe"]
2115
+ return JSON.parse(jsonStr);
2116
+ } catch {
2117
+ return null;
2118
+ }
2119
+ }
2120
+ function extractViaPatterns(role, markdown) {
2121
+ const result = {};
2122
+ const decisionPattern = /\[(?:decision|Decision)\].*?(?:description|Description):\s*(.+?)(?:\n|$).*?(?:evidence.?level|Evidence.?Level|level):\s*(proof|test|strong_consensus|consensus|analogy|judgment).*?(?:justification|Justification):\s*(.+?)(?:\n|$)/gis;
2123
+ const decisions = [];
2124
+ const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
2125
+ let match;
2126
+ while ((match = evidenceMarkers.exec(markdown)) !== null) {
2127
+ decisions.push({
2128
+ description: match[1].trim(),
2129
+ evidence_level: match[2].toLowerCase().trim(),
2130
+ justification: "Extracted via regex \u2014 review"
1912
2131
  });
1913
- } catch (err) {
1914
- throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
1915
2132
  }
1916
- const parsed = parseMetricsOutput(metricsOutput);
1917
- if (parsed.length === 0) {
1918
- warn("Metrics command returned no data.");
1919
- return;
2133
+ const inlineTagPattern = /\[(proof|test|strong_consensus|consensus|analogy|judgment)\]\s*(.+?)(?:\n|$)/gi;
2134
+ while ((match = inlineTagPattern.exec(markdown)) !== null) {
2135
+ const desc = match[2].trim();
2136
+ if (!decisions.some((d) => d.description === desc)) {
2137
+ decisions.push({
2138
+ description: desc,
2139
+ evidence_level: match[1].toLowerCase(),
2140
+ justification: "Extracted via regex \u2014 review"
2141
+ });
2142
+ }
1920
2143
  }
1921
- for (const m of parsed) {
1922
- insertMetric(db, exp.id, phase, m.fixture, m.metric_name, m.metric_value);
2144
+ if (decisions.length > 0) result.decisions = decisions;
2145
+ const grades = [];
2146
+ const gradePattern = /(?:^|\n)\s*[-*]?\s*\*?\*?(?:Grade|GRADE|Component)\*?\*?.*?(?:component|Component)?\s*[:=]\s*(.+?)(?:\n|,).*?(?:grade|Grade)\s*[:=]\s*(sound|good|weak|rejected)/gim;
2147
+ while ((match = gradePattern.exec(markdown)) !== null) {
2148
+ grades.push({
2149
+ component: match[1].trim(),
2150
+ grade: match[2].toLowerCase().trim()
2151
+ });
1923
2152
  }
1924
- success(`Captured ${parsed.length} metric(s) for ${exp.slug} (phase: ${phase})`);
1925
- if (config.build.post_measure) {
1926
- try {
1927
- (0, import_node_child_process.execSync)(config.build.post_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
1928
- } catch {
1929
- warn("Post-measure command failed.");
2153
+ const simpleGradePattern = /(?:^|\n)\s*[-*]\s*\*?\*?(.+?)\*?\*?\s*[:—–-]\s*\*?\*?(sound|good|weak|rejected)\*?\*?/gim;
2154
+ while ((match = simpleGradePattern.exec(markdown)) !== null) {
2155
+ const comp = match[1].trim();
2156
+ if (!grades.some((g) => g.component === comp)) {
2157
+ grades.push({
2158
+ component: comp,
2159
+ grade: match[2].toLowerCase().trim()
2160
+ });
1930
2161
  }
1931
2162
  }
1932
- }
1933
- async function compare(args, isJson) {
1934
- const root = findProjectRoot();
1935
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
1936
- const db = getDb(root);
1937
- const config = loadConfig(root);
1938
- const expIdStr = getFlagValue(args, "--experiment");
1939
- let exp;
1940
- if (expIdStr !== void 0) {
1941
- exp = getExperimentById(db, Number(expIdStr));
1942
- } else {
1943
- exp = getLatestExperiment(db);
1944
- }
1945
- if (!exp) throw new Error("No active experiment.");
1946
- const comparisons = compareMetrics(db, exp.id, config);
1947
- if (comparisons.length === 0) {
1948
- warn(`No before/after metrics to compare for ${exp.slug}. Run baseline and measure first.`);
1949
- return;
1950
- }
1951
- if (isJson) {
1952
- console.log(JSON.stringify({ experiment: exp.slug, comparisons }, null, 2));
1953
- return;
2163
+ if (grades.length > 0) result.grades = grades;
2164
+ const doubts = [];
2165
+ const doubtPattern = /(?:Doubt|DOUBT|Claim doubted|CLAIM)\s*(?:\d+)?[:.]?\s*(.+?)(?:\n|$)[\s\S]*?(?:Severity|SEVERITY)\s*[:=]\s*(minor|moderate|critical)/gim;
2166
+ while ((match = doubtPattern.exec(markdown)) !== null) {
2167
+ doubts.push({
2168
+ claim_doubted: match[1].trim(),
2169
+ evidence_level_of_claim: "unknown",
2170
+ // Don't fabricate — mark as unknown for review
2171
+ evidence_for_doubt: "Extracted via regex \u2014 review original document",
2172
+ severity: match[2].toLowerCase().trim()
2173
+ });
1954
2174
  }
1955
- header(`Metric Comparison \u2014 ${exp.slug}`);
1956
- const regressions = comparisons.filter((c) => c.regression);
1957
- const rows = comparisons.map((c) => [
1958
- c.fixture,
1959
- c.metric,
1960
- String(c.before),
1961
- String(c.after),
1962
- formatDelta(c.delta),
1963
- c.regression ? red("REGRESSION") : green("OK")
1964
- ]);
1965
- console.log(table(["Fixture", "Metric", "Before", "After", "Delta", "Status"], rows));
1966
- if (regressions.length > 0) {
1967
- console.log();
1968
- warn(`${regressions.length} regression(s) detected!`);
1969
- } else {
1970
- console.log();
1971
- success("No regressions detected.");
2175
+ if (doubts.length > 0) result.doubts = doubts;
2176
+ return result;
2177
+ }
2178
/**
 * Last-resort extraction: asks a small model to turn an agent's markdown
 * output into JSON that follows the extraction schema for `role`.
 *
 * The document is cut to 8000 characters before it is sent. The request runs
 * with no tools, a single turn and no persisted session. All text blocks of
 * the assistant messages are concatenated and parsed as JSON; when that fails
 * and the reply contains a Markdown code fence, the fence contents are parsed
 * instead, because models often wrap JSON in a fence despite being told not to.
 *
 * @param {string} role - Agent role whose schema should be used.
 * @param {string} markdown - The agent output to extract from.
 * @returns {Promise<object | null>} Parsed data, or `null` when the request
 *   fails or the reply is not valid JSON. Errors are logged, never thrown.
 */
async function extractViaHaiku(role, markdown) {
  try {
    const truncated = markdown.length > 8e3 ? markdown.slice(0, 8e3) + "\n[truncated]" : markdown;
    const schema = getExtractionSchema(role);
    const prompt = `Extract structured data from this ${role} document as JSON. Follow this schema exactly: ${schema}

Document:
${truncated}`;
    const conversation = (0, import_claude_agent_sdk.query)({
      prompt,
      options: {
        model: "haiku",
        tools: [],
        systemPrompt: "You are a JSON extraction assistant. Output only valid JSON matching the requested schema. No markdown, no explanation, just JSON.",
        permissionMode: "bypassPermissions",
        allowDangerouslySkipPermissions: true,
        maxTurns: 1,
        persistSession: false
      }
    });
    let resultText = "";
    for await (const message of conversation) {
      // Only assistant messages carry model text; other message types are skipped.
      if (message.type !== "assistant") continue;
      for (const block of message.message.content) {
        if (block.type === "text") {
          resultText += block.text;
        }
      }
    }
    const raw = resultText.trim();
    const direct = tryParseJson(raw);
    if (direct !== null) return direct;
    // Fall back to the first fenced block (with or without a "json" tag) so a
    // wrapped but otherwise valid answer is not thrown away.
    const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/i);
    return fenced ? tryParseJson(fenced[1].trim()) : null;
  } catch (err) {
    console.warn(`[majlis] Haiku extraction failed for ${role}: ${err instanceof Error ? err.message : String(err)}`);
    return null;
  }
}
1974
- function formatDelta(delta) {
1975
- const prefix = delta > 0 ? "+" : "";
1976
- return `${prefix}${delta.toFixed(4)}`;
2214
/**
 * Tells whether an extraction result carries any usable content.
 *
 * List-valued fields count only when they are non-empty; the remaining fields
 * count whenever they are truthy.
 *
 * @param {object} output - Extraction result to inspect.
 * @returns {boolean} `true` when at least one recognised field has content.
 */
function hasData(output) {
  const listFields = ["decisions", "grades", "doubts", "challenges", "findings"];
  const valueFields = ["guidance", "reframe", "compression_report", "gate_decision", "diagnosis"];
  if (listFields.some((key) => output[key] && output[key].length > 0)) {
    return true;
  }
  return valueFields.some((key) => Boolean(output[key]));
}
1978
- var import_node_child_process;
1979
- var init_measure = __esm({
1980
- "src/commands/measure.ts"() {
2217
/**
 * Checks that an extraction result contains every field required for `role`.
 *
 * A field is reported missing when it is `undefined`, `null`, or an empty
 * array. Roles without an entry in `ROLE_REQUIRED_FIELDS` have no requirements
 * and always validate. A `null`/`undefined` output is treated as having none
 * of the required fields.
 *
 * @param {string} role - Agent role to validate against.
 * @param {object | null | undefined} output - Extraction result to check.
 * @returns {{ valid: boolean, missing: string[] }} Validation verdict and the
 *   names of the missing fields.
 */
function validateForRole(role, output) {
  // Own-property lookup only: an inherited name such as "toString" must not be
  // mistaken for a list of required fields.
  const required = Object.prototype.hasOwnProperty.call(ROLE_REQUIRED_FIELDS, role)
    ? ROLE_REQUIRED_FIELDS[role]
    : void 0;
  if (!required) return { valid: true, missing: [] };
  const missing = required.filter((field) => {
    const value = output?.[field];
    if (value === void 0 || value === null) return true;
    if (Array.isArray(value) && value.length === 0) return true;
    return false;
  });
  return { valid: missing.length === 0, missing };
}
2228
+ var import_claude_agent_sdk;
2229
+ var init_parse = __esm({
2230
+ "src/agents/parse.ts"() {
1981
2231
  "use strict";
1982
- import_node_child_process = require("child_process");
1983
- init_connection();
1984
- init_queries();
1985
- init_metrics();
1986
- init_config();
1987
- init_format();
2232
+ init_types();
2233
+ import_claude_agent_sdk = require("@anthropic-ai/claude-agent-sdk");
1988
2234
  }
1989
2235
  });
1990
2236
 
1991
- // src/commands/experiment.ts
1992
- var experiment_exports = {};
1993
- __export(experiment_exports, {
1994
- newExperiment: () => newExperiment,
1995
- revert: () => revert
1996
- });
1997
- async function newExperiment(args) {
1998
- const root = findProjectRoot();
1999
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2000
- const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
2001
- if (!hypothesis) {
2002
- throw new Error('Usage: majlis new "hypothesis"');
2003
- }
2004
- const db = getDb(root);
2005
- const config = loadConfig(root);
2006
- const slug = slugify(hypothesis);
2007
- if (getExperimentBySlug(db, slug)) {
2008
- throw new Error(`Experiment with slug "${slug}" already exists.`);
2237
+ // src/agents/spawn.ts
2238
+ function loadAgentDefinition(role, projectRoot) {
2239
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2240
+ const filePath = path4.join(root, ".majlis", "agents", `${role}.md`);
2241
+ if (!fs4.existsSync(filePath)) {
2242
+ throw new Error(`Agent definition not found: ${filePath}`);
2009
2243
  }
2010
- const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
2011
- const num = allExps.count + 1;
2012
- const paddedNum = String(num).padStart(3, "0");
2013
- const branch = `exp/${paddedNum}-${slug}`;
2014
- try {
2015
- (0, import_node_child_process2.execSync)(`git checkout -b ${branch}`, {
2016
- cwd: root,
2017
- encoding: "utf-8",
2018
- stdio: ["pipe", "pipe", "pipe"]
2019
- });
2020
- info(`Created branch: ${branch}`);
2021
- } catch (err) {
2022
- warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
2244
+ const content = fs4.readFileSync(filePath, "utf-8");
2245
+ const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
2246
+ if (!frontmatterMatch) {
2247
+ throw new Error(`Invalid agent definition (missing YAML frontmatter): ${filePath}`);
2023
2248
  }
2024
- const subType = getFlagValue(args, "--sub-type") ?? null;
2025
- const exp = createExperiment(db, slug, branch, hypothesis, subType, null);
2026
- success(`Created experiment #${exp.id}: ${exp.slug}`);
2027
- const docsDir = path4.join(root, "docs", "experiments");
2028
- const templatePath = path4.join(docsDir, "_TEMPLATE.md");
2029
- if (fs4.existsSync(templatePath)) {
2030
- const template = fs4.readFileSync(templatePath, "utf-8");
2031
- const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, subType ?? "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
2032
- const logPath = path4.join(docsDir, `${paddedNum}-${slug}.md`);
2033
- fs4.writeFileSync(logPath, logContent);
2034
- info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
2249
+ const frontmatter = frontmatterMatch[1];
2250
+ const body = frontmatterMatch[2].trim();
2251
+ const name = extractYamlField(frontmatter, "name") ?? role;
2252
+ const model = extractYamlField(frontmatter, "model") ?? "opus";
2253
+ const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
2254
+ const tools = toolsStr.replace(/[\[\]]/g, "").split(",").map((t) => t.trim()).filter(Boolean);
2255
+ return { name, model, tools, systemPrompt: body };
2256
+ }
2257
/**
 * Build the reminder text injected into an agent's context at a checkpoint.
 *
 * The message starts with a banner showing an approximate turn number
 * (half of the tool-use count, rounded) out of `maxTurns`, followed by
 * role-specific reminders. Roles without a dedicated entry get a generic
 * "is your core task done?" prompt.
 *
 * @param {string} role - Agent role name (e.g. "builder", "verifier").
 * @param {number} toolUseCount - Number of tool uses observed so far.
 * @param {number} maxTurns - Turn budget shown in the banner.
 * @returns {string} Banner and reminder lines joined with newlines.
 */
function buildCheckpointMessage(role, toolUseCount, maxTurns) {
  // A Map (not a plain object) so that role names such as "constructor"
  // can never resolve to an inherited property.
  const remindersByRole = new Map([
    ["builder", [
      "Reminder: ONE code change per cycle.",
      "- Have you run the benchmark? YES \u2192 document results + output JSON + STOP.",
      "- If NO \u2192 run it now, then wrap up.",
      "Do NOT start a second change or investigate unrelated failures."
    ]],
    ["verifier", [
      "AT MOST 3 diagnostic scripts total.",
      "- If \u22653 scripts run \u2192 produce grades + output JSON now.",
      "- Trust framework metrics. Do not re-derive from raw data."
    ]],
    ["critic", [
      "Focus on the SINGLE weakest assumption.",
      "- Have you identified the core doubt? YES \u2192 write it up + output JSON.",
      "- Do not enumerate every possible concern \u2014 pick the most dangerous one."
    ]],
    ["adversary", [
      "Design ONE targeted challenge, not a test suite.",
      "- Have you defined the challenge? YES \u2192 write it up + output JSON.",
      "- Focus on what would DISPROVE the hypothesis, not general testing."
    ]],
    ["compressor", [
      "You may ONLY write to docs/synthesis/.",
      "- Have you updated current.md, fragility.md, dead-ends.md?",
      "- If yes \u2192 output compression report JSON.",
      "- Do NOT write to MEMORY.md or files outside docs/synthesis/."
    ]],
    ["diagnostician", [
      "You are READ-ONLY for project code. Write ONLY to .majlis/scripts/.",
      "Focus on diagnosis, not fixing. Your value is insight, not implementation.",
      "Phase 1 (1-10): orientation. Phase 2 (11-40): deep investigation. Phase 3 (41-60): synthesis.",
      "If you are past turn 40, begin compiling your diagnostic report."
    ]]
  ]);
  const genericReminder = ["Check: is your core task done? If yes, wrap up and output JSON."];

  const turnEstimate = Math.round(toolUseCount / 2);
  const banner = `[MAJLIS CHECKPOINT \u2014 ~${turnEstimate} of ${maxTurns} turns used]`;
  const reminders = remindersByRole.get(role) ?? genericReminder;
  return [banner, ...reminders].join("\n");
}
2299
+ function buildPreToolUseGuards(role) {
2300
+ if (role === "compressor") {
2301
+ const guardHook = async (input) => {
2302
+ const toolInput = input.tool_input ?? {};
2303
+ const filePath = toolInput.file_path ?? "";
2304
+ if (filePath && !filePath.includes("/docs/synthesis/")) {
2305
+ return {
2306
+ decision: "block",
2307
+ reason: `Compressor may only write to docs/synthesis/. Blocked: ${filePath}`
2308
+ };
2309
+ }
2310
+ return {};
2311
+ };
2312
+ return [
2313
+ { matcher: "Write", hooks: [guardHook] },
2314
+ { matcher: "Edit", hooks: [guardHook] }
2315
+ ];
2035
2316
  }
2036
- if (config.cycle.auto_baseline_on_new_experiment && config.metrics.command) {
2037
- info("Auto-baselining... (run `majlis baseline` to do this manually)");
2038
- try {
2039
- const { baseline: baseline2 } = await Promise.resolve().then(() => (init_measure(), measure_exports));
2040
- await baseline2(["--experiment", String(exp.id)]);
2041
- } catch (err) {
2042
- warn("Auto-baseline failed \u2014 run `majlis baseline` manually.");
2043
- }
2317
+ if (role === "diagnostician") {
2318
+ const writeGuard = async (input) => {
2319
+ const toolInput = input.tool_input ?? {};
2320
+ const filePath = toolInput.file_path ?? "";
2321
+ if (filePath && !filePath.includes("/.majlis/scripts/")) {
2322
+ return {
2323
+ decision: "block",
2324
+ reason: `Diagnostician may only write to .majlis/scripts/. Blocked: ${filePath}`
2325
+ };
2326
+ }
2327
+ return {};
2328
+ };
2329
+ const bashGuard = async (input) => {
2330
+ const toolInput = input.tool_input ?? {};
2331
+ const command = toolInput.command ?? "";
2332
+ const destructive = /\b(rm\s+-rf|git\s+(checkout|reset|stash|clean|push)|chmod|chown|mkfs|dd\s+if=)\b/i;
2333
+ if (destructive.test(command)) {
2334
+ return {
2335
+ decision: "block",
2336
+ reason: `Diagnostician blocked destructive command: ${command.slice(0, 100)}`
2337
+ };
2338
+ }
2339
+ return {};
2340
+ };
2341
+ return [
2342
+ { matcher: "Write", hooks: [writeGuard] },
2343
+ { matcher: "Edit", hooks: [writeGuard] },
2344
+ { matcher: "Bash", hooks: [bashGuard] }
2345
+ ];
2044
2346
  }
2347
+ return void 0;
2045
2348
  }
2046
- async function revert(args) {
2047
- const root = findProjectRoot();
2048
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2049
- const db = getDb(root);
2050
- let exp;
2051
- const slugArg = args.filter((a) => !a.startsWith("--"))[0];
2052
- if (slugArg) {
2053
- exp = getExperimentBySlug(db, slugArg);
2054
- if (!exp) throw new Error(`Experiment not found: ${slugArg}`);
2055
- } else {
2056
- exp = getLatestExperiment(db);
2057
- if (!exp) throw new Error("No active experiments to revert.");
2349
+ function buildAgentHooks(role, maxTurns) {
2350
+ const result = {};
2351
+ let hasHooks = false;
2352
+ const interval = CHECKPOINT_INTERVAL[role];
2353
+ if (interval) {
2354
+ let toolUseCount = 0;
2355
+ const checkpointHook = async () => {
2356
+ toolUseCount++;
2357
+ if (toolUseCount % interval === 0) {
2358
+ const msg = buildCheckpointMessage(role, toolUseCount, maxTurns);
2359
+ return {
2360
+ hookSpecificOutput: {
2361
+ hookEventName: "PostToolUse",
2362
+ additionalContext: msg
2363
+ }
2364
+ };
2365
+ }
2366
+ return {};
2367
+ };
2368
+ result.PostToolUse = [{ hooks: [checkpointHook] }];
2369
+ hasHooks = true;
2058
2370
  }
2059
- const reason = getFlagValue(args, "--reason") ?? "Manually reverted";
2060
- const category = args.includes("--structural") ? "structural" : "procedural";
2061
- insertDeadEnd(
2062
- db,
2063
- exp.id,
2064
- exp.hypothesis ?? exp.slug,
2065
- reason,
2066
- `Reverted: ${reason}`,
2067
- exp.sub_type,
2068
- category
2069
- );
2070
- updateExperimentStatus(db, exp.id, "dead_end");
2071
- try {
2072
- const currentBranch = (0, import_node_child_process2.execSync)("git rev-parse --abbrev-ref HEAD", {
2073
- cwd: root,
2074
- encoding: "utf-8"
2075
- }).trim();
2076
- if (currentBranch === exp.branch) {
2077
- (0, import_node_child_process2.execSync)("git checkout main 2>/dev/null || git checkout master", {
2078
- cwd: root,
2079
- encoding: "utf-8",
2080
- stdio: ["pipe", "pipe", "pipe"]
2081
- });
2082
- }
2083
- } catch {
2084
- warn("Could not switch git branches \u2014 do this manually.");
2371
+ const guards = buildPreToolUseGuards(role);
2372
+ if (guards) {
2373
+ result.PreToolUse = guards;
2374
+ hasHooks = true;
2085
2375
  }
2086
- info(`Experiment ${exp.slug} reverted to dead-end. Reason: ${reason}`);
2376
+ return hasHooks ? result : void 0;
2087
2377
  }
2088
- function slugify(text) {
2089
- return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
2090
- }
2091
- var fs4, path4, import_node_child_process2;
2092
- var init_experiment = __esm({
2093
- "src/commands/experiment.ts"() {
2378
/**
 * Read a single-line `key: value` entry from a block of YAML-like text.
 *
 * This is a line-oriented lookup, not a YAML parser: it finds the first
 * line that starts with `<field>:` and returns the rest of that line,
 * trimmed.
 *
 * @param {string} yaml - Frontmatter text to search.
 * @param {string} field - Key to look up; matched literally.
 * @returns {string|null} The trimmed value, or null when the key is absent
 *   or its value is empty on that line.
 */
function extractYamlField(yaml, field) {
  // Escape regex metacharacters so a key like "a.b" only matches itself.
  const literalField = field.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // `[ \t]*` (not `\s*`) keeps the match on one line: with `\s*` an empty
  // "key:" line would swallow the newline and return the NEXT line's text.
  // `\S` requires the value to contain at least one non-blank character.
  const pattern = new RegExp(`^${literalField}:[ \\t]*(\\S.*)$`, "m");
  const match = yaml.match(pattern);
  return match ? match[1].trim() : null;
}
2382
/**
 * Run the agent defined for `role` against the given context.
 *
 * Loads the role's agent definition, sends the JSON-serialised context plus
 * a task prompt through `runQuery`, optionally persists the output via
 * `writeArtifact`, and extracts structured data from the output. Missing
 * expected fields only produce a warning.
 *
 * @param {string} role - Agent role; selects the definition and turn budget.
 * @param {object} context - Context handed to the agent; `context.taskPrompt`
 *   overrides the default task sentence.
 * @param {string} [projectRoot] - Project root; falls back to
 *   `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<{output: string, structured: *, truncated: boolean}>}
 */
async function spawnAgent(role, context, projectRoot) {
  const definition = loadAgentDefinition(role, projectRoot);
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();

  const task = context.taskPrompt ?? `Perform your role as ${definition.name}.`;
  const serialisedContext = JSON.stringify(context);
  const prompt = `Here is your context:\n\n\`\`\`json\n${serialisedContext}\n\`\`\`\n\n${task}`;

  // Roles without an explicit budget get 15 turns.
  const turns = ROLE_MAX_TURNS[role] ?? 15;
  console.log(`[${role}] Spawning (model: ${definition.model}, maxTurns: ${turns})...`);

  const result = await runQuery({
    prompt,
    model: definition.model,
    tools: definition.tools,
    systemPrompt: definition.systemPrompt,
    cwd: root,
    maxTurns: turns,
    label: role,
    role
  });
  const markdown = result.text;
  const truncated = result.truncated;
  const truncationNote = truncated ? ", TRUNCATED" : "";
  console.log(`[${role}] Complete (cost: $${result.costUsd.toFixed(4)}${truncationNote})`);

  const artifactPath = writeArtifact(role, context, markdown, root);
  if (artifactPath) {
    console.log(`[${role}] Artifact written to ${artifactPath}`);
  }

  const structured = await extractStructuredData(role, markdown);
  if (structured) {
    const validation = validateForRole(role, structured);
    if (!validation.valid) {
      console.warn(`[${role}] Output missing expected fields: ${validation.missing.join(", ")}`);
    }
  }

  return { output: markdown, structured, truncated };
}
2420
/**
 * Run the built-in synthesis agent over the given context.
 *
 * Unlike `spawnAgent`, this agent has no definition file: the model
 * ("sonnet"), read-only tool list and system prompt are fixed here, and the
 * turn budget is 5. The raw output is returned both as `output` and as
 * `structured.guidance`.
 *
 * @param {object} context - Context for the agent; `context.taskPrompt`
 *   overrides the default task sentence.
 * @param {string} [projectRoot] - Project root; falls back to
 *   `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<{output: string, structured: {guidance: string}, truncated: boolean}>}
 */
async function spawnSynthesiser(context, projectRoot) {
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
  const serialisedContext = JSON.stringify(context);
  const task = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
  const prompt = `Here is your context:\n\n\`\`\`json\n${serialisedContext}\n\`\`\`\n\n${task}`;
  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';

  console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
  const result = await runQuery({
    prompt,
    model: "sonnet",
    tools: ["Read", "Glob", "Grep"],
    systemPrompt,
    cwd: root,
    maxTurns: 5,
    label: "synthesiser",
    role: "synthesiser"
  });
  console.log(`[synthesiser] Complete (cost: $${result.costUsd.toFixed(4)})`);

  return {
    output: result.text,
    structured: { guidance: result.text },
    truncated: result.truncated
  };
}
2446
/**
 * Ask a small "recovery" agent to rewrite an experiment doc after another
 * agent was cut off by its turn limit.
 *
 * The prompt includes the last 3000 characters of the truncated agent's
 * output, the current experiment doc (if it exists) and the doc template
 * (if it exists). The recovery agent is expected to write the cleaned doc
 * itself using the Write tool; nothing is returned.
 *
 * @param {string} role - Role of the agent that was truncated.
 * @param {string} partialOutput - Output collected before truncation.
 * @param {object} context - Context whose `experiment.slug` / `experiment.id`
 *   locate the experiment doc ("unknown" / 0 when missing).
 * @param {string} [projectRoot] - Project root; falls back to
 *   `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<void>}
 */
async function spawnRecovery(role, partialOutput, context, projectRoot) {
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
  const expSlug = context.experiment?.slug ?? "unknown";
  console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);

  const experimentsDir = ["docs", "experiments"];
  const paddedId = String(context.experiment?.id ?? 0).padStart(3, "0");
  const expDocPath = path4.join(root, ...experimentsDir, `${paddedId}-${expSlug}.md`);
  const templatePath = path4.join(root, ...experimentsDir, "_TEMPLATE.md");

  // Either file may be absent; an empty string stands in for it.
  let template = "";
  if (fs4.existsSync(templatePath)) {
    template = fs4.readFileSync(templatePath, "utf-8");
  }
  let currentDoc = "";
  if (fs4.existsSync(expDocPath)) {
    currentDoc = fs4.readFileSync(expDocPath, "utf-8");
  }

  // Only the tail of the output is forwarded to keep the prompt small.
  const outputTail = partialOutput.slice(-3000);
  const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".

Here is the partial agent output (reasoning + tool calls):
<partial_output>
${outputTail}
</partial_output>

Here is the current experiment doc:
<current_doc>
${currentDoc}
</current_doc>

Here is the template that the experiment doc should follow:
<template>
${template}
</template>

Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
- Keep any valid content from the current doc
- Fill in what you can infer from the partial output
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
- Be concise. This is cleanup, not new work.`;

  // The agent's text output is not used; its effect is the file it writes.
  await runQuery({
    prompt,
    model: "haiku",
    tools: ["Read", "Write"],
    systemPrompt: "You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.",
    cwd: root,
    maxTurns: 5,
    label: "recovery",
    role: "recovery"
  });
  console.log(`[recovery] Cleanup complete for ${expSlug}.`);
}
2495
/**
 * Run one agent conversation through the SDK `query` call and collect its
 * text output.
 *
 * While the conversation streams, tool uses, text previews and long-running
 * tool progress are echoed to stderr, prefixed with `opts.label`. Hitting
 * the turn limit is not an error: the partial text is returned with
 * `truncated: true`. Any other non-success result throws.
 *
 * @param {object} opts
 * @param {string} opts.prompt - User prompt.
 * @param {string} opts.model - Model name passed to the SDK.
 * @param {string[]} opts.tools - Tool names made available to the agent.
 * @param {string} opts.systemPrompt - Text appended to the "claude_code" preset.
 * @param {string} opts.cwd - Working directory for the agent.
 * @param {number} [opts.maxTurns=15] - Turn budget.
 * @param {string} [opts.label="majlis"] - Prefix for log lines.
 * @param {string} [opts.role] - When set, role-specific hooks are attached.
 * @returns {Promise<{text: string, costUsd: number, truncated: boolean}>}
 * @throws {Error} When the result subtype is neither "success" nor
 *   "error_max_turns".
 */
async function runQuery(opts) {
  const tag = opts.label ?? "majlis";
  const turnLimit = opts.maxTurns ?? 15;
  const hooks = opts.role ? buildAgentHooks(opts.role, turnLimit) : void 0;

  const conversation = (0, import_claude_agent_sdk2.query)({
    prompt: opts.prompt,
    options: {
      model: opts.model,
      tools: opts.tools,
      systemPrompt: {
        type: "preset",
        preset: "claude_code",
        append: opts.systemPrompt
      },
      cwd: opts.cwd,
      permissionMode: "bypassPermissions",
      allowDangerouslySkipPermissions: true,
      maxTurns: turnLimit,
      persistSession: false,
      settingSources: ["project"],
      hooks
    }
  });

  // Every progress line is dimmed and terminated the same way.
  const logDim = (line) => {
    process.stderr.write(`${DIM2}${line}${RESET2}\n`);
  };

  const chunks = [];
  let costUsd = 0;
  let turnsSeen = 0;
  let truncated = false;

  for await (const message of conversation) {
    if (message.type === "assistant") {
      turnsSeen += 1;
      let sawText = false;
      for (const block of message.message.content) {
        if (block.type === "text") {
          chunks.push(block.text);
          sawText = true;
          continue;
        }
        if (block.type !== "tool_use") {
          continue;
        }
        const toolName = block.name ?? "tool";
        const detail = formatToolDetail(toolName, block.input ?? {});
        logDim(`[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}`);
      }
      if (!sawText) {
        continue;
      }
      // Preview the most recent text chunk on a single line.
      const preview = chunks[chunks.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
      if (preview) {
        const ellipsis = preview.length >= 120 ? "..." : "";
        logDim(`[${tag}] writing: ${preview}${ellipsis}`);
      }
      continue;
    }

    if (message.type === "tool_progress") {
      const elapsed = Math.round(message.elapsed_time_seconds);
      // Only report on multiples of five seconds.
      if (elapsed > 0 && elapsed % 5 === 0) {
        logDim(`[${tag}] ${message.tool_name} running (${elapsed}s)...`);
      }
      continue;
    }

    if (message.type !== "result") {
      continue;
    }

    switch (message.subtype) {
      case "success":
        costUsd = message.total_cost_usd;
        break;
      case "error_max_turns":
        truncated = true;
        costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
        console.warn(`[${tag}] Hit max turns (${turnsSeen}). Returning partial output.`);
        break;
      default: {
        const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
        throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
      }
    }
  }

  return { text: chunks.join("\n\n"), costUsd, truncated };
}
2565
/**
 * Produce a short hyphenated slug for an experiment hypothesis.
 *
 * A model is asked for a 2-4 word slug; its answer is sanitised to
 * `[a-z0-9-]`, hyphen runs are collapsed, and leading/trailing hyphens are
 * removed (also after truncating to 40 characters). If the query fails or
 * the sanitised answer is shorter than 3 characters, a slug derived
 * mechanically from the hypothesis text (at most 30 characters) is used.
 * When even that is empty, e.g. the hypothesis has no ASCII letters or
 * digits, the literal "experiment" is returned so the result is never an
 * empty string.
 *
 * @param {string} hypothesis - Free-text hypothesis.
 * @param {string} projectRoot - Working directory for the model query.
 * @returns {Promise<string>} A non-empty slug of lowercase letters, digits
 *   and hyphens.
 */
async function generateSlug(hypothesis, projectRoot) {
  const derived = hypothesis
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 30)
    .replace(/-$/, "");
  // An empty slug would produce an unusable experiment/branch name.
  const fallback = derived || "experiment";
  try {
    const { text } = await runQuery({
      prompt: `Generate a short, descriptive git branch slug (2-4 words, lowercase, hyphen-separated) for this experiment hypothesis:

"${hypothesis.slice(0, 500)}"

Output ONLY the slug, nothing else. Examples: uv-containment-filter, skip-degenerate-faces, fix-edge-sewing-order`,
      model: "haiku",
      tools: [],
      systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
      cwd: projectRoot,
      maxTurns: 1,
      label: "slug",
      role: "slug"
    });
    const slug = text
      .trim()
      .toLowerCase()
      .replace(/[^a-z0-9-]+/g, "")
      .replace(/-{2,}/g, "-")
      .replace(/^-+/, "")
      .slice(0, 40)
      // Truncation can cut right after a hyphen; strip it afterwards.
      .replace(/-+$/, "");
    return slug.length >= 3 ? slug : fallback;
  } catch {
    // Deliberate best-effort: any failure of the model query falls back to
    // the mechanically derived slug instead of aborting the caller.
    return fallback;
  }
}
2588
/**
 * Render a short, human-readable suffix describing a tool invocation for
 * progress logging.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} input - The tool's input object.
 * @returns {string} A suffix beginning with a space, or "" when the tool is
 *   not recognised or the relevant input field is missing/empty.
 */
function formatToolDetail(toolName, input) {
  const plainPath = () => (input.file_path ? ` ${input.file_path}` : "");
  const renderers = new Map([
    ["Read", plainPath],
    ["Write", () => (input.file_path ? ` \u2192 ${input.file_path}` : "")],
    ["Edit", plainPath],
    ["Glob", () => (input.pattern ? ` ${input.pattern}` : "")],
    ["Grep", () => (input.pattern ? ` /${input.pattern}/` : "")],
    // Commands are cut to 80 characters to keep log lines short.
    ["Bash", () => (input.command ? ` $ ${input.command.slice(0, 80)}` : "")],
    ["WebSearch", () => (input.query ? ` "${input.query}"` : "")]
  ]);
  const render = renderers.get(toolName);
  return render ? render() : "";
}
2608
/**
 * Persist an agent's markdown output under the role's docs directory.
 *
 * Nothing is written for roles without a directory mapping, nor for
 * "builder", "compressor" and "diagnostician", which are skipped
 * explicitly. The file is named `<id>-<role>-<slug>.md`, with the
 * experiment id zero-padded to three digits (default 1) and the slug
 * defaulting to "general".
 *
 * @param {string} role - Agent role that produced the output.
 * @param {object} context - Context whose `experiment.id` / `experiment.slug`
 *   name the file.
 * @param {string} markdown - Content to write.
 * @param {string} projectRoot - Root the docs directory is resolved against.
 * @returns {string|null} Path of the written file, or null when skipped.
 */
function writeArtifact(role, context, markdown, projectRoot) {
  const artifactDirs = {
    builder: "docs/experiments",
    critic: "docs/doubts",
    adversary: "docs/challenges",
    verifier: "docs/verification",
    reframer: "docs/reframes",
    compressor: "docs/synthesis",
    scout: "docs/rihla"
  };
  const relativeDir = artifactDirs[role];
  const skippedRoles = ["builder", "compressor", "diagnostician"];
  if (!relativeDir || skippedRoles.includes(role)) {
    return null;
  }

  const outputDir = path4.join(projectRoot, relativeDir);
  if (!fs4.existsSync(outputDir)) {
    fs4.mkdirSync(outputDir, { recursive: true });
  }

  const slug = context.experiment?.slug ?? "general";
  const paddedId = String(context.experiment?.id ?? 1).padStart(3, "0");
  const target = path4.join(outputDir, `${paddedId}-${role}-${slug}.md`);
  fs4.writeFileSync(target, markdown);
  return target;
}
2632
+ var fs4, path4, import_claude_agent_sdk2, ROLE_MAX_TURNS, CHECKPOINT_INTERVAL, DIM2, RESET2, CYAN2;
2633
+ var init_spawn = __esm({
2634
+ "src/agents/spawn.ts"() {
2094
2635
  "use strict";
2095
2636
  fs4 = __toESM(require("fs"));
2096
2637
  path4 = __toESM(require("path"));
2097
- import_node_child_process2 = require("child_process");
2638
+ import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
2639
+ init_parse();
2098
2640
  init_connection();
2099
- init_queries();
2100
- init_config();
2101
- init_format();
2641
+ ROLE_MAX_TURNS = {
2642
+ builder: 50,
2643
+ critic: 30,
2644
+ adversary: 30,
2645
+ verifier: 50,
2646
+ compressor: 30,
2647
+ reframer: 20,
2648
+ scout: 20,
2649
+ gatekeeper: 10,
2650
+ diagnostician: 60
2651
+ };
2652
+ CHECKPOINT_INTERVAL = {
2653
+ builder: 15,
2654
+ verifier: 12,
2655
+ critic: 15,
2656
+ adversary: 15,
2657
+ compressor: 15,
2658
+ diagnostician: 20
2659
+ };
2660
+ DIM2 = "\x1B[2m";
2661
+ RESET2 = "\x1B[0m";
2662
+ CYAN2 = "\x1B[36m";
2102
2663
  }
2103
2664
  });
2104
2665
 
2105
- // src/commands/session.ts
2106
- var session_exports = {};
2107
- __export(session_exports, {
2108
- session: () => session
2109
- });
2110
- async function session(args) {
2111
- const subcommand = args[0];
2112
- if (!subcommand || subcommand !== "start" && subcommand !== "end") {
2113
- throw new Error('Usage: majlis session start "intent" | majlis session end');
2114
- }
2115
- const root = findProjectRoot();
2116
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2117
- const db = getDb(root);
2118
- if (subcommand === "start") {
2119
- const intent = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
2120
- if (!intent) {
2121
- throw new Error('Usage: majlis session start "intent"');
2122
- }
2123
- const existing = getActiveSession(db);
2124
- if (existing) {
2125
- warn(`Session already active: "${existing.intent}" (started ${existing.started_at})`);
2126
- warn("End it first with `majlis session end`.");
2127
- return;
2128
- }
2129
- const latestExp = getLatestExperiment(db);
2130
- const sess = startSession(db, intent, latestExp?.id ?? null);
2131
- success(`Session started: "${intent}" (id: ${sess.id})`);
2132
- if (latestExp) {
2133
- info(`Linked to experiment: ${latestExp.slug} (${latestExp.status})`);
2666
+ // src/metrics.ts
2667
+ function compareMetrics(db, experimentId, config) {
2668
+ const before = getMetricsByExperimentAndPhase(db, experimentId, "before");
2669
+ const after = getMetricsByExperimentAndPhase(db, experimentId, "after");
2670
+ const fixtures = new Set([...before, ...after].map((m) => m.fixture));
2671
+ const trackedMetrics = Object.keys(config.metrics.tracked);
2672
+ const comparisons = [];
2673
+ for (const fixture of fixtures) {
2674
+ for (const metric of trackedMetrics) {
2675
+ const b = before.find((m) => m.fixture === fixture && m.metric_name === metric);
2676
+ const a = after.find((m) => m.fixture === fixture && m.metric_name === metric);
2677
+ if (b && a) {
2678
+ const direction = config.metrics.tracked[metric]?.direction ?? "lower_is_better";
2679
+ const regression = isRegression(b.metric_value, a.metric_value, direction);
2680
+ comparisons.push({
2681
+ fixture,
2682
+ metric,
2683
+ before: b.metric_value,
2684
+ after: a.metric_value,
2685
+ delta: a.metric_value - b.metric_value,
2686
+ regression
2687
+ });
2688
+ }
2134
2689
  }
2135
- } else {
2136
- const active = getActiveSession(db);
2137
- if (!active) {
2138
- throw new Error("No active session to end.");
2690
+ }
2691
+ return comparisons;
2692
+ }
2693
/**
 * Decide whether a metric got worse between two measurements.
 *
 * @param {number} before - Value from the "before" phase.
 * @param {number} after - Value from the "after" phase.
 * @param {string} direction - "lower_is_better", "higher_is_better" or
 *   "closer_to_gt".
 * @returns {boolean} True only for a strict move in the wrong direction.
 *   "closer_to_gt" and unrecognised directions always yield false.
 */
function isRegression(before, after, direction) {
  if (direction === "lower_is_better") {
    return after > before;
  }
  if (direction === "higher_is_better") {
    return after < before;
  }
  // "closer_to_gt" and anything else: never reported as a regression here.
  return false;
}
2705
+ function parseMetricsOutput(jsonStr) {
2706
+ const data = JSON.parse(jsonStr);
2707
+ const results = [];
2708
+ if (data.fixtures && typeof data.fixtures === "object") {
2709
+ for (const [fixture, metrics] of Object.entries(data.fixtures)) {
2710
+ for (const [metricName, metricValue] of Object.entries(metrics)) {
2711
+ if (typeof metricValue === "number") {
2712
+ results.push({ fixture, metric_name: metricName, metric_value: metricValue });
2713
+ }
2714
+ }
2139
2715
  }
2140
- const accomplished = getFlagValue(args, "--accomplished") ?? null;
2141
- const unfinished = getFlagValue(args, "--unfinished") ?? null;
2142
- const fragility = getFlagValue(args, "--fragility") ?? null;
2143
- endSession(db, active.id, accomplished, unfinished, fragility);
2144
- success(`Session ended: "${active.intent}"`);
2145
- if (accomplished) info(`Accomplished: ${accomplished}`);
2146
- if (unfinished) info(`Unfinished: ${unfinished}`);
2147
- if (fragility) warn(`New fragility: ${fragility}`);
2148
2716
  }
2717
+ return results;
2149
2718
  }
2150
- var init_session = __esm({
2151
- "src/commands/session.ts"() {
2719
+ var init_metrics = __esm({
2720
+ "src/metrics.ts"() {
2152
2721
  "use strict";
2153
- init_connection();
2154
2722
  init_queries();
2155
- init_config();
2156
- init_format();
2157
2723
  }
2158
2724
  });
2159
2725
 
2160
- // src/commands/query.ts
2161
- var query_exports = {};
2162
- __export(query_exports, {
2163
- query: () => query
2726
+ // src/commands/measure.ts
2727
+ var measure_exports = {};
2728
+ __export(measure_exports, {
2729
+ baseline: () => baseline,
2730
+ compare: () => compare,
2731
+ measure: () => measure
2164
2732
  });
2165
- async function query(command, args, isJson) {
2733
/**
 * CLI entry point that captures metrics for the "before" phase.
 *
 * @param {string[]} args - Command-line arguments, forwarded unchanged to
 *   `captureMetrics`.
 * @returns {Promise<void>}
 */
async function baseline(args) {
  const phase = "before";
  await captureMetrics(phase, args);
}
2736
/**
 * CLI entry point that captures metrics for the "after" phase.
 *
 * @param {string[]} args - Command-line arguments, forwarded unchanged to
 *   `captureMetrics`.
 * @returns {Promise<void>}
 */
async function measure(args) {
  const phase = "after";
  await captureMetrics(phase, args);
}
2739
+ async function captureMetrics(phase, args) {
2166
2740
  const root = findProjectRoot();
2167
2741
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2168
2742
  const db = getDb(root);
2169
- switch (command) {
2170
- case "decisions":
2171
- return queryDecisions(db, args, isJson);
2172
- case "dead-ends":
2173
- return queryDeadEnds(db, args, isJson);
2174
- case "fragility":
2175
- return queryFragility(root, isJson);
2176
- case "history":
2177
- return queryHistory(db, args, isJson);
2178
- case "circuit-breakers":
2179
- return queryCircuitBreakers(db, root, isJson);
2180
- case "check-commit":
2181
- return checkCommit(db);
2182
- }
2183
- }
2184
- function queryDecisions(db, args, isJson) {
2185
- const level = getFlagValue(args, "--level");
2743
+ const config = loadConfig(root);
2186
2744
  const expIdStr = getFlagValue(args, "--experiment");
2187
- const experimentId = expIdStr !== void 0 ? Number(expIdStr) : void 0;
2188
- const decisions = listAllDecisions(db, level, experimentId);
2189
- if (isJson) {
2190
- console.log(JSON.stringify(decisions, null, 2));
2191
- return;
2745
+ let exp;
2746
+ if (expIdStr !== void 0) {
2747
+ exp = getExperimentById(db, Number(expIdStr));
2748
+ } else {
2749
+ exp = getLatestExperiment(db);
2192
2750
  }
2193
- if (decisions.length === 0) {
2194
- info("No decisions found.");
2195
- return;
2751
+ if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
2752
+ if (config.build.pre_measure) {
2753
+ info(`Running pre-measure: ${config.build.pre_measure}`);
2754
+ try {
2755
+ (0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
2756
+ } catch {
2757
+ warn("Pre-measure command failed \u2014 continuing anyway.");
2758
+ }
2196
2759
  }
2197
- header("Decisions");
2198
- const rows = decisions.map((d) => [
2199
- String(d.id),
2200
- String(d.experiment_id),
2201
- evidenceColor(d.evidence_level),
2202
- d.description.slice(0, 60) + (d.description.length > 60 ? "..." : ""),
2203
- d.status
2204
- ]);
2205
- console.log(table(["ID", "Exp", "Level", "Description", "Status"], rows));
2206
- }
2207
- function queryDeadEnds(db, args, isJson) {
2208
- const subType = getFlagValue(args, "--sub-type");
2209
- const searchTerm = getFlagValue(args, "--search");
2210
- let deadEnds;
2211
- if (subType) {
2212
- deadEnds = listDeadEndsBySubType(db, subType);
2213
- } else if (searchTerm) {
2214
- deadEnds = searchDeadEnds(db, searchTerm);
2215
- } else {
2216
- deadEnds = listAllDeadEnds(db);
2760
+ if (!config.metrics.command) {
2761
+ throw new Error("No metrics.command configured in .majlis/config.json");
2217
2762
  }
2218
- if (isJson) {
2219
- console.log(JSON.stringify(deadEnds, null, 2));
2220
- return;
2763
+ info(`Running metrics: ${config.metrics.command}`);
2764
+ let metricsOutput;
2765
+ try {
2766
+ metricsOutput = (0, import_node_child_process.execSync)(config.metrics.command, {
2767
+ cwd: root,
2768
+ encoding: "utf-8",
2769
+ stdio: ["pipe", "pipe", "pipe"]
2770
+ });
2771
+ } catch (err) {
2772
+ throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
2221
2773
  }
2222
- if (deadEnds.length === 0) {
2223
- info("No dead-ends recorded.");
2774
+ const parsed = parseMetricsOutput(metricsOutput);
2775
+ if (parsed.length === 0) {
2776
+ warn("Metrics command returned no data.");
2224
2777
  return;
2225
2778
  }
2226
- header("Dead-End Registry");
2227
- const rows = deadEnds.map((d) => [
2228
- String(d.id),
2229
- d.sub_type ?? "\u2014",
2230
- d.approach.slice(0, 40) + (d.approach.length > 40 ? "..." : ""),
2231
- d.structural_constraint.slice(0, 40) + (d.structural_constraint.length > 40 ? "..." : "")
2232
- ]);
2233
- console.log(table(["ID", "Sub-Type", "Approach", "Constraint"], rows));
2234
- }
2235
- function queryFragility(root, isJson) {
2236
- const fragPath = path5.join(root, "docs", "synthesis", "fragility.md");
2237
- if (!fs5.existsSync(fragPath)) {
2238
- info("No fragility map found.");
2239
- return;
2779
+ for (const m of parsed) {
2780
+ insertMetric(db, exp.id, phase, m.fixture, m.metric_name, m.metric_value);
2240
2781
  }
2241
- const content = fs5.readFileSync(fragPath, "utf-8");
2242
- if (isJson) {
2243
- console.log(JSON.stringify({ content }, null, 2));
2244
- return;
2782
+ success(`Captured ${parsed.length} metric(s) for ${exp.slug} (phase: ${phase})`);
2783
+ if (config.build.post_measure) {
2784
+ try {
2785
+ (0, import_node_child_process.execSync)(config.build.post_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
2786
+ } catch {
2787
+ warn("Post-measure command failed.");
2788
+ }
2245
2789
  }
2246
- header("Fragility Map");
2247
- console.log(content);
2248
2790
  }
2249
- function queryHistory(db, args, isJson) {
2250
- const fixture = args.filter((a) => !a.startsWith("--"))[0];
2251
- if (!fixture) {
2252
- throw new Error("Usage: majlis history <fixture>");
2791
+ async function compare(args, isJson) {
2792
+ const root = findProjectRoot();
2793
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2794
+ const db = getDb(root);
2795
+ const config = loadConfig(root);
2796
+ const expIdStr = getFlagValue(args, "--experiment");
2797
+ let exp;
2798
+ if (expIdStr !== void 0) {
2799
+ exp = getExperimentById(db, Number(expIdStr));
2800
+ } else {
2801
+ exp = getLatestExperiment(db);
2253
2802
  }
2254
- const history = getMetricHistoryByFixture(db, fixture);
2255
- if (isJson) {
2256
- console.log(JSON.stringify(history, null, 2));
2803
+ if (!exp) throw new Error("No active experiment.");
2804
+ const comparisons = compareMetrics(db, exp.id, config);
2805
+ if (comparisons.length === 0) {
2806
+ warn(`No before/after metrics to compare for ${exp.slug}. Run baseline and measure first.`);
2257
2807
  return;
2258
2808
  }
2259
- if (history.length === 0) {
2260
- info(`No metric history for fixture: ${fixture}`);
2809
+ if (isJson) {
2810
+ console.log(JSON.stringify({ experiment: exp.slug, comparisons }, null, 2));
2261
2811
  return;
2262
2812
  }
2263
- header(`Metric History \u2014 ${fixture}`);
2264
- const rows = history.map((h) => [
2265
- String(h.experiment_id),
2266
- h.experiment_slug ?? "\u2014",
2267
- h.phase,
2268
- h.metric_name,
2269
- String(h.metric_value),
2270
- h.captured_at
2813
+ header(`Metric Comparison \u2014 ${exp.slug}`);
2814
+ const regressions = comparisons.filter((c) => c.regression);
2815
+ const rows = comparisons.map((c) => [
2816
+ c.fixture,
2817
+ c.metric,
2818
+ String(c.before),
2819
+ String(c.after),
2820
+ formatDelta(c.delta),
2821
+ c.regression ? red("REGRESSION") : green("OK")
2271
2822
  ]);
2272
- console.log(table(["Exp", "Slug", "Phase", "Metric", "Value", "Captured"], rows));
2823
+ console.log(table(["Fixture", "Metric", "Before", "After", "Delta", "Status"], rows));
2824
+ if (regressions.length > 0) {
2825
+ console.log();
2826
+ warn(`${regressions.length} regression(s) detected!`);
2827
+ } else {
2828
+ console.log();
2829
+ success("No regressions detected.");
2830
+ }
2273
2831
  }
2274
- function queryCircuitBreakers(db, root, isJson) {
2275
- const config = loadConfig(root);
2276
- const states = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
2277
- if (isJson) {
2278
- console.log(JSON.stringify(states, null, 2));
2279
- return;
2832
/**
 * Format a metric delta with four decimal places and an explicit "+" sign
 * for positive values.
 *
 * @param {number} delta - Difference between the after and before values.
 * @returns {string} e.g. "+1.5000", "-2.0000", "0.0000".
 */
function formatDelta(delta) {
  const fixed = delta.toFixed(4);
  // Negative numbers already carry their sign; only positives need one.
  return delta > 0 ? `+${fixed}` : fixed;
}
2836
// Module-scope binding for `child_process`; assigned inside init_measure.
var import_node_child_process;
// Bundler-generated initializer for src/commands/measure.ts.
// NOTE(review): `__esm` is defined outside this view; presumably it defers the
// body until the first init_measure() call — confirm against its definition.
var init_measure = __esm({
  "src/commands/measure.ts"() {
    "use strict";
    import_node_child_process = require("child_process");
    // Initialize the modules this command module depends on.
    init_connection();
    init_queries();
    init_metrics();
    init_config();
    init_format();
  }
});
2848
+
2849
// src/commands/experiment.ts
// Export namespace for the experiment command module. The members are
// registered as arrow-function getters, so each resolves to the function
// declared below at access time.
var experiment_exports = {};
__export(experiment_exports, {
  newExperiment: () => newExperiment,
  revert: () => revert
});
2855
/**
 * Create a new experiment: git branch, database row, optional markdown log
 * from `docs/experiments/_TEMPLATE.md`, and optional automatic baseline.
 *
 * @param {string[]} args - CLI arguments. Non-flag words form the hypothesis;
 *   `--slug` and `--sub-type` are read through `getFlagValue`.
 * @returns {Promise<void>}
 * @throws {Error} When run outside a Majlis project, when no hypothesis is
 *   given, or when the slug is already in use.
 */
async function newExperiment(args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  // NOTE(review): every argument not starting with "--" is joined into the
  // hypothesis. If getFlagValue reads the value from the following argument,
  // flag values (e.g. the slug) also end up in the hypothesis — confirm.
  const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
  if (!hypothesis) {
    throw new Error('Usage: majlis new "hypothesis"');
  }
  const db = getDb(root);
  const config = loadConfig(root);
  const slug = getFlagValue(args, "--slug") ?? await generateSlug(hypothesis, root);
  if (getExperimentBySlug(db, slug)) {
    throw new Error(`Experiment with slug "${slug}" already exists.`);
  }
  const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
  const num = allExps.count + 1;
  const paddedNum = String(num).padStart(3, "0");
  const branch = `exp/${paddedNum}-${slug}`;
  try {
    // execFileSync passes the branch name as a single argv entry, so a slug
    // containing shell metacharacters cannot inject commands.
    import_node_child_process2.execFileSync("git", ["checkout", "-b", branch], {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    info(`Created branch: ${branch}`);
  } catch {
    // Best effort: the experiment is still recorded without a git branch.
    warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
  }
  const subType = getFlagValue(args, "--sub-type") ?? null;
  const exp = createExperiment(db, slug, branch, hypothesis, subType, null);
  success(`Created experiment #${exp.id}: ${exp.slug}`);
  const docsDir = path5.join(root, "docs", "experiments");
  const templatePath = path5.join(docsDir, "_TEMPLATE.md");
  if (fs5.existsSync(templatePath)) {
    const template = fs5.readFileSync(templatePath, "utf-8");
    const substitutions = [
      [/\{\{title\}\}/g, hypothesis],
      [/\{\{hypothesis\}\}/g, hypothesis],
      [/\{\{branch\}\}/g, branch],
      [/\{\{status\}\}/g, "classified"],
      [/\{\{sub_type\}\}/g, subType ?? "unclassified"],
      [/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]]
    ];
    // Function replacers insert the value literally; a plain replacement
    // string would interpret "$&", "$1", "$$" found in user-supplied text.
    const logContent = substitutions.reduce(
      (text, [pattern, value]) => text.replace(pattern, () => value),
      template
    );
    const logPath = path5.join(docsDir, `${paddedNum}-${slug}.md`);
    fs5.writeFileSync(logPath, logContent);
    info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
  }
  if (config.cycle.auto_baseline_on_new_experiment && config.metrics.command) {
    info("Auto-baselining... (run `majlis baseline` to do this manually)");
    try {
      // The measure module is initialized on demand, only when baselining runs.
      const { baseline: baseline2 } = await Promise.resolve().then(() => (init_measure(), measure_exports));
      await baseline2(["--experiment", String(exp.id)]);
    } catch {
      warn("Auto-baseline failed \u2014 run `majlis baseline` manually.");
    }
  }
}
2904
/**
 * Mark an experiment as a dead end and, if its branch is currently checked
 * out, switch back to `main` (falling back to `master`).
 *
 * @param {string[]} args - CLI arguments. The first non-flag word selects the
 *   experiment by slug, otherwise the latest experiment is used. `--reason`
 *   gives the recorded reason; `--structural` records the dead end as
 *   "structural" instead of "procedural".
 * @returns {Promise<void>}
 * @throws {Error} When run outside a Majlis project or when no matching
 *   experiment exists.
 */
async function revert(args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  let exp;
  // NOTE(review): if getFlagValue reads the value from the following argument,
  // `--reason text` with no slug makes "text" look like a slug — confirm.
  const slugArg = args.filter((a) => !a.startsWith("--"))[0];
  if (slugArg) {
    exp = getExperimentBySlug(db, slugArg);
    if (!exp) throw new Error(`Experiment not found: ${slugArg}`);
  } else {
    exp = getLatestExperiment(db);
    if (!exp) throw new Error("No active experiments to revert.");
  }
  const reason = getFlagValue(args, "--reason") ?? "Manually reverted";
  const category = args.includes("--structural") ? "structural" : "procedural";
  insertDeadEnd(
    db,
    exp.id,
    exp.hypothesis ?? exp.slug,
    reason,
    `Reverted: ${reason}`,
    exp.sub_type,
    category
  );
  updateExperimentStatus(db, exp.id, "dead_end");
  try {
    const currentBranch = import_node_child_process2.execFileSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
      cwd: root,
      encoding: "utf-8"
    }).trim();
    if (currentBranch === exp.branch) {
      const quiet = {
        cwd: root,
        encoding: "utf-8",
        stdio: ["pipe", "pipe", "pipe"]
      };
      // Two shell-free attempts replace `... 2>/dev/null || ...`, which only
      // works under a POSIX shell.
      try {
        import_node_child_process2.execFileSync("git", ["checkout", "main"], quiet);
      } catch {
        import_node_child_process2.execFileSync("git", ["checkout", "master"], quiet);
      }
    }
  } catch {
    // The database is already updated; the branch switch is best effort.
    warn("Could not switch git branches \u2014 do this manually.");
  }
  info(`Experiment ${exp.slug} reverted to dead-end. Reason: ${reason}`);
}
2322
- var fs5, path5;
2323
- var init_query = __esm({
2324
- "src/commands/query.ts"() {
2946
// Module-scope bindings for Node built-ins; assigned inside init_experiment.
var fs5, path5, import_node_child_process2;
// Bundler-generated initializer for src/commands/experiment.ts.
// NOTE(review): `__esm` is defined outside this view; presumably it defers the
// body until the first init_experiment() call — confirm.
var init_experiment = __esm({
  "src/commands/experiment.ts"() {
    "use strict";
    fs5 = __toESM(require("fs"));
    path5 = __toESM(require("path"));
    import_node_child_process2 = require("child_process");
    // Initialize the modules this command module depends on.
    init_connection();
    init_queries();
    init_config();
    init_spawn();
    init_format();
  }
});
2334
2960
 
2335
- // src/state/types.ts
2336
- var TRANSITIONS, GRADE_ORDER;
2337
- var init_types = __esm({
2338
- "src/state/types.ts"() {
2339
- "use strict";
2340
- TRANSITIONS = {
2341
- ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
2342
- ["reframed" /* REFRAMED */]: ["gated" /* GATED */],
2343
- ["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
2344
- // self-loop for rejected hypotheses
2345
- ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
2346
- // self-loop for retry after truncation
2347
- ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
2348
- ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
2349
- ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
2350
- ["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
2351
- ["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
2352
- ["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
2353
- ["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
2354
- // cycle-back skips gate
2355
- ["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
2356
- // cycle-back skips gate
2357
- ["merged" /* MERGED */]: [],
2358
- ["dead_end" /* DEAD_END */]: []
2359
- };
2360
- GRADE_ORDER = ["rejected", "weak", "good", "sound"];
2361
- }
2961
// src/commands/session.ts
// Export namespace for the session command module; `session` is registered
// as an arrow-function getter that resolves to the function declared below.
var session_exports = {};
__export(session_exports, {
  session: () => session
});
2363
-
2364
- // src/state/machine.ts
2365
- function transition(current, target) {
2366
- const valid = TRANSITIONS[current];
2367
- if (!valid.includes(target)) {
2368
- throw new Error(
2369
- `Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
2370
- );
2371
- }
2372
- return target;
2373
- }
2374
- function validNext(current) {
2375
- return TRANSITIONS[current];
2376
- }
2377
- function isTerminal(status2) {
2378
- return TRANSITIONS[status2].length === 0;
2379
- }
2380
- function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
2381
- if (valid.length === 0) {
2382
- throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
2383
- }
2384
- const status2 = exp.status;
2385
- if (status2 === "classified" /* CLASSIFIED */ || status2 === "reframed" /* REFRAMED */) {
2386
- return valid.includes("gated" /* GATED */) ? "gated" /* GATED */ : valid[0];
2387
- }
2388
- if (status2 === "gated" /* GATED */) {
2389
- return valid.includes("building" /* BUILDING */) ? "building" /* BUILDING */ : valid[0];
2390
- }
2391
- if (status2 === "built" /* BUILT */ && !hasDoubts2) {
2392
- return valid.includes("doubted" /* DOUBTED */) ? "doubted" /* DOUBTED */ : valid[0];
2393
- }
2394
- if (status2 === "doubted" /* DOUBTED */ && !hasChallenges2) {
2395
- return valid.includes("challenged" /* CHALLENGED */) ? "challenged" /* CHALLENGED */ : valid[0];
2966
/**
 * Handle `majlis session start "intent"` and `majlis session end`.
 *
 * `start` opens a session linked to the latest experiment (if any). When a
 * session is already active it only warns and returns. `end` closes the
 * active session, recording the optional `--accomplished`, `--unfinished`
 * and `--fragility` notes.
 *
 * @param {string[]} args - Subcommand followed by its words and flags.
 * @returns {Promise<void>}
 * @throws {Error} On an unknown subcommand, outside a Majlis project, on a
 *   missing intent, or when ending with no active session.
 */
async function session(args) {
  const subcommand = args[0];
  const starting = subcommand === "start";
  if (!starting && subcommand !== "end") {
    throw new Error('Usage: majlis session start "intent" | majlis session end');
  }
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);

  if (!starting) {
    const current = getActiveSession(db);
    if (!current) {
      throw new Error("No active session to end.");
    }
    const accomplished = getFlagValue(args, "--accomplished") ?? null;
    const unfinished = getFlagValue(args, "--unfinished") ?? null;
    const fragility = getFlagValue(args, "--fragility") ?? null;
    endSession(db, current.id, accomplished, unfinished, fragility);
    success(`Session ended: "${current.intent}"`);
    if (accomplished) info(`Accomplished: ${accomplished}`);
    if (unfinished) info(`Unfinished: ${unfinished}`);
    if (fragility) warn(`New fragility: ${fragility}`);
    return;
  }

  const words = args.slice(1).filter((word) => !word.startsWith("--"));
  const intent = words.join(" ");
  if (!intent) {
    throw new Error('Usage: majlis session start "intent"');
  }
  const alreadyOpen = getActiveSession(db);
  if (alreadyOpen) {
    warn(`Session already active: "${alreadyOpen.intent}" (started ${alreadyOpen.started_at})`);
    warn("End it first with `majlis session end`.");
    return;
  }
  const linkedExp = getLatestExperiment(db);
  const opened = startSession(db, intent, linkedExp?.id ?? null);
  success(`Session started: "${intent}" (id: ${opened.id})`);
  if (linkedExp) {
    info(`Linked to experiment: ${linkedExp.slug} (${linkedExp.status})`);
  }
}
2404
- var init_machine = __esm({
2405
- "src/state/machine.ts"() {
3006
// Bundler-generated initializer for src/commands/session.ts; it initializes
// the modules the session command depends on.
// NOTE(review): `__esm` is defined outside this view; presumably it defers the
// body until the first init_session() call — confirm.
var init_session = __esm({
  "src/commands/session.ts"() {
    "use strict";
    init_connection();
    init_queries();
    init_config();
    init_format();
  }
});
2410
3015
 
2411
- // src/agents/types.ts
2412
- function getExtractionSchema(role) {
2413
- switch (role) {
2414
- case "builder":
2415
- return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}';
2416
- case "critic":
2417
- return '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}';
2418
- case "adversary":
2419
- return '{"challenges": [{"description": "string", "reasoning": "string"}]}';
2420
- case "verifier":
2421
- return '{"grades": [{"component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string"}], "doubt_resolutions": [{"doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive"}]}';
2422
- case "gatekeeper":
2423
- return '{"gate_decision": "approve|reject|flag", "reason": "string", "stale_references": ["string"], "overlapping_dead_ends": [0]}';
2424
- case "reframer":
2425
- return '{"reframe": {"decomposition": "string", "divergences": ["string"], "recommendation": "string"}}';
2426
- case "scout":
2427
- return '{"findings": [{"approach": "string", "source": "string", "relevance": "string", "contradicts_current": true}]}';
2428
- case "compressor":
2429
- return '{"compression_report": {"synthesis_delta": "string", "new_dead_ends": ["string"], "fragility_changes": ["string"]}}';
2430
- default:
2431
- return EXTRACTION_SCHEMA;
3016
// src/commands/query.ts
// Export namespace for the query command module. The public name `query`
// resolves to the local function `query3`.
var query_exports = {};
__export(query_exports, {
  query: () => query3
});
3021
/**
 * Route a query subcommand to its handler.
 *
 * Recognized commands: "decisions", "dead-ends", "fragility", "history",
 * "circuit-breakers" and "check-commit". Any other command resolves to
 * `undefined` without doing anything.
 *
 * @param {string} command - Query subcommand name.
 * @param {string[]} args - Remaining CLI arguments passed to the handler.
 * @param {boolean} isJson - When truthy, handlers print JSON instead of tables.
 * @returns {Promise<*>} Whatever the selected handler returns.
 * @throws {Error} When run outside a Majlis project.
 */
async function query3(command, args, isJson) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  // A Map keeps lookups restricted to the listed keys (no prototype hits).
  const handlers = new Map([
    ["decisions", () => queryDecisions(db, args, isJson)],
    ["dead-ends", () => queryDeadEnds(db, args, isJson)],
    ["fragility", () => queryFragility(root, isJson)],
    ["history", () => queryHistory(db, args, isJson)],
    ["circuit-breakers", () => queryCircuitBreakers(db, root, isJson)],
    ["check-commit", () => checkCommit(db)]
  ]);
  const run = handlers.get(command);
  if (run === void 0) return;
  return run();
}
2434
- var EXTRACTION_SCHEMA, ROLE_REQUIRED_FIELDS;
2435
- var init_types2 = __esm({
2436
- "src/agents/types.ts"() {
2437
- "use strict";
2438
- EXTRACTION_SCHEMA = `{
2439
- "decisions": [{ "description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string" }],
2440
- "grades": [{ "component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string" }],
2441
- "doubts": [{ "claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical" }],
2442
- "guidance": "string (actionable builder guidance)",
2443
- "doubt_resolutions": [{ "doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive" }]
2444
- }`;
2445
- ROLE_REQUIRED_FIELDS = {
2446
- builder: ["decisions"],
2447
- critic: ["doubts"],
2448
- adversary: ["challenges"],
2449
- verifier: ["grades"],
2450
- gatekeeper: ["gate_decision"],
2451
- reframer: ["reframe"],
2452
- scout: ["findings"],
2453
- compressor: ["compression_report"]
2454
- };
3040
/**
 * Print recorded decisions, optionally filtered by `--level` and
 * `--experiment`.
 *
 * @param {object} db - Database handle passed through to `listAllDecisions`.
 * @param {string[]} args - CLI arguments carrying the optional flags.
 * @param {boolean} isJson - Print the raw rows as JSON instead of a table.
 * @returns {void}
 */
function queryDecisions(db, args, isJson) {
  const levelFilter = getFlagValue(args, "--level");
  const rawExperiment = getFlagValue(args, "--experiment");
  let experimentFilter;
  if (rawExperiment !== void 0) {
    experimentFilter = Number(rawExperiment);
  }
  const found = listAllDecisions(db, levelFilter, experimentFilter);
  if (isJson) {
    console.log(JSON.stringify(found, null, 2));
    return;
  }
  if (found.length === 0) {
    info("No decisions found.");
    return;
  }
  header("Decisions");
  // Descriptions longer than 60 characters are cut and marked with "...".
  const clip = (text) => text.length > 60 ? `${text.slice(0, 60)}...` : text;
  const tableRows = [];
  for (const decision of found) {
    tableRows.push([
      String(decision.id),
      String(decision.experiment_id),
      evidenceColor(decision.evidence_level),
      clip(decision.description),
      decision.status
    ]);
  }
  console.log(table(["ID", "Exp", "Level", "Description", "Status"], tableRows));
}
3063
/**
 * Print the dead-end registry.
 *
 * `--sub-type` takes precedence over `--search`; with neither flag every
 * dead end is listed.
 *
 * @param {object} db - Database handle passed through to the list helpers.
 * @param {string[]} args - CLI arguments carrying the optional flags.
 * @param {boolean} isJson - Print the raw rows as JSON instead of a table.
 * @returns {void}
 */
function queryDeadEnds(db, args, isJson) {
  const subTypeFilter = getFlagValue(args, "--sub-type");
  const needle = getFlagValue(args, "--search");
  const entries = subTypeFilter
    ? listDeadEndsBySubType(db, subTypeFilter)
    : needle
      ? searchDeadEnds(db, needle)
      : listAllDeadEnds(db);
  if (isJson) {
    console.log(JSON.stringify(entries, null, 2));
    return;
  }
  if (entries.length === 0) {
    info("No dead-ends recorded.");
    return;
  }
  header("Dead-End Registry");
  // Both text columns are cut at 40 characters and marked with "...".
  const clip = (text) => text.length > 40 ? `${text.slice(0, 40)}...` : text;
  const tableRows = [];
  for (const entry of entries) {
    tableRows.push([
      String(entry.id),
      entry.sub_type ?? "\u2014",
      clip(entry.approach),
      clip(entry.structural_constraint)
    ]);
  }
  console.log(table(["ID", "Sub-Type", "Approach", "Constraint"], tableRows));
}
2486
- function tryParseJson(jsonStr) {
2487
- try {
2488
- return JSON.parse(jsonStr);
2489
- } catch {
2490
- return null;
3091
/**
 * Print the fragility map stored at `docs/synthesis/fragility.md`.
 *
 * @param {string} root - Project root directory.
 * @param {boolean} isJson - Print `{ content }` as JSON instead of raw text.
 * @returns {void}
 */
function queryFragility(root, isJson) {
  const mapFile = path6.join(root, "docs", "synthesis", "fragility.md");
  const present = fs6.existsSync(mapFile);
  if (!present) {
    info("No fragility map found.");
    return;
  }
  const content = fs6.readFileSync(mapFile, "utf-8");
  if (!isJson) {
    header("Fragility Map");
    console.log(content);
  } else {
    console.log(JSON.stringify({ content }, null, 2));
  }
}
2493
- function extractViaPatterns(role, markdown) {
2494
- const result = {};
2495
- const decisionPattern = /\[(?:decision|Decision)\].*?(?:description|Description):\s*(.+?)(?:\n|$).*?(?:evidence.?level|Evidence.?Level|level):\s*(proof|test|strong_consensus|consensus|analogy|judgment).*?(?:justification|Justification):\s*(.+?)(?:\n|$)/gis;
2496
- const decisions = [];
2497
- const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
2498
- let match;
2499
- while ((match = evidenceMarkers.exec(markdown)) !== null) {
2500
- decisions.push({
2501
- description: match[1].trim(),
2502
- evidence_level: match[2].toLowerCase().trim(),
2503
- justification: "Extracted via regex \u2014 review"
2504
- });
3105
/**
 * Print the metric history recorded for one fixture.
 *
 * @param {object} db - Database handle passed to `getMetricHistoryByFixture`.
 * @param {string[]} args - CLI arguments; the first non-flag word is the fixture.
 * @param {boolean} isJson - Print the raw rows as JSON instead of a table.
 * @returns {void}
 * @throws {Error} When no fixture name is supplied.
 */
function queryHistory(db, args, isJson) {
  const [fixture] = args.filter((word) => !word.startsWith("--"));
  if (!fixture) {
    throw new Error("Usage: majlis history <fixture>");
  }
  const samples = getMetricHistoryByFixture(db, fixture);
  if (isJson) {
    console.log(JSON.stringify(samples, null, 2));
    return;
  }
  if (samples.length === 0) {
    info(`No metric history for fixture: ${fixture}`);
    return;
  }
  header(`Metric History \u2014 ${fixture}`);
  const tableRows = [];
  for (const sample of samples) {
    tableRows.push([
      String(sample.experiment_id),
      sample.experiment_slug ?? "\u2014",
      sample.phase,
      sample.metric_name,
      String(sample.metric_value),
      sample.captured_at
    ]);
  }
  console.log(table(["Exp", "Slug", "Phase", "Metric", "Value", "Captured"], tableRows));
}
3130
/**
 * Print circuit-breaker state per sub-type against the threshold configured
 * at `cycle.circuit_breaker_threshold`.
 *
 * @param {object} db - Database handle passed to `getAllCircuitBreakerStates`.
 * @param {string} root - Project root used to load the configuration.
 * @param {boolean} isJson - Print the raw states as JSON instead of a table.
 * @returns {void}
 */
function queryCircuitBreakers(db, root, isJson) {
  const config = loadConfig(root);
  const breakers = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
  if (isJson) {
    console.log(JSON.stringify(breakers, null, 2));
    return;
  }
  if (breakers.length === 0) {
    info("No circuit breaker data.");
    return;
  }
  header("Circuit Breakers");
  const tableRows = [];
  for (const breaker of breakers) {
    const statusCell = breaker.tripped ? red("TRIPPED") : green("OK");
    tableRows.push([
      breaker.sub_type,
      String(breaker.failure_count),
      String(config.cycle.circuit_breaker_threshold),
      statusCell
    ]);
  }
  console.log(table(["Sub-Type", "Failures", "Threshold", "Status"], tableRows));
}
2551
- async function extractViaHaiku(role, markdown) {
3150
/**
 * Commit guard: exit with status 1 when active experiments have not yet
 * reached a settled status.
 *
 * Standard input (fd 0) is read as an optional JSON payload. When it parses
 * and its `tool_input.command` does not contain "git commit", the check is
 * skipped. Unreadable or unparsable input falls through to the check.
 *
 * @param {object} db - Database handle passed to `listActiveExperiments`.
 * @returns {void} Returns normally when nothing blocks the commit; otherwise
 *   terminates the process via `process.exit(1)`.
 */
function checkCommit(db) {
  let payload = "";
  try {
    payload = fs6.readFileSync(0, "utf-8");
  } catch {
    // stdin unavailable: treat as "no hook payload".
  }
  if (payload) {
    try {
      const parsed = JSON.parse(payload);
      const shellCommand = parsed?.tool_input?.command ?? "";
      const isCommit = shellCommand.includes("git commit");
      if (!isCommit) {
        return;
      }
    } catch {
      // Not a usable hook payload: run the check anyway.
    }
  }
  const settled = ["merged", "dead_end", "verified", "resolved", "compressed"];
  const blocking = [];
  for (const candidate of listActiveExperiments(db)) {
    if (!settled.includes(candidate.status)) {
      blocking.push(candidate);
    }
  }
  if (blocking.length === 0) {
    return;
  }
  console.error(`[majlis] ${blocking.length} unverified experiment(s):`);
  blocking.forEach((e) => {
    console.error(` - ${e.slug} (${e.status})`);
  });
  process.exit(1);
}
2587
- function hasData(output) {
2588
- return !!(output.decisions && output.decisions.length > 0 || output.grades && output.grades.length > 0 || output.doubts && output.doubts.length > 0 || output.challenges && output.challenges.length > 0 || output.findings && output.findings.length > 0 || output.guidance || output.reframe || output.compression_report || output.gate_decision);
2589
- }
2590
- function validateForRole(role, output) {
2591
- const required = ROLE_REQUIRED_FIELDS[role];
2592
- if (!required) return { valid: true, missing: [] };
2593
- const missing = required.filter((field) => {
2594
- const value = output[field];
2595
- if (value === void 0 || value === null) return true;
2596
- if (Array.isArray(value) && value.length === 0) return true;
2597
- return false;
2598
- });
2599
- return { valid: missing.length === 0, missing };
2600
- }
2601
- var import_claude_agent_sdk;
2602
- var init_parse = __esm({
2603
- "src/agents/parse.ts"() {
3178
// Module-scope bindings for Node built-ins; assigned inside init_query.
var fs6, path6;
// Bundler-generated initializer for src/commands/query.ts.
// NOTE(review): `__esm` is defined outside this view; presumably it defers the
// body until the first init_query() call — confirm.
var init_query = __esm({
  "src/commands/query.ts"() {
    "use strict";
    fs6 = __toESM(require("fs"));
    path6 = __toESM(require("path"));
    // Initialize the modules this command module depends on.
    init_connection();
    init_queries();
    init_config();
    init_format();
  }
});
3190
+
3191
// src/state/types.ts
// TRANSITIONS maps each experiment status to the statuses it may move to;
// an empty list marks a status with no outgoing transitions.
// GRADE_ORDER lists the grade names.
// NOTE(review): the order looks like worst-to-best — confirm with its users.
var TRANSITIONS, GRADE_ORDER;
var init_types2 = __esm({
  "src/state/types.ts"() {
    "use strict";
    TRANSITIONS = {
      ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
      ["reframed" /* REFRAMED */]: ["gated" /* GATED */],
      ["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
      // self-loop for rejected hypotheses
      ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
      // self-loop for retry after truncation
      ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
      ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
      ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
      ["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
      ["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
      ["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
      ["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
      // cycle-back skips gate
      ["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
      // cycle-back skips gate
      ["merged" /* MERGED */]: [],
      ["dead_end" /* DEAD_END */]: []
    };
    GRADE_ORDER = ["rejected", "weak", "good", "sound"];
  }
});
2609
3219
 
2610
- // src/agents/spawn.ts
2611
- function loadAgentDefinition(role, projectRoot) {
2612
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2613
- const filePath = path6.join(root, ".majlis", "agents", `${role}.md`);
2614
- if (!fs6.existsSync(filePath)) {
2615
- throw new Error(`Agent definition not found: ${filePath}`);
2616
- }
2617
- const content = fs6.readFileSync(filePath, "utf-8");
2618
- const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
2619
- if (!frontmatterMatch) {
2620
- throw new Error(`Invalid agent definition (missing YAML frontmatter): ${filePath}`);
3220
+ // src/state/machine.ts
3221
/**
 * Validate a status change against TRANSITIONS and return the target.
 *
 * @param {string} current - Status the experiment is in now.
 * @param {string} target - Status the caller wants to move to.
 * @returns {string} `target`, unchanged, when the move is allowed.
 * @throws {Error} When `current` has no own entry in TRANSITIONS, or when
 *   `target` is not one of its listed successors.
 */
function transition(current, target) {
  const valid = TRANSITIONS[current];
  // Without this guard an unknown status fails later with an opaque
  // TypeError; inherited keys such as "constructor" are rejected here too.
  if (!Array.isArray(valid)) {
    throw new Error(
      `Invalid transition: unknown status "${current}" (target: ${target})`
    );
  }
  if (!valid.includes(target)) {
    throw new Error(
      `Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
    );
  }
  return target;
}
2630
- function extractYamlField(yaml, field) {
2631
- const match = yaml.match(new RegExp(`^${field}:\\s*(.+)$`, "m"));
2632
- return match ? match[1].trim() : null;
3230
/**
 * Look up the statuses reachable from `current`.
 *
 * @param {string} current - Status to look up in TRANSITIONS.
 * @returns {string[]|undefined} The stored successor list itself (not a
 *   copy), or `undefined` when TRANSITIONS has no entry for `current`.
 */
function validNext(current) {
  const { [current]: successors } = TRANSITIONS;
  return successors;
}
2634
- async function spawnAgent(role, context, projectRoot) {
2635
- const agentDef = loadAgentDefinition(role, projectRoot);
2636
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2637
- const taskPrompt = context.taskPrompt ?? `Perform your role as ${agentDef.name}.`;
2638
- const contextJson = JSON.stringify(context);
2639
- const prompt = `Here is your context:
2640
-
2641
- \`\`\`json
2642
- ${contextJson}
2643
- \`\`\`
2644
-
2645
- ${taskPrompt}`;
2646
- const turns = ROLE_MAX_TURNS[role] ?? 15;
2647
- console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
2648
- const { text: markdown, costUsd, truncated } = await runQuery({
2649
- prompt,
2650
- model: agentDef.model,
2651
- tools: agentDef.tools,
2652
- systemPrompt: agentDef.systemPrompt,
2653
- cwd: root,
2654
- maxTurns: turns,
2655
- label: role
2656
- });
2657
- console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
2658
- const artifactPath = writeArtifact(role, context, markdown, root);
2659
- if (artifactPath) {
2660
- console.log(`[${role}] Artifact written to ${artifactPath}`);
3233
/**
 * Report whether a status has no outgoing transitions.
 *
 * @param {string} status2 - Status to look up in TRANSITIONS.
 * @returns {boolean} True when the successor list for `status2` is empty.
 * @throws {TypeError} When TRANSITIONS has no entry for `status2`.
 */
function isTerminal(status2) {
  const successors = TRANSITIONS[status2];
  const outgoing = successors.length;
  return outgoing === 0;
}
3236
+ function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
3237
+ if (valid.length === 0) {
3238
+ throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
2661
3239
  }
2662
- const structured = await extractStructuredData(role, markdown);
2663
- if (structured) {
2664
- const { valid, missing } = validateForRole(role, structured);
2665
- if (!valid) {
2666
- console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
2667
- }
3240
+ const status2 = exp.status;
3241
+ if (status2 === "classified" /* CLASSIFIED */ || status2 === "reframed" /* REFRAMED */) {
3242
+ return valid.includes("gated" /* GATED */) ? "gated" /* GATED */ : valid[0];
2668
3243
  }
2669
- return { output: markdown, structured, truncated };
2670
- }
2671
- async function spawnSynthesiser(context, projectRoot) {
2672
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2673
- const contextJson = JSON.stringify(context);
2674
- const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
2675
- const prompt = `Here is your context:
2676
-
2677
- \`\`\`json
2678
- ${contextJson}
2679
- \`\`\`
2680
-
2681
- ${taskPrompt}`;
2682
- const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2683
- console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
2684
- const { text: markdown, costUsd, truncated } = await runQuery({
2685
- prompt,
2686
- model: "sonnet",
2687
- tools: ["Read", "Glob", "Grep"],
2688
- systemPrompt,
2689
- cwd: root,
2690
- maxTurns: 5,
2691
- label: "synthesiser"
2692
- });
2693
- console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
2694
- return { output: markdown, structured: { guidance: markdown }, truncated };
2695
- }
2696
- async function spawnRecovery(role, partialOutput, context, projectRoot) {
2697
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2698
- const expSlug = context.experiment?.slug ?? "unknown";
2699
- console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
2700
- const expDocPath = path6.join(
2701
- root,
2702
- "docs",
2703
- "experiments",
2704
- `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
2705
- );
2706
- const templatePath = path6.join(root, "docs", "experiments", "_TEMPLATE.md");
2707
- const template = fs6.existsSync(templatePath) ? fs6.readFileSync(templatePath, "utf-8") : "";
2708
- const currentDoc = fs6.existsSync(expDocPath) ? fs6.readFileSync(expDocPath, "utf-8") : "";
2709
- const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
2710
-
2711
- Here is the partial agent output (reasoning + tool calls):
2712
- <partial_output>
2713
- ${partialOutput.slice(-3e3)}
2714
- </partial_output>
2715
-
2716
- Here is the current experiment doc:
2717
- <current_doc>
2718
- ${currentDoc}
2719
- </current_doc>
2720
-
2721
- Here is the template that the experiment doc should follow:
2722
- <template>
2723
- ${template}
2724
- </template>
2725
-
2726
- Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
2727
- - Keep any valid content from the current doc
2728
- - Fill in what you can infer from the partial output
2729
- - Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
2730
- - The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
2731
- - Do NOT include agent reasoning or thinking \u2014 only structured experiment content
2732
- - Be concise. This is cleanup, not new work.`;
2733
- const { text: _markdown } = await runQuery({
2734
- prompt,
2735
- model: "haiku",
2736
- tools: ["Read", "Write"],
2737
- systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
2738
- cwd: root,
2739
- maxTurns: 5,
2740
- label: "recovery"
2741
- });
2742
- console.log(`[recovery] Cleanup complete for ${expSlug}.`);
2743
- }
2744
- async function runQuery(opts) {
2745
- let truncated = false;
2746
- const tag = opts.label ?? "majlis";
2747
- const conversation = (0, import_claude_agent_sdk2.query)({
2748
- prompt: opts.prompt,
2749
- options: {
2750
- model: opts.model,
2751
- tools: opts.tools,
2752
- systemPrompt: {
2753
- type: "preset",
2754
- preset: "claude_code",
2755
- append: opts.systemPrompt
2756
- },
2757
- cwd: opts.cwd,
2758
- permissionMode: "bypassPermissions",
2759
- allowDangerouslySkipPermissions: true,
2760
- maxTurns: opts.maxTurns ?? 15,
2761
- persistSession: false,
2762
- settingSources: ["project"]
2763
- }
2764
- });
2765
- const textParts = [];
2766
- let costUsd = 0;
2767
- let turnCount = 0;
2768
- for await (const message of conversation) {
2769
- if (message.type === "assistant") {
2770
- turnCount++;
2771
- let hasText = false;
2772
- for (const block of message.message.content) {
2773
- if (block.type === "text") {
2774
- textParts.push(block.text);
2775
- hasText = true;
2776
- } else if (block.type === "tool_use") {
2777
- const toolName = block.name ?? "tool";
2778
- const input = block.input ?? {};
2779
- const detail = formatToolDetail(toolName, input);
2780
- process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2781
- `);
2782
- }
2783
- }
2784
- if (hasText) {
2785
- const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
2786
- if (preview) {
2787
- process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2788
- `);
2789
- }
2790
- }
2791
- } else if (message.type === "tool_progress") {
2792
- const elapsed = Math.round(message.elapsed_time_seconds);
2793
- if (elapsed > 0 && elapsed % 5 === 0) {
2794
- process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
2795
- `);
2796
- }
2797
- } else if (message.type === "result") {
2798
- if (message.subtype === "success") {
2799
- costUsd = message.total_cost_usd;
2800
- } else if (message.subtype === "error_max_turns") {
2801
- truncated = true;
2802
- costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
2803
- console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
2804
- } else {
2805
- const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
2806
- throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
2807
- }
2808
- }
3244
+ if (status2 === "gated" /* GATED */) {
3245
+ return valid.includes("building" /* BUILDING */) ? "building" /* BUILDING */ : valid[0];
2809
3246
  }
2810
- return { text: textParts.join("\n\n"), costUsd, truncated };
2811
- }
2812
- function formatToolDetail(toolName, input) {
2813
- switch (toolName) {
2814
- case "Read":
2815
- return input.file_path ? ` ${input.file_path}` : "";
2816
- case "Write":
2817
- return input.file_path ? ` \u2192 ${input.file_path}` : "";
2818
- case "Edit":
2819
- return input.file_path ? ` ${input.file_path}` : "";
2820
- case "Glob":
2821
- return input.pattern ? ` ${input.pattern}` : "";
2822
- case "Grep":
2823
- return input.pattern ? ` /${input.pattern}/` : "";
2824
- case "Bash":
2825
- return input.command ? ` $ ${input.command.slice(0, 80)}` : "";
2826
- case "WebSearch":
2827
- return input.query ? ` "${input.query}"` : "";
2828
- default:
2829
- return "";
3247
+ if (status2 === "built" /* BUILT */ && !hasDoubts2) {
3248
+ return valid.includes("doubted" /* DOUBTED */) ? "doubted" /* DOUBTED */ : valid[0];
3249
+ }
3250
+ if (status2 === "doubted" /* DOUBTED */ && !hasChallenges2) {
3251
+ return valid.includes("challenged" /* CHALLENGED */) ? "challenged" /* CHALLENGED */ : valid[0];
3252
+ }
3253
+ if (status2 === "doubted" /* DOUBTED */ || status2 === "challenged" /* CHALLENGED */) {
3254
+ if (valid.includes("verifying" /* VERIFYING */)) {
3255
+ return "verifying" /* VERIFYING */;
3256
+ }
2830
3257
  }
2831
- }
2832
- function writeArtifact(role, context, markdown, projectRoot) {
2833
- const dirMap = {
2834
- builder: "docs/experiments",
2835
- critic: "docs/doubts",
2836
- adversary: "docs/challenges",
2837
- verifier: "docs/verification",
2838
- reframer: "docs/reframes",
2839
- compressor: "docs/synthesis",
2840
- scout: "docs/rihla"
2841
- };
2842
- const dir = dirMap[role];
2843
- if (!dir) return null;
2844
- if (role === "builder" || role === "compressor") return null;
2845
- const fullDir = path6.join(projectRoot, dir);
2846
- if (!fs6.existsSync(fullDir)) {
2847
- fs6.mkdirSync(fullDir, { recursive: true });
3258
+ if (status2 === "compressed" /* COMPRESSED */) {
3259
+ return valid.includes("merged" /* MERGED */) ? "merged" /* MERGED */ : valid[0];
2848
3260
  }
2849
- const expSlug = context.experiment?.slug ?? "general";
2850
- const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
2851
- const filename = `${nextNum}-${role}-${expSlug}.md`;
2852
- const target = path6.join(fullDir, filename);
2853
- fs6.writeFileSync(target, markdown);
2854
- return target;
3261
+ return valid[0];
2855
3262
  }
2856
- var fs6, path6, import_claude_agent_sdk2, ROLE_MAX_TURNS, DIM2, RESET2, CYAN2;
2857
- var init_spawn = __esm({
2858
- "src/agents/spawn.ts"() {
3263
+ var init_machine = __esm({
3264
+ "src/state/machine.ts"() {
2859
3265
  "use strict";
2860
- fs6 = __toESM(require("fs"));
2861
- path6 = __toESM(require("path"));
2862
- import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
2863
- init_parse();
2864
- init_connection();
2865
- ROLE_MAX_TURNS = {
2866
- builder: 50,
2867
- critic: 30,
2868
- adversary: 30,
2869
- verifier: 50,
2870
- compressor: 30,
2871
- reframer: 20,
2872
- scout: 20,
2873
- gatekeeper: 10
2874
- };
2875
- DIM2 = "\x1B[2m";
2876
- RESET2 = "\x1B[0m";
2877
- CYAN2 = "\x1B[36m";
3266
+ init_types2();
2878
3267
  }
2879
3268
  });
2880
3269
 
@@ -2966,6 +3355,84 @@ async function resolve(db, exp, projectRoot) {
2966
3355
  }
2967
3356
  }
2968
3357
  }
3358
+ async function resolveDbOnly(db, exp, projectRoot) {
3359
+ let grades = getVerificationsByExperiment(db, exp.id);
3360
+ if (grades.length === 0) {
3361
+ warn(`No verification records for ${exp.slug}. Defaulting to weak.`);
3362
+ insertVerification(
3363
+ db,
3364
+ exp.id,
3365
+ "auto-default",
3366
+ "weak",
3367
+ null,
3368
+ null,
3369
+ "No structured verification output. Auto-defaulted to weak."
3370
+ );
3371
+ grades = getVerificationsByExperiment(db, exp.id);
3372
+ }
3373
+ const overallGrade = worstGrade(grades);
3374
+ switch (overallGrade) {
3375
+ case "sound":
3376
+ updateExperimentStatus(db, exp.id, "merged");
3377
+ success(`Experiment ${exp.slug} RESOLVED (sound) \u2014 git merge deferred.`);
3378
+ break;
3379
+ case "good": {
3380
+ const gaps = grades.filter((g) => g.grade === "good").map((g) => `- **${g.component}**: ${g.notes ?? "minor gaps"}`).join("\n");
3381
+ appendToFragilityMap(projectRoot, exp.slug, gaps);
3382
+ updateExperimentStatus(db, exp.id, "merged");
3383
+ success(`Experiment ${exp.slug} RESOLVED (good) \u2014 git merge deferred.`);
3384
+ break;
3385
+ }
3386
+ case "weak": {
3387
+ const confirmedDoubts = getConfirmedDoubts(db, exp.id);
3388
+ const guidance = await spawnSynthesiser({
3389
+ experiment: {
3390
+ id: exp.id,
3391
+ slug: exp.slug,
3392
+ hypothesis: exp.hypothesis,
3393
+ status: exp.status,
3394
+ sub_type: exp.sub_type,
3395
+ builder_guidance: exp.builder_guidance
3396
+ },
3397
+ verificationReport: grades,
3398
+ confirmedDoubts,
3399
+ taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
3400
+ }, projectRoot);
3401
+ const guidanceText = guidance.structured?.guidance ?? guidance.output;
3402
+ db.transaction(() => {
3403
+ storeBuilderGuidance(db, exp.id, guidanceText);
3404
+ updateExperimentStatus(db, exp.id, "building");
3405
+ if (exp.sub_type) {
3406
+ incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
3407
+ }
3408
+ })();
3409
+ warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance generated.`);
3410
+ break;
3411
+ }
3412
+ case "rejected": {
3413
+ const rejectedComponents = grades.filter((g) => g.grade === "rejected");
3414
+ const whyFailed = rejectedComponents.map((r) => r.notes ?? "rejected").join("; ");
3415
+ db.transaction(() => {
3416
+ insertDeadEnd(
3417
+ db,
3418
+ exp.id,
3419
+ exp.hypothesis ?? exp.slug,
3420
+ whyFailed,
3421
+ `Approach rejected: ${whyFailed}`,
3422
+ exp.sub_type,
3423
+ "structural"
3424
+ );
3425
+ updateExperimentStatus(db, exp.id, "dead_end");
3426
+ if (exp.sub_type) {
3427
+ incrementSubTypeFailure(db, exp.sub_type, exp.id, "rejected");
3428
+ }
3429
+ })();
3430
+ info(`Experiment ${exp.slug} DEAD-ENDED (rejected). Constraint recorded.`);
3431
+ break;
3432
+ }
3433
+ }
3434
+ return overallGrade;
3435
+ }
2969
3436
  function gitMerge(branch, cwd) {
2970
3437
  try {
2971
3438
  (0, import_node_child_process3.execSync)(`git merge ${branch} --no-ff -m "Merge experiment branch ${branch}"`, {
@@ -3016,7 +3483,7 @@ var init_resolve = __esm({
3016
3483
  "use strict";
3017
3484
  fs7 = __toESM(require("fs"));
3018
3485
  path7 = __toESM(require("path"));
3019
- init_types();
3486
+ init_types2();
3020
3487
  init_queries();
3021
3488
  init_spawn();
3022
3489
  import_node_child_process3 = require("child_process");
@@ -3028,7 +3495,9 @@ var init_resolve = __esm({
3028
3495
  var cycle_exports = {};
3029
3496
  __export(cycle_exports, {
3030
3497
  cycle: () => cycle,
3031
- resolveCmd: () => resolveCmd
3498
+ resolveCmd: () => resolveCmd,
3499
+ runResolve: () => runResolve,
3500
+ runStep: () => runStep
3032
3501
  });
3033
3502
  async function cycle(step, args) {
3034
3503
  const root = findProjectRoot();
@@ -3059,7 +3528,28 @@ async function resolveCmd(args) {
3059
3528
  const exp = resolveExperimentArg(db, args);
3060
3529
  transition(exp.status, "resolved" /* RESOLVED */);
3061
3530
  await resolve(db, exp, root);
3062
- updateExperimentStatus(db, exp.id, "resolved");
3531
+ }
3532
+ async function runStep(step, db, exp, root) {
3533
+ switch (step) {
3534
+ case "build":
3535
+ return doBuild(db, exp, root);
3536
+ case "challenge":
3537
+ return doChallenge(db, exp, root);
3538
+ case "doubt":
3539
+ return doDoubt(db, exp, root);
3540
+ case "scout":
3541
+ return doScout(db, exp, root);
3542
+ case "verify":
3543
+ return doVerify(db, exp, root);
3544
+ case "gate":
3545
+ return doGate(db, exp, root);
3546
+ case "compress":
3547
+ return doCompress(db, root);
3548
+ }
3549
+ }
3550
+ async function runResolve(db, exp, root) {
3551
+ transition(exp.status, "resolved" /* RESOLVED */);
3552
+ await resolve(db, exp, root);
3063
3553
  }
3064
3554
  async function doGate(db, exp, root) {
3065
3555
  transition(exp.status, "gated" /* GATED */);
@@ -3494,470 +3984,1155 @@ function ingestStructuredOutput(db, experimentId, structured) {
3494
3984
  for (const f of structured.findings) {
3495
3985
  insertFinding(db, experimentId, f.approach, f.source, f.relevance, f.contradicts_current);
3496
3986
  }
3497
- info(`Ingested ${structured.findings.length} finding(s)`);
3987
+ info(`Ingested ${structured.findings.length} finding(s)`);
3988
+ }
3989
+ }
3990
+ var fs8, path8, import_node_child_process4;
3991
+ var init_cycle = __esm({
3992
+ "src/commands/cycle.ts"() {
3993
+ "use strict";
3994
+ fs8 = __toESM(require("fs"));
3995
+ path8 = __toESM(require("path"));
3996
+ import_node_child_process4 = require("child_process");
3997
+ init_connection();
3998
+ init_queries();
3999
+ init_machine();
4000
+ init_types2();
4001
+ init_spawn();
4002
+ init_resolve();
4003
+ init_config();
4004
+ init_metrics();
4005
+ init_format();
4006
+ }
4007
+ });
4008
+
4009
+ // src/commands/classify.ts
4010
+ var classify_exports = {};
4011
+ __export(classify_exports, {
4012
+ classify: () => classify,
4013
+ reframe: () => reframe
4014
+ });
4015
+ async function classify(args) {
4016
+ const root = findProjectRoot();
4017
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
4018
+ const domain = args.filter((a) => !a.startsWith("--")).join(" ");
4019
+ if (!domain) {
4020
+ throw new Error('Usage: majlis classify "domain description"');
4021
+ }
4022
+ const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
4023
+ const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
4024
+ const deadEndsPath = path9.join(root, "docs", "synthesis", "dead-ends.md");
4025
+ const deadEnds = fs9.existsSync(deadEndsPath) ? fs9.readFileSync(deadEndsPath, "utf-8") : "";
4026
+ info(`Classifying problem domain: ${domain}`);
4027
+ const result = await spawnAgent("builder", {
4028
+ synthesis,
4029
+ taskPrompt: `Classify the following problem domain into canonical sub-types (Al-Khwarizmi method). For each sub-type: describe it, identify its canonical form, and list known constraints.
4030
+
4031
+ Domain: ${domain}
4032
+
4033
+ Dead-ends for context:
4034
+ ${deadEnds}
4035
+
4036
+ Write the classification to docs/classification/ following the template.`
4037
+ }, root);
4038
+ success("Classification complete. Check docs/classification/ for the output.");
4039
+ }
4040
+ async function reframe(args) {
4041
+ const root = findProjectRoot();
4042
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
4043
+ const classificationDir = path9.join(root, "docs", "classification");
4044
+ let classificationContent = "";
4045
+ if (fs9.existsSync(classificationDir)) {
4046
+ const files = fs9.readdirSync(classificationDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
4047
+ for (const f of files) {
4048
+ classificationContent += fs9.readFileSync(path9.join(classificationDir, f), "utf-8") + "\n\n";
4049
+ }
4050
+ }
4051
+ const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
4052
+ const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
4053
+ const deadEndsPath = path9.join(root, "docs", "synthesis", "dead-ends.md");
4054
+ const deadEnds = fs9.existsSync(deadEndsPath) ? fs9.readFileSync(deadEndsPath, "utf-8") : "";
4055
+ const configPath = path9.join(root, ".majlis", "config.json");
4056
+ let problemStatement = "";
4057
+ if (fs9.existsSync(configPath)) {
4058
+ const config = JSON.parse(fs9.readFileSync(configPath, "utf-8"));
4059
+ problemStatement = `${config.project?.description ?? ""}
4060
+ Objective: ${config.project?.objective ?? ""}`;
4061
+ }
4062
+ const target = args.filter((a) => !a.startsWith("--")).join(" ") || "current classification";
4063
+ info(`Reframing: ${target}`);
4064
+ const result = await spawnAgent("reframer", {
4065
+ synthesis,
4066
+ taskPrompt: `You are the Reframer. You receive ONLY the problem statement and classification \u2014 NOT builder code.
4067
+
4068
+ Problem Statement:
4069
+ ${problemStatement}
4070
+
4071
+ Current Classification:
4072
+ ${classificationContent}
4073
+
4074
+ Dead-End Registry:
4075
+ ${deadEnds}
4076
+
4077
+ Independently propose a decomposition. Compare with the existing classification. Flag structural divergences \u2014 these are the most valuable signals.
4078
+ Write to docs/reframes/.`
4079
+ }, root);
4080
+ success("Reframe complete. Check docs/reframes/ for the output.");
4081
+ }
4082
+ var fs9, path9;
4083
+ var init_classify = __esm({
4084
+ "src/commands/classify.ts"() {
4085
+ "use strict";
4086
+ fs9 = __toESM(require("fs"));
4087
+ path9 = __toESM(require("path"));
4088
+ init_connection();
4089
+ init_spawn();
4090
+ init_format();
4091
+ }
4092
+ });
4093
+
4094
+ // src/commands/audit.ts
4095
+ var audit_exports = {};
4096
+ __export(audit_exports, {
4097
+ audit: () => audit
4098
+ });
4099
+ async function audit(args) {
4100
+ const root = findProjectRoot();
4101
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
4102
+ const db = getDb(root);
4103
+ const objective = args.filter((a) => !a.startsWith("--")).join(" ");
4104
+ const config = loadConfig(root);
4105
+ const experiments = listAllExperiments(db);
4106
+ const deadEnds = listAllDeadEnds(db);
4107
+ const circuitBreakers = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
4108
+ const classificationDir = path10.join(root, "docs", "classification");
4109
+ let classification = "";
4110
+ if (fs10.existsSync(classificationDir)) {
4111
+ const files = fs10.readdirSync(classificationDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
4112
+ for (const f of files) {
4113
+ classification += fs10.readFileSync(path10.join(classificationDir, f), "utf-8") + "\n\n";
4114
+ }
3498
4115
  }
4116
+ const synthesis = readFileOrEmpty(path10.join(root, "docs", "synthesis", "current.md"));
4117
+ header("Maqasid Check \u2014 Purpose Audit");
4118
+ const trippedBreakers = circuitBreakers.filter((cb) => cb.tripped);
4119
+ if (trippedBreakers.length > 0) {
4120
+ warn(`Circuit breaker(s) tripped: ${trippedBreakers.map((cb) => cb.sub_type).join(", ")}`);
4121
+ }
4122
+ const auditPrompt = `You are performing a Maqasid Check (purpose audit).
4123
+
4124
+ ORIGINAL OBJECTIVE: ${objective || config.project?.objective || "Not specified"}
4125
+
4126
+ CURRENT CLASSIFICATION:
4127
+ ${classification}
4128
+
4129
+ PROJECT SYNTHESIS:
4130
+ ${synthesis}
4131
+
4132
+ DEAD-ENDS (${deadEnds.length} total):
4133
+ ${deadEnds.map(
4134
+ (d) => `- ${d.approach}: ${d.structural_constraint}`
4135
+ ).join("\n")}
4136
+
4137
+ EXPERIMENT HISTORY (${experiments.length} total):
4138
+ ${experiments.map(
4139
+ (e) => `- #${e.id} ${e.slug}: ${e.status} (sub-type: ${e.sub_type ?? "none"})`
4140
+ ).join("\n")}
4141
+
4142
+ TRIPPED CIRCUIT BREAKERS:
4143
+ ${trippedBreakers.map(
4144
+ (cb) => `- ${cb.sub_type}: ${cb.failure_count} failures`
4145
+ ).join("\n") || "None"}
4146
+
4147
+ Answer these questions:
4148
+ 1. What is the actual objective? Trace back from current experiments to the root goal.
4149
+ 2. Is the current classification serving that objective? Or has the taxonomy become self-referential?
4150
+ 3. What would we do differently if we started from scratch with what we now know?
4151
+ 4. Is there a simpler formulation? If the classification has grown complex, something may be wrong.
4152
+
4153
+ Output: either "classification confirmed \u2014 continue" or "re-classify from X" with a specific proposal.`;
4154
+ const result = await spawnAgent("builder", {
4155
+ synthesis,
4156
+ taskPrompt: auditPrompt
4157
+ }, root);
4158
+ success("Purpose audit complete. Review the output above.");
3499
4159
  }
3500
- var fs8, path8, import_node_child_process4;
3501
- var init_cycle = __esm({
3502
- "src/commands/cycle.ts"() {
4160
+ var fs10, path10;
4161
+ var init_audit = __esm({
4162
+ "src/commands/audit.ts"() {
3503
4163
  "use strict";
3504
- fs8 = __toESM(require("fs"));
3505
- path8 = __toESM(require("path"));
3506
- import_node_child_process4 = require("child_process");
4164
+ fs10 = __toESM(require("fs"));
4165
+ path10 = __toESM(require("path"));
3507
4166
  init_connection();
3508
4167
  init_queries();
3509
- init_machine();
3510
- init_types();
3511
4168
  init_spawn();
3512
- init_resolve();
3513
4169
  init_config();
3514
- init_metrics();
3515
4170
  init_format();
3516
4171
  }
3517
4172
  });
3518
4173
 
3519
- // src/commands/classify.ts
3520
- var classify_exports = {};
3521
- __export(classify_exports, {
3522
- classify: () => classify,
3523
- reframe: () => reframe
4174
+ // src/commands/next.ts
4175
+ var next_exports = {};
4176
+ __export(next_exports, {
4177
+ next: () => next
3524
4178
  });
3525
- async function classify(args) {
4179
+ async function next(args, isJson) {
3526
4180
  const root = findProjectRoot();
3527
4181
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
3528
- const domain = args.filter((a) => !a.startsWith("--")).join(" ");
3529
- if (!domain) {
3530
- throw new Error('Usage: majlis classify "domain description"');
4182
+ const db = getDb(root);
4183
+ const config = loadConfig(root);
4184
+ const slugArg = args.filter((a) => !a.startsWith("--"))[0];
4185
+ let exp;
4186
+ if (slugArg) {
4187
+ const found = getExperimentBySlug(db, slugArg);
4188
+ if (!found) throw new Error(`Experiment not found: ${slugArg}`);
4189
+ exp = found;
4190
+ } else {
4191
+ const found = getLatestExperiment(db);
4192
+ if (!found) throw new Error('No active experiments. Run `majlis new "hypothesis"` first.');
4193
+ exp = found;
4194
+ }
4195
+ const auto = args.includes("--auto");
4196
+ if (auto) {
4197
+ await runAutoLoop(db, exp, config, root, isJson);
4198
+ } else {
4199
+ await runNextStep(db, exp, config, root, isJson);
3531
4200
  }
3532
- const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
3533
- const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
3534
- const deadEndsPath = path9.join(root, "docs", "synthesis", "dead-ends.md");
3535
- const deadEnds = fs9.existsSync(deadEndsPath) ? fs9.readFileSync(deadEndsPath, "utf-8") : "";
3536
- info(`Classifying problem domain: ${domain}`);
3537
- const result = await spawnAgent("builder", {
3538
- synthesis,
3539
- taskPrompt: `Classify the following problem domain into canonical sub-types (Al-Khwarizmi method). For each sub-type: describe it, identify its canonical form, and list known constraints.
3540
-
3541
- Domain: ${domain}
3542
-
3543
- Dead-ends for context:
3544
- ${deadEnds}
3545
-
3546
- Write the classification to docs/classification/ following the template.`
3547
- }, root);
3548
- success("Classification complete. Check docs/classification/ for the output.");
3549
4201
  }
3550
- async function reframe(args) {
3551
- const root = findProjectRoot();
3552
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
3553
- const classificationDir = path9.join(root, "docs", "classification");
3554
- let classificationContent = "";
3555
- if (fs9.existsSync(classificationDir)) {
3556
- const files = fs9.readdirSync(classificationDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
3557
- for (const f of files) {
3558
- classificationContent += fs9.readFileSync(path9.join(classificationDir, f), "utf-8") + "\n\n";
4202
+ async function runNextStep(db, exp, config, root, isJson) {
4203
+ const currentStatus = exp.status;
4204
+ const valid = validNext(currentStatus);
4205
+ if (valid.length === 0) {
4206
+ if (isJson) {
4207
+ console.log(JSON.stringify({ experiment: exp.slug, status: exp.status, terminal: true }));
4208
+ } else {
4209
+ info(`Experiment ${exp.slug} is terminal (${exp.status}).`);
3559
4210
  }
4211
+ return;
3560
4212
  }
3561
- const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
3562
- const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
3563
- const deadEndsPath = path9.join(root, "docs", "synthesis", "dead-ends.md");
3564
- const deadEnds = fs9.existsSync(deadEndsPath) ? fs9.readFileSync(deadEndsPath, "utf-8") : "";
3565
- const configPath = path9.join(root, ".majlis", "config.json");
3566
- let problemStatement = "";
3567
- if (fs9.existsSync(configPath)) {
3568
- const config = JSON.parse(fs9.readFileSync(configPath, "utf-8"));
3569
- problemStatement = `${config.project?.description ?? ""}
3570
- Objective: ${config.project?.objective ?? ""}`;
4213
+ if (exp.sub_type && checkCircuitBreaker(db, exp.sub_type, config.cycle.circuit_breaker_threshold)) {
4214
+ warn(`Circuit breaker: ${exp.sub_type} has ${config.cycle.circuit_breaker_threshold}+ failures.`);
4215
+ insertDeadEnd(
4216
+ db,
4217
+ exp.id,
4218
+ exp.hypothesis ?? exp.slug,
4219
+ `Circuit breaker tripped for ${exp.sub_type}`,
4220
+ `Sub-type ${exp.sub_type} exceeded ${config.cycle.circuit_breaker_threshold} failures`,
4221
+ exp.sub_type,
4222
+ "procedural"
4223
+ );
4224
+ updateExperimentStatus(db, exp.id, "dead_end");
4225
+ warn("Experiment dead-ended. Triggering Maqasid Check (purpose audit).");
4226
+ await audit([config.project?.objective ?? ""]);
4227
+ return;
4228
+ }
4229
+ const sessionsSinceCompression = getSessionsSinceCompression(db);
4230
+ if (sessionsSinceCompression >= config.cycle.compression_interval) {
4231
+ warn(
4232
+ `${sessionsSinceCompression} sessions since last compression. Consider running: majlis compress`
4233
+ );
4234
+ }
4235
+ const expHasDoubts = hasDoubts(db, exp.id);
4236
+ const expHasChallenges = hasChallenges(db, exp.id);
4237
+ const nextStep = determineNextStep(exp, valid, expHasDoubts, expHasChallenges);
4238
+ if (isJson) {
4239
+ console.log(JSON.stringify({
4240
+ experiment: exp.slug,
4241
+ current_status: exp.status,
4242
+ next_step: nextStep,
4243
+ valid_transitions: valid
4244
+ }));
4245
+ return;
4246
+ }
4247
+ info(`${exp.slug}: ${exp.status} \u2192 ${nextStep}`);
4248
+ await executeStep(nextStep, exp, root);
4249
+ }
4250
+ async function runAutoLoop(db, exp, config, root, isJson) {
4251
+ const MAX_ITERATIONS = 20;
4252
+ let iteration = 0;
4253
+ header(`Auto mode \u2014 ${exp.slug}`);
4254
+ while (iteration < MAX_ITERATIONS) {
4255
+ iteration++;
4256
+ const freshExp = getExperimentBySlug(db, exp.slug);
4257
+ if (!freshExp) break;
4258
+ exp = freshExp;
4259
+ if (isTerminal(exp.status)) {
4260
+ success(`Experiment ${exp.slug} reached terminal state: ${exp.status}`);
4261
+ break;
4262
+ }
4263
+ if (exp.sub_type && checkCircuitBreaker(db, exp.sub_type, config.cycle.circuit_breaker_threshold)) {
4264
+ warn(`Circuit breaker tripped for ${exp.sub_type}. Stopping auto mode.`);
4265
+ insertDeadEnd(
4266
+ db,
4267
+ exp.id,
4268
+ exp.hypothesis ?? exp.slug,
4269
+ `Circuit breaker tripped for ${exp.sub_type}`,
4270
+ `Sub-type ${exp.sub_type} exceeded ${config.cycle.circuit_breaker_threshold} failures`,
4271
+ exp.sub_type,
4272
+ "procedural"
4273
+ );
4274
+ updateExperimentStatus(db, exp.id, "dead_end");
4275
+ await audit([config.project?.objective ?? ""]);
4276
+ break;
4277
+ }
4278
+ const valid = validNext(exp.status);
4279
+ if (valid.length === 0) break;
4280
+ const expHasDoubts = hasDoubts(db, exp.id);
4281
+ const expHasChallenges = hasChallenges(db, exp.id);
4282
+ const nextStep = determineNextStep(exp, valid, expHasDoubts, expHasChallenges);
4283
+ info(`[${iteration}/${MAX_ITERATIONS}] ${exp.slug}: ${exp.status} \u2192 ${nextStep}`);
4284
+ await executeStep(nextStep, exp, root);
4285
+ }
4286
+ if (iteration >= MAX_ITERATIONS) {
4287
+ warn(`Reached maximum iterations (${MAX_ITERATIONS}). Stopping auto mode.`);
4288
+ }
4289
+ }
4290
+ async function executeStep(step, exp, root) {
4291
+ const expArgs = [exp.slug];
4292
+ switch (step) {
4293
+ case "building" /* BUILDING */:
4294
+ await cycle("build", expArgs);
4295
+ break;
4296
+ case "challenged" /* CHALLENGED */:
4297
+ await cycle("challenge", expArgs);
4298
+ break;
4299
+ case "doubted" /* DOUBTED */:
4300
+ await cycle("doubt", expArgs);
4301
+ break;
4302
+ case "scouted" /* SCOUTED */:
4303
+ await cycle("scout", expArgs);
4304
+ break;
4305
+ case "verifying" /* VERIFYING */:
4306
+ await cycle("verify", expArgs);
4307
+ break;
4308
+ case "resolved" /* RESOLVED */:
4309
+ await resolveCmd(expArgs);
4310
+ break;
4311
+ case "compressed" /* COMPRESSED */:
4312
+ await cycle("compress", []);
4313
+ updateExperimentStatus(getDb(root), exp.id, "compressed");
4314
+ info(`Experiment ${exp.slug} compressed.`);
4315
+ break;
4316
+ case "gated" /* GATED */:
4317
+ await cycle("gate", expArgs);
4318
+ break;
4319
+ case "reframed" /* REFRAMED */:
4320
+ updateExperimentStatus(getDb(root), exp.id, "reframed");
4321
+ info(`Reframe acknowledged for ${exp.slug}. Proceeding to gate.`);
4322
+ break;
4323
+ case "merged" /* MERGED */:
4324
+ updateExperimentStatus(getDb(root), exp.id, "merged");
4325
+ success(`Experiment ${exp.slug} merged.`);
4326
+ break;
4327
+ case "dead_end" /* DEAD_END */:
4328
+ info(`Experiment ${exp.slug} is dead-ended. No further action.`);
4329
+ break;
4330
+ default:
4331
+ warn(`Don't know how to execute step: ${step}`);
3571
4332
  }
3572
- const target = args.filter((a) => !a.startsWith("--")).join(" ") || "current classification";
3573
- info(`Reframing: ${target}`);
3574
- const result = await spawnAgent("reframer", {
3575
- synthesis,
3576
- taskPrompt: `You are the Reframer. You receive ONLY the problem statement and classification \u2014 NOT builder code.
3577
-
3578
- Problem Statement:
3579
- ${problemStatement}
3580
-
3581
- Current Classification:
3582
- ${classificationContent}
3583
-
3584
- Dead-End Registry:
3585
- ${deadEnds}
3586
-
3587
- Independently propose a decomposition. Compare with the existing classification. Flag structural divergences \u2014 these are the most valuable signals.
3588
- Write to docs/reframes/.`
3589
- }, root);
3590
- success("Reframe complete. Check docs/reframes/ for the output.");
3591
4333
  }
3592
- var fs9, path9;
3593
- var init_classify = __esm({
3594
- "src/commands/classify.ts"() {
4334
+ var init_next = __esm({
4335
+ "src/commands/next.ts"() {
3595
4336
  "use strict";
3596
- fs9 = __toESM(require("fs"));
3597
- path9 = __toESM(require("path"));
3598
4337
  init_connection();
3599
- init_spawn();
4338
+ init_queries();
4339
+ init_machine();
4340
+ init_types2();
4341
+ init_queries();
4342
+ init_config();
4343
+ init_cycle();
4344
+ init_audit();
3600
4345
  init_format();
3601
4346
  }
3602
4347
  });
3603
4348
 
3604
- // src/commands/audit.ts
3605
- var audit_exports = {};
3606
- __export(audit_exports, {
3607
- audit: () => audit
4349
+ // src/commands/run.ts
4350
+ var run_exports = {};
4351
+ __export(run_exports, {
4352
+ run: () => run
3608
4353
  });
3609
- async function audit(args) {
4354
+ async function run(args) {
3610
4355
  const root = findProjectRoot();
3611
4356
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
4357
+ const goal = args.filter((a) => !a.startsWith("--")).join(" ");
4358
+ if (!goal) {
4359
+ throw new Error('Usage: majlis run "goal description"');
4360
+ }
3612
4361
  const db = getDb(root);
3613
- const objective = args.filter((a) => !a.startsWith("--")).join(" ");
3614
4362
  const config = loadConfig(root);
3615
- const experiments = listAllExperiments(db);
3616
- const deadEnds = listAllDeadEnds(db);
3617
- const circuitBreakers = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
3618
- const classificationDir = path10.join(root, "docs", "classification");
3619
- let classification = "";
3620
- if (fs10.existsSync(classificationDir)) {
3621
- const files = fs10.readdirSync(classificationDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
3622
- for (const f of files) {
3623
- classification += fs10.readFileSync(path10.join(classificationDir, f), "utf-8") + "\n\n";
4363
+ const MAX_EXPERIMENTS = 10;
4364
+ const MAX_STEPS2 = 200;
4365
+ let experimentCount = 0;
4366
+ let stepCount = 0;
4367
+ let consecutiveFailures = 0;
4368
+ const usedHypotheses = /* @__PURE__ */ new Set();
4369
+ header(`Autonomous Mode \u2014 ${goal}`);
4370
+ while (stepCount < MAX_STEPS2 && experimentCount < MAX_EXPERIMENTS) {
4371
+ if (isShutdownRequested()) {
4372
+ warn("Shutdown requested. Stopping autonomous mode.");
4373
+ break;
4374
+ }
4375
+ stepCount++;
4376
+ let exp = getLatestExperiment(db);
4377
+ if (!exp) {
4378
+ experimentCount++;
4379
+ if (experimentCount > MAX_EXPERIMENTS) {
4380
+ warn(`Reached max experiments (${MAX_EXPERIMENTS}). Stopping.`);
4381
+ break;
4382
+ }
4383
+ const sessionsSinceCompression = getSessionsSinceCompression(db);
4384
+ if (sessionsSinceCompression >= config.cycle.compression_interval) {
4385
+ info("Compressing before next experiment...");
4386
+ await cycle("compress", []);
4387
+ }
4388
+ info(`[Experiment ${experimentCount}/${MAX_EXPERIMENTS}] Planning next experiment...`);
4389
+ const hypothesis = await deriveNextHypothesis(goal, root, db);
4390
+ if (!hypothesis) {
4391
+ success("Planner says the goal has been met. Stopping.");
4392
+ break;
4393
+ }
4394
+ if (usedHypotheses.has(hypothesis)) {
4395
+ warn(`Planner returned duplicate hypothesis: "${hypothesis.slice(0, 80)}". Stopping.`);
4396
+ break;
4397
+ }
4398
+ usedHypotheses.add(hypothesis);
4399
+ info(`Next hypothesis: ${hypothesis}`);
4400
+ exp = await createNewExperiment(db, root, hypothesis);
4401
+ success(`Created experiment #${exp.id}: ${exp.slug}`);
4402
+ }
4403
+ if (isTerminal(exp.status)) {
4404
+ if (exp.status === "merged") {
4405
+ success(`Experiment ${exp.slug} merged.`);
4406
+ } else if (exp.status === "dead_end") {
4407
+ info(`Experiment ${exp.slug} dead-ended.`);
4408
+ }
4409
+ continue;
4410
+ }
4411
+ info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
4412
+ try {
4413
+ await next([exp.slug], false);
4414
+ consecutiveFailures = 0;
4415
+ } catch (err) {
4416
+ consecutiveFailures++;
4417
+ const message = err instanceof Error ? err.message : String(err);
4418
+ warn(`Step failed for ${exp.slug}: ${message}`);
4419
+ try {
4420
+ insertDeadEnd(
4421
+ db,
4422
+ exp.id,
4423
+ exp.hypothesis ?? exp.slug,
4424
+ message,
4425
+ `Process failure: ${message}`,
4426
+ exp.sub_type,
4427
+ "procedural"
4428
+ );
4429
+ updateExperimentStatus(db, exp.id, "dead_end");
4430
+ } catch (innerErr) {
4431
+ const innerMsg = innerErr instanceof Error ? innerErr.message : String(innerErr);
4432
+ warn(`Could not record dead-end: ${innerMsg}`);
4433
+ }
4434
+ if (consecutiveFailures >= 3) {
4435
+ warn(`${consecutiveFailures} consecutive failures. Stopping autonomous mode.`);
4436
+ break;
4437
+ }
3624
4438
  }
3625
4439
  }
3626
- const synthesis = readFileOrEmpty(path10.join(root, "docs", "synthesis", "current.md"));
3627
- header("Maqasid Check \u2014 Purpose Audit");
3628
- const trippedBreakers = circuitBreakers.filter((cb) => cb.tripped);
3629
- if (trippedBreakers.length > 0) {
3630
- warn(`Circuit breaker(s) tripped: ${trippedBreakers.map((cb) => cb.sub_type).join(", ")}`);
4440
+ if (stepCount >= MAX_STEPS2) {
4441
+ warn(`Reached max steps (${MAX_STEPS2}). Stopping autonomous mode.`);
3631
4442
  }
3632
- const auditPrompt = `You are performing a Maqasid Check (purpose audit).
4443
+ header("Autonomous Mode Complete");
4444
+ info(`Goal: ${goal}`);
4445
+ info(`Experiments: ${experimentCount}, Steps: ${stepCount}`);
4446
+ info("Run `majlis status` to see final state.");
4447
+ }
4448
+ async function deriveNextHypothesis(goal, root, db) {
4449
+ const synthesis = truncateContext(readFileOrEmpty(path11.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
4450
+ const fragility = truncateContext(readFileOrEmpty(path11.join(root, "docs", "synthesis", "fragility.md")), CONTEXT_LIMITS.fragility);
4451
+ const deadEndsDoc = truncateContext(readFileOrEmpty(path11.join(root, "docs", "synthesis", "dead-ends.md")), CONTEXT_LIMITS.deadEnds);
4452
+ const deadEnds = listAllDeadEnds(db);
4453
+ const config = loadConfig(root);
4454
+ let metricsOutput = "";
4455
+ if (config.metrics?.command) {
4456
+ try {
4457
+ metricsOutput = (0, import_node_child_process5.execSync)(config.metrics.command, {
4458
+ cwd: root,
4459
+ encoding: "utf-8",
4460
+ timeout: 6e4,
4461
+ stdio: ["pipe", "pipe", "pipe"]
4462
+ }).trim();
4463
+ } catch {
4464
+ metricsOutput = "(metrics command failed)";
4465
+ }
4466
+ }
4467
+ const result = await spawnSynthesiser({
4468
+ taskPrompt: `You are the Planner for an autonomous Majlis run.
3633
4469
 
3634
- ORIGINAL OBJECTIVE: ${objective || config.project?.objective || "Not specified"}
4470
+ ## Goal
4471
+ ${goal}
3635
4472
 
3636
- CURRENT CLASSIFICATION:
3637
- ${classification}
4473
+ ## Current Metrics
4474
+ ${metricsOutput || "(no metrics configured)"}
3638
4475
 
3639
- PROJECT SYNTHESIS:
3640
- ${synthesis}
4476
+ ## Synthesis (what we know so far)
4477
+ ${synthesis || "(empty \u2014 first experiment)"}
3641
4478
 
3642
- DEAD-ENDS (${deadEnds.length} total):
3643
- ${deadEnds.map(
3644
- (d) => `- ${d.approach}: ${d.structural_constraint}`
3645
- ).join("\n")}
4479
+ ## Fragility Map (known weak areas)
4480
+ ${fragility || "(none)"}
3646
4481
 
3647
- EXPERIMENT HISTORY (${experiments.length} total):
3648
- ${experiments.map(
3649
- (e) => `- #${e.id} ${e.slug}: ${e.status} (sub-type: ${e.sub_type ?? "none"})`
3650
- ).join("\n")}
4482
+ ## Dead-End Registry
4483
+ ${deadEndsDoc || "(none)"}
3651
4484
 
3652
- TRIPPED CIRCUIT BREAKERS:
3653
- ${trippedBreakers.map(
3654
- (cb) => `- ${cb.sub_type}: ${cb.failure_count} failures`
3655
- ).join("\n") || "None"}
4485
+ ## Dead Ends (from DB \u2014 ${deadEnds.length} total)
4486
+ ${deadEnds.map((d) => `- [${d.category ?? "structural"}] ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}
3656
4487
 
3657
- Answer these questions:
3658
- 1. What is the actual objective? Trace back from current experiments to the root goal.
3659
- 2. Is the current classification serving that objective? Or has the taxonomy become self-referential?
3660
- 3. What would we do differently if we started from scratch with what we now know?
3661
- 4. Is there a simpler formulation? If the classification has grown complex, something may be wrong.
4488
+ Note: [structural] dead ends are HARD CONSTRAINTS \u2014 your hypothesis MUST NOT repeat these approaches.
4489
+ [procedural] dead ends are process failures \u2014 the approach may still be valid if executed differently.
3662
4490
 
3663
- Output: either "classification confirmed \u2014 continue" or "re-classify from X" with a specific proposal.`;
3664
- const result = await spawnAgent("builder", {
3665
- synthesis,
3666
- taskPrompt: auditPrompt
4491
+ ## Your Task
4492
+ 1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
4493
+ 2. If YES \u2014 output the JSON block below with goal_met: true.
4494
+ 3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
4495
+ - It must NOT repeat a dead-ended approach (check the dead-end registry!)
4496
+ - It should attack the weakest point revealed by synthesis/fragility
4497
+ - It must be specific and actionable \u2014 name the function or mechanism to change
4498
+ - Do NOT reference specific line numbers \u2014 they shift between experiments
4499
+ - The hypothesis should be a single sentence describing what to do, e.g.:
4500
+ "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
4501
+
4502
+ CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
4503
+ <!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->
4504
+
4505
+ If the goal is met:
4506
+ <!-- majlis-json {"goal_met": true, "hypothesis": null} -->`
3667
4507
  }, root);
3668
- success("Purpose audit complete. Review the output above.");
4508
+ const structured = result.structured;
4509
+ if (structured?.goal_met === true) {
4510
+ return null;
4511
+ }
4512
+ if (structured?.hypothesis) {
4513
+ return structured.hypothesis;
4514
+ }
4515
+ const jsonMatch = result.output.match(/"hypothesis"\s*:\s*"([^"]+)"/);
4516
+ if (jsonMatch && jsonMatch[1].length > 10) return jsonMatch[1].trim();
4517
+ const blockMatch = result.output.match(/<!--\s*majlis-json\s*(\{[\s\S]*?\})\s*-->/);
4518
+ if (blockMatch) {
4519
+ try {
4520
+ const parsed = JSON.parse(blockMatch[1]);
4521
+ if (parsed.goal_met === true) return null;
4522
+ if (parsed.hypothesis) return parsed.hypothesis;
4523
+ } catch {
4524
+ }
4525
+ }
4526
+ warn("Planner did not return structured output. Retrying with focused prompt...");
4527
+ const retry = await spawnSynthesiser({
4528
+ taskPrompt: `Based on this analysis, output ONLY a single-line JSON block:
4529
+
4530
+ ${result.output.slice(-2e3)}
4531
+
4532
+ <!-- majlis-json {"goal_met": false, "hypothesis": "your hypothesis"} -->`
4533
+ }, root);
4534
+ if (retry.structured?.hypothesis) return retry.structured.hypothesis;
4535
+ warn("Could not extract hypothesis. Using goal as fallback.");
4536
+ return goal;
4537
+ }
4538
+ async function createNewExperiment(db, root, hypothesis) {
4539
+ const slug = await generateSlug(hypothesis, root);
4540
+ let finalSlug = slug;
4541
+ let attempt = 0;
4542
+ while (getExperimentBySlug(db, finalSlug)) {
4543
+ attempt++;
4544
+ finalSlug = `${slug}-${attempt}`;
4545
+ }
4546
+ const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
4547
+ const num = allExps.count + 1;
4548
+ const paddedNum = String(num).padStart(3, "0");
4549
+ const branch = `exp/${paddedNum}-${finalSlug}`;
4550
+ try {
4551
+ (0, import_node_child_process5.execSync)(`git checkout -b ${branch}`, {
4552
+ cwd: root,
4553
+ encoding: "utf-8",
4554
+ stdio: ["pipe", "pipe", "pipe"]
4555
+ });
4556
+ info(`Created branch: ${branch}`);
4557
+ } catch {
4558
+ warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
4559
+ }
4560
+ const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);
4561
+ updateExperimentStatus(db, exp.id, "reframed");
4562
+ exp.status = "reframed";
4563
+ const docsDir = path11.join(root, "docs", "experiments");
4564
+ const templatePath = path11.join(docsDir, "_TEMPLATE.md");
4565
+ if (fs11.existsSync(templatePath)) {
4566
+ const template = fs11.readFileSync(templatePath, "utf-8");
4567
+ const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
4568
+ const logPath = path11.join(docsDir, `${paddedNum}-${finalSlug}.md`);
4569
+ fs11.writeFileSync(logPath, logContent);
4570
+ info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
4571
+ }
4572
+ return exp;
3669
4573
  }
3670
- var fs10, path10;
3671
- var init_audit = __esm({
3672
- "src/commands/audit.ts"() {
4574
+ var fs11, path11, import_node_child_process5;
4575
+ var init_run = __esm({
4576
+ "src/commands/run.ts"() {
3673
4577
  "use strict";
3674
- fs10 = __toESM(require("fs"));
3675
- path10 = __toESM(require("path"));
4578
+ fs11 = __toESM(require("fs"));
4579
+ path11 = __toESM(require("path"));
4580
+ import_node_child_process5 = require("child_process");
3676
4581
  init_connection();
3677
4582
  init_queries();
4583
+ init_machine();
4584
+ init_next();
4585
+ init_cycle();
3678
4586
  init_spawn();
3679
4587
  init_config();
4588
+ init_shutdown();
3680
4589
  init_format();
3681
4590
  }
3682
4591
  });
3683
4592
 
3684
- // src/commands/next.ts
3685
- var next_exports = {};
3686
- __export(next_exports, {
3687
- next: () => next
3688
- });
3689
- async function next(args, isJson) {
3690
- const root = findProjectRoot();
3691
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
3692
- const db = getDb(root);
3693
- const config = loadConfig(root);
3694
- const slugArg = args.filter((a) => !a.startsWith("--"))[0];
3695
- let exp;
3696
- if (slugArg) {
3697
- const found = getExperimentBySlug(db, slugArg);
3698
- if (!found) throw new Error(`Experiment not found: ${slugArg}`);
3699
- exp = found;
3700
- } else {
3701
- const found = getLatestExperiment(db);
3702
- if (!found) throw new Error('No active experiments. Run `majlis new "hypothesis"` first.');
3703
- exp = found;
3704
- }
3705
- const auto = args.includes("--auto");
3706
- if (auto) {
3707
- await runAutoLoop(db, exp, config, root, isJson);
3708
- } else {
3709
- await runNextStep(db, exp, config, root, isJson);
3710
- }
4593
+ // src/swarm/worktree.ts
4594
+ function createWorktree(mainRoot, slug, paddedNum) {
4595
+ const projectName = path12.basename(mainRoot);
4596
+ const worktreeName = `${projectName}-swarm-${paddedNum}-${slug}`;
4597
+ const worktreePath = path12.join(path12.dirname(mainRoot), worktreeName);
4598
+ const branch = `swarm/${paddedNum}-${slug}`;
4599
+ (0, import_node_child_process6.execSync)(`git worktree add ${JSON.stringify(worktreePath)} -b ${branch}`, {
4600
+ cwd: mainRoot,
4601
+ encoding: "utf-8",
4602
+ stdio: ["pipe", "pipe", "pipe"]
4603
+ });
4604
+ return {
4605
+ path: worktreePath,
4606
+ branch,
4607
+ slug,
4608
+ hypothesis: "",
4609
+ // filled in by caller
4610
+ paddedNum
4611
+ };
3711
4612
  }
3712
- async function runNextStep(db, exp, config, root, isJson) {
3713
- const currentStatus = exp.status;
3714
- const valid = validNext(currentStatus);
3715
- if (valid.length === 0) {
3716
- if (isJson) {
3717
- console.log(JSON.stringify({ experiment: exp.slug, status: exp.status, terminal: true }));
3718
- } else {
3719
- info(`Experiment ${exp.slug} is terminal (${exp.status}).`);
4613
+ function initializeWorktree(mainRoot, worktreePath) {
4614
+ const majlisDir = path12.join(worktreePath, ".majlis");
4615
+ fs12.mkdirSync(majlisDir, { recursive: true });
4616
+ const configSrc = path12.join(mainRoot, ".majlis", "config.json");
4617
+ if (fs12.existsSync(configSrc)) {
4618
+ fs12.copyFileSync(configSrc, path12.join(majlisDir, "config.json"));
4619
+ }
4620
+ const agentsSrc = path12.join(mainRoot, ".majlis", "agents");
4621
+ if (fs12.existsSync(agentsSrc)) {
4622
+ const agentsDst = path12.join(majlisDir, "agents");
4623
+ fs12.mkdirSync(agentsDst, { recursive: true });
4624
+ for (const file of fs12.readdirSync(agentsSrc)) {
4625
+ fs12.copyFileSync(path12.join(agentsSrc, file), path12.join(agentsDst, file));
3720
4626
  }
3721
- return;
3722
4627
  }
3723
- if (exp.sub_type && checkCircuitBreaker(db, exp.sub_type, config.cycle.circuit_breaker_threshold)) {
3724
- warn(`Circuit breaker: ${exp.sub_type} has ${config.cycle.circuit_breaker_threshold}+ failures.`);
3725
- insertDeadEnd(
3726
- db,
3727
- exp.id,
3728
- exp.hypothesis ?? exp.slug,
3729
- `Circuit breaker tripped for ${exp.sub_type}`,
3730
- `Sub-type ${exp.sub_type} exceeded ${config.cycle.circuit_breaker_threshold} failures`,
3731
- exp.sub_type,
3732
- "procedural"
3733
- );
3734
- updateExperimentStatus(db, exp.id, "dead_end");
3735
- warn("Experiment dead-ended. Triggering Maqasid Check (purpose audit).");
3736
- await audit([config.project?.objective ?? ""]);
3737
- return;
4628
+ const synthSrc = path12.join(mainRoot, "docs", "synthesis");
4629
+ if (fs12.existsSync(synthSrc)) {
4630
+ const synthDst = path12.join(worktreePath, "docs", "synthesis");
4631
+ fs12.mkdirSync(synthDst, { recursive: true });
4632
+ for (const file of fs12.readdirSync(synthSrc)) {
4633
+ const srcFile = path12.join(synthSrc, file);
4634
+ if (fs12.statSync(srcFile).isFile()) {
4635
+ fs12.copyFileSync(srcFile, path12.join(synthDst, file));
4636
+ }
4637
+ }
3738
4638
  }
3739
- const sessionsSinceCompression = getSessionsSinceCompression(db);
3740
- if (sessionsSinceCompression >= config.cycle.compression_interval) {
3741
- warn(
3742
- `${sessionsSinceCompression} sessions since last compression. Consider running: majlis compress`
3743
- );
4639
+ const templateSrc = path12.join(mainRoot, "docs", "experiments", "_TEMPLATE.md");
4640
+ if (fs12.existsSync(templateSrc)) {
4641
+ const expDir = path12.join(worktreePath, "docs", "experiments");
4642
+ fs12.mkdirSync(expDir, { recursive: true });
4643
+ fs12.copyFileSync(templateSrc, path12.join(expDir, "_TEMPLATE.md"));
3744
4644
  }
3745
- const expHasDoubts = hasDoubts(db, exp.id);
3746
- const expHasChallenges = hasChallenges(db, exp.id);
3747
- const nextStep = determineNextStep(exp, valid, expHasDoubts, expHasChallenges);
3748
- if (isJson) {
3749
- console.log(JSON.stringify({
3750
- experiment: exp.slug,
3751
- current_status: exp.status,
3752
- next_step: nextStep,
3753
- valid_transitions: valid
3754
- }));
3755
- return;
4645
+ const db = openDbAt(worktreePath);
4646
+ db.close();
4647
+ }
4648
+ function cleanupWorktree(mainRoot, wt) {
4649
+ try {
4650
+ (0, import_node_child_process6.execSync)(`git worktree remove ${JSON.stringify(wt.path)} --force`, {
4651
+ cwd: mainRoot,
4652
+ encoding: "utf-8",
4653
+ stdio: ["pipe", "pipe", "pipe"]
4654
+ });
4655
+ } catch {
4656
+ warn(`Could not remove worktree ${wt.path} \u2014 remove manually.`);
4657
+ }
4658
+ try {
4659
+ (0, import_node_child_process6.execSync)(`git branch -D ${wt.branch}`, {
4660
+ cwd: mainRoot,
4661
+ encoding: "utf-8",
4662
+ stdio: ["pipe", "pipe", "pipe"]
4663
+ });
4664
+ } catch {
4665
+ }
4666
+ try {
4667
+ (0, import_node_child_process6.execSync)("git worktree prune", {
4668
+ cwd: mainRoot,
4669
+ encoding: "utf-8",
4670
+ stdio: ["pipe", "pipe", "pipe"]
4671
+ });
4672
+ } catch {
3756
4673
  }
3757
- info(`${exp.slug}: ${exp.status} \u2192 ${nextStep}`);
3758
- await executeStep(nextStep, exp, root);
3759
4674
  }
3760
- async function runAutoLoop(db, exp, config, root, isJson) {
3761
- const MAX_ITERATIONS = 20;
3762
- let iteration = 0;
3763
- header(`Auto mode \u2014 ${exp.slug}`);
3764
- while (iteration < MAX_ITERATIONS) {
3765
- iteration++;
3766
- const freshExp = getExperimentBySlug(db, exp.slug);
3767
- if (!freshExp) break;
3768
- exp = freshExp;
3769
- if (isTerminal(exp.status)) {
3770
- success(`Experiment ${exp.slug} reached terminal state: ${exp.status}`);
3771
- break;
4675
+ var fs12, path12, import_node_child_process6;
4676
+ var init_worktree = __esm({
4677
+ "src/swarm/worktree.ts"() {
4678
+ "use strict";
4679
+ fs12 = __toESM(require("fs"));
4680
+ path12 = __toESM(require("path"));
4681
+ import_node_child_process6 = require("child_process");
4682
+ init_connection();
4683
+ init_format();
4684
+ }
4685
+ });
4686
+
4687
+ // src/swarm/runner.ts
4688
+ async function runExperimentInWorktree(wt) {
4689
+ const label = `[swarm:${wt.paddedNum}]`;
4690
+ let db;
4691
+ let exp = null;
4692
+ let overallGrade = null;
4693
+ let stepCount = 0;
4694
+ try {
4695
+ db = openDbAt(wt.path);
4696
+ exp = createExperiment(db, wt.slug, wt.branch, wt.hypothesis, null, null);
4697
+ updateExperimentStatus(db, exp.id, "reframed");
4698
+ exp.status = "reframed";
4699
+ const templatePath = path13.join(wt.path, "docs", "experiments", "_TEMPLATE.md");
4700
+ if (fs13.existsSync(templatePath)) {
4701
+ const template = fs13.readFileSync(templatePath, "utf-8");
4702
+ const logContent = template.replace(/\{\{title\}\}/g, wt.hypothesis).replace(/\{\{hypothesis\}\}/g, wt.hypothesis).replace(/\{\{branch\}\}/g, wt.branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
4703
+ const logPath = path13.join(wt.path, "docs", "experiments", `${wt.paddedNum}-${wt.slug}.md`);
4704
+ fs13.writeFileSync(logPath, logContent);
3772
4705
  }
3773
- if (exp.sub_type && checkCircuitBreaker(db, exp.sub_type, config.cycle.circuit_breaker_threshold)) {
3774
- warn(`Circuit breaker tripped for ${exp.sub_type}. Stopping auto mode.`);
3775
- insertDeadEnd(
3776
- db,
3777
- exp.id,
3778
- exp.hypothesis ?? exp.slug,
3779
- `Circuit breaker tripped for ${exp.sub_type}`,
3780
- `Sub-type ${exp.sub_type} exceeded ${config.cycle.circuit_breaker_threshold} failures`,
3781
- exp.sub_type,
3782
- "procedural"
4706
+ info(`${label} Starting: ${wt.hypothesis}`);
4707
+ while (stepCount < MAX_STEPS) {
4708
+ if (isShutdownRequested()) {
4709
+ warn(`${label} Shutdown requested. Stopping.`);
4710
+ break;
4711
+ }
4712
+ stepCount++;
4713
+ const fresh = getExperimentBySlug(db, wt.slug);
4714
+ if (!fresh) break;
4715
+ exp = fresh;
4716
+ if (isTerminal(exp.status)) {
4717
+ success(`${label} Reached terminal: ${exp.status}`);
4718
+ break;
4719
+ }
4720
+ const valid = validNext(exp.status);
4721
+ if (valid.length === 0) break;
4722
+ const nextStep = determineNextStep(
4723
+ exp,
4724
+ valid,
4725
+ hasDoubts(db, exp.id),
4726
+ hasChallenges(db, exp.id)
3783
4727
  );
3784
- updateExperimentStatus(db, exp.id, "dead_end");
3785
- await audit([config.project?.objective ?? ""]);
3786
- break;
4728
+ info(`${label} [${stepCount}/${MAX_STEPS}] ${exp.status} -> ${nextStep}`);
4729
+ if (nextStep === "resolved" /* RESOLVED */) {
4730
+ overallGrade = await resolveDbOnly(db, exp, wt.path);
4731
+ continue;
4732
+ }
4733
+ if (nextStep === "compressed" /* COMPRESSED */) {
4734
+ await runStep("compress", db, exp, wt.path);
4735
+ updateExperimentStatus(db, exp.id, "compressed");
4736
+ continue;
4737
+ }
4738
+ if (nextStep === "merged" /* MERGED */) {
4739
+ updateExperimentStatus(db, exp.id, "merged");
4740
+ success(`${label} Merged.`);
4741
+ break;
4742
+ }
4743
+ if (nextStep === "reframed" /* REFRAMED */) {
4744
+ updateExperimentStatus(db, exp.id, "reframed");
4745
+ continue;
4746
+ }
4747
+ const stepName = statusToStepName(nextStep);
4748
+ if (!stepName) {
4749
+ warn(`${label} Unknown step: ${nextStep}`);
4750
+ break;
4751
+ }
4752
+ try {
4753
+ await runStep(stepName, db, exp, wt.path);
4754
+ } catch (err) {
4755
+ const message = err instanceof Error ? err.message : String(err);
4756
+ warn(`${label} Step failed: ${message}`);
4757
+ try {
4758
+ insertDeadEnd(
4759
+ db,
4760
+ exp.id,
4761
+ exp.hypothesis ?? exp.slug,
4762
+ message,
4763
+ `Process failure: ${message}`,
4764
+ exp.sub_type,
4765
+ "procedural"
4766
+ );
4767
+ updateExperimentStatus(db, exp.id, "dead_end");
4768
+ } catch {
4769
+ }
4770
+ break;
4771
+ }
4772
+ }
4773
+ if (stepCount >= MAX_STEPS) {
4774
+ warn(`${label} Hit max steps (${MAX_STEPS}).`);
4775
+ }
4776
+ const finalExp = getExperimentBySlug(db, wt.slug);
4777
+ if (finalExp) exp = finalExp;
4778
+ const finalStatus = exp?.status ?? "error";
4779
+ return {
4780
+ worktree: wt,
4781
+ experiment: exp,
4782
+ finalStatus,
4783
+ overallGrade,
4784
+ costUsd: 0,
4785
+ // TODO: track via SDK when available
4786
+ stepCount
4787
+ };
4788
+ } catch (err) {
4789
+ const message = err instanceof Error ? err.message : String(err);
4790
+ warn(`${label} Fatal error: ${message}`);
4791
+ return {
4792
+ worktree: wt,
4793
+ experiment: exp,
4794
+ finalStatus: "error",
4795
+ overallGrade: null,
4796
+ costUsd: 0,
4797
+ stepCount,
4798
+ error: message
4799
+ };
4800
+ } finally {
4801
+ if (db) {
4802
+ try {
4803
+ db.close();
4804
+ } catch {
4805
+ }
3787
4806
  }
3788
- const valid = validNext(exp.status);
3789
- if (valid.length === 0) break;
3790
- const expHasDoubts = hasDoubts(db, exp.id);
3791
- const expHasChallenges = hasChallenges(db, exp.id);
3792
- const nextStep = determineNextStep(exp, valid, expHasDoubts, expHasChallenges);
3793
- info(`[${iteration}/${MAX_ITERATIONS}] ${exp.slug}: ${exp.status} \u2192 ${nextStep}`);
3794
- await executeStep(nextStep, exp, root);
3795
- }
3796
- if (iteration >= MAX_ITERATIONS) {
3797
- warn(`Reached maximum iterations (${MAX_ITERATIONS}). Stopping auto mode.`);
3798
4807
  }
3799
4808
  }
3800
- async function executeStep(step, exp, root) {
3801
- const expArgs = [exp.slug];
3802
- switch (step) {
4809
+ function statusToStepName(status2) {
4810
+ switch (status2) {
4811
+ case "gated" /* GATED */:
4812
+ return "gate";
3803
4813
  case "building" /* BUILDING */:
3804
- await cycle("build", expArgs);
3805
- break;
4814
+ return "build";
3806
4815
  case "challenged" /* CHALLENGED */:
3807
- await cycle("challenge", expArgs);
3808
- break;
4816
+ return "challenge";
3809
4817
  case "doubted" /* DOUBTED */:
3810
- await cycle("doubt", expArgs);
3811
- break;
4818
+ return "doubt";
3812
4819
  case "scouted" /* SCOUTED */:
3813
- await cycle("scout", expArgs);
3814
- break;
4820
+ return "scout";
3815
4821
  case "verifying" /* VERIFYING */:
3816
- await cycle("verify", expArgs);
3817
- break;
3818
- case "resolved" /* RESOLVED */:
3819
- await resolveCmd(expArgs);
3820
- break;
3821
- case "compressed" /* COMPRESSED */:
3822
- await cycle("compress", []);
3823
- updateExperimentStatus(getDb(root), exp.id, "compressed");
3824
- info(`Experiment ${exp.slug} compressed.`);
3825
- break;
3826
- case "gated" /* GATED */:
3827
- await cycle("gate", expArgs);
3828
- break;
3829
- case "reframed" /* REFRAMED */:
3830
- updateExperimentStatus(getDb(root), exp.id, "reframed");
3831
- info(`Reframe acknowledged for ${exp.slug}. Proceeding to gate.`);
3832
- break;
4822
+ return "verify";
3833
4823
  default:
3834
- warn(`Don't know how to execute step: ${step}`);
4824
+ return null;
3835
4825
  }
3836
4826
  }
3837
- var init_next = __esm({
3838
- "src/commands/next.ts"() {
4827
+ var fs13, path13, MAX_STEPS;
4828
+ var init_runner = __esm({
4829
+ "src/swarm/runner.ts"() {
3839
4830
  "use strict";
4831
+ fs13 = __toESM(require("fs"));
4832
+ path13 = __toESM(require("path"));
3840
4833
  init_connection();
3841
4834
  init_queries();
3842
4835
  init_machine();
3843
- init_types();
3844
- init_queries();
3845
- init_config();
4836
+ init_types2();
3846
4837
  init_cycle();
3847
- init_audit();
4838
+ init_resolve();
4839
+ init_shutdown();
4840
+ init_format();
4841
+ MAX_STEPS = 20;
4842
+ }
4843
+ });
4844
+
4845
+ // src/swarm/aggregate.ts
4846
+ function importExperimentFromWorktree(sourceDb, targetDb, slug) {
4847
+ const sourceExp = sourceDb.prepare(
4848
+ "SELECT * FROM experiments WHERE slug = ?"
4849
+ ).get(slug);
4850
+ if (!sourceExp) {
4851
+ throw new Error(`Experiment ${slug} not found in source DB`);
4852
+ }
4853
+ const sourceId = sourceExp.id;
4854
+ const insertExp = targetDb.prepare(`
4855
+ INSERT INTO experiments (slug, branch, status, classification_ref, sub_type,
4856
+ hypothesis, builder_guidance, created_at, updated_at)
4857
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
4858
+ `);
4859
+ const result = insertExp.run(
4860
+ sourceExp.slug,
4861
+ sourceExp.branch,
4862
+ sourceExp.status,
4863
+ sourceExp.classification_ref,
4864
+ sourceExp.sub_type,
4865
+ sourceExp.hypothesis,
4866
+ sourceExp.builder_guidance,
4867
+ sourceExp.created_at,
4868
+ sourceExp.updated_at
4869
+ );
4870
+ const targetId = result.lastInsertRowid;
4871
+ for (const table2 of CHILD_TABLES) {
4872
+ importChildTable(sourceDb, targetDb, table2, sourceId, targetId);
4873
+ }
4874
+ const stfRows = sourceDb.prepare(
4875
+ "SELECT * FROM sub_type_failures WHERE experiment_id = ?"
4876
+ ).all(sourceId);
4877
+ for (const row of stfRows) {
4878
+ targetDb.prepare(`
4879
+ INSERT INTO sub_type_failures (sub_type, experiment_id, grade, created_at)
4880
+ VALUES (?, ?, ?, ?)
4881
+ `).run(row.sub_type, targetId, row.grade, row.created_at);
4882
+ }
4883
+ return targetId;
4884
+ }
4885
+ function importChildTable(sourceDb, targetDb, table2, sourceExpId, targetExpId) {
4886
+ const rows = sourceDb.prepare(
4887
+ `SELECT * FROM ${table2} WHERE experiment_id = ?`
4888
+ ).all(sourceExpId);
4889
+ if (rows.length === 0) return;
4890
+ const cols = Object.keys(rows[0]).filter((c) => c !== "id");
4891
+ const placeholders = cols.map(() => "?").join(", ");
4892
+ const insert = targetDb.prepare(
4893
+ `INSERT INTO ${table2} (${cols.join(", ")}) VALUES (${placeholders})`
4894
+ );
4895
+ for (const row of rows) {
4896
+ const values = cols.map(
4897
+ (c) => c === "experiment_id" ? targetExpId : row[c]
4898
+ );
4899
+ insert.run(...values);
4900
+ }
4901
+ }
4902
/**
 * Fold the per-worktree swarm results back into the main project database
 * and compute the run summary.
 *
 * For every result that finished without an error, the worktree's database
 * is opened and its experiment is imported into `mainDb` inside a single
 * transaction. Results are then ranked by `GRADE_RANK` (lower is better;
 * unknown grades rank last) to pick the best experiment.
 *
 * @param {string} mainRoot - Main project root (currently unused).
 * @param {object} mainDb - Main database handle; must provide `transaction()`.
 * @param {Array<object>} results - One result per worktree, as read here:
 *   `worktree.path`, `worktree.slug`, `experiment`, `finalStatus`,
 *   `overallGrade`, `costUsd`, `error`.
 * @returns {object} Summary with `parallelCount`, `results`, `bestExperiment`
 *   (or `null`), `totalCostUsd`, `mergedCount`, `deadEndCount`, `errorCount`.
 *   `goal` is returned empty and is filled in by the caller.
 */
function aggregateSwarmResults(mainRoot, mainDb, results) {
  let mergedCount = 0;
  let deadEndCount = 0;
  let errorCount = 0;
  let totalCostUsd = 0;

  for (const r of results) {
    // A missing cost must not turn the whole total into NaN.
    totalCostUsd += r.costUsd ?? 0;
    if (r.error || !r.experiment) {
      errorCount++;
      continue;
    }

    let sourceDb = null;
    try {
      sourceDb = openDbAt(r.worktree.path);
      mainDb.transaction(() => {
        importExperimentFromWorktree(sourceDb, mainDb, r.worktree.slug);
      })();
      if (r.finalStatus === "merged") mergedCount++;
      else if (r.finalStatus === "dead_end") deadEndCount++;
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      warn(`Failed to import ${r.worktree.slug}: ${msg}`);
      errorCount++;
    } finally {
      // Always release the worktree DB handle, including when the import
      // throws; otherwise the handle stays open.
      if (sourceDb) {
        try {
          sourceDb.close();
        } catch (closeErr) {
          const msg = closeErr instanceof Error ? closeErr.message : String(closeErr);
          warn(`Failed to close database for ${r.worktree.slug}: ${msg}`);
        }
      }
    }
  }

  // filter() returns a fresh array, so sorting here does not reorder `results`.
  const ranked = results
    .filter((r) => r.overallGrade && !r.error)
    .sort((a, b) => (GRADE_RANK[a.overallGrade] ?? 99) - (GRADE_RANK[b.overallGrade] ?? 99));
  const best = ranked.length > 0 ? ranked[0] : null;

  return {
    goal: "",
    // filled by caller
    parallelCount: results.length,
    results,
    bestExperiment: best,
    totalCostUsd,
    mergedCount,
    deadEndCount,
    errorCount
  };
}
4945
+ var CHILD_TABLES, GRADE_RANK;
4946
+ var init_aggregate = __esm({
4947
+ "src/swarm/aggregate.ts"() {
4948
+ "use strict";
4949
+ init_connection();
3848
4950
  init_format();
4951
+ CHILD_TABLES = [
4952
+ "decisions",
4953
+ "doubts",
4954
+ "challenges",
4955
+ "verifications",
4956
+ "metrics",
4957
+ "dead_ends",
4958
+ "reframes",
4959
+ "findings"
4960
+ ];
4961
+ GRADE_RANK = {
4962
+ sound: 0,
4963
+ good: 1,
4964
+ weak: 2,
4965
+ rejected: 3
4966
+ };
3849
4967
  }
3850
4968
  });
3851
4969
 
3852
- // src/commands/run.ts
3853
- var run_exports = {};
3854
- __export(run_exports, {
3855
- run: () => run
4970
+ // src/commands/swarm.ts
4971
+ var swarm_exports = {};
4972
+ __export(swarm_exports, {
4973
+ swarm: () => swarm
3856
4974
  });
3857
- async function run(args) {
4975
/**
 * `majlis swarm "goal" [--parallel N]` — run several experiments in parallel
 * git worktrees and merge the best one.
 *
 * Steps, in order:
 *   1. Parse the goal and the parallel count (clamped to 2..MAX_PARALLEL).
 *   2. Refuse to run on a dirty working tree.
 *   3. Ask the planner for hypotheses and create one worktree per hypothesis.
 *   4. Run every worktree experiment concurrently and record each outcome.
 *   5. Import results into the main DB and pick the best-graded experiment.
 *   6. Merge the winner if its grade is mergeable; mark the other finished
 *      experiments as dead ends.
 *   7. Record the run outcome, clean up the worktrees, print a summary.
 *
 * @param {string[]} args - Command-line arguments following `swarm`.
 * @returns {Promise<void>}
 * @throws {Error} When not inside a Majlis project, when no goal is given,
 *   or when the working tree has uncommitted changes.
 */
async function swarm(args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");

  const parallelStr = getFlagValue(args, "--parallel");
  // The numeric value after --parallel does not start with "--", so a plain
  // prefix filter would append it to the goal text ("fix bug 4"). Skip it.
  const flagIdx = args.indexOf("--parallel");
  const flagValueIdx =
    flagIdx !== -1 &&
    parallelStr != null &&
    /^\d+$/.test(parallelStr) &&
    args[flagIdx + 1] === parallelStr
      ? flagIdx + 1
      : -1;
  const goal = args
    .filter((a, i) => i !== flagValueIdx && !a.startsWith("--"))
    .join(" ");
  if (!goal) throw new Error('Usage: majlis swarm "goal description" [--parallel N]');

  const parallelCount = Math.min(
    Math.max(2, Number.parseInt(parallelStr ?? String(DEFAULT_PARALLEL), 10) || DEFAULT_PARALLEL),
    MAX_PARALLEL
  );

  try {
    const status2 = import_node_child_process7.execSync("git status --porcelain", {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    }).trim();
    if (status2) {
      warn("Working tree has uncommitted changes. Commit or stash before swarming.");
      throw new Error("Dirty working tree. Commit or stash first.");
    }
  } catch (err) {
    // Only the dirty-tree error aborts; a failing `git status` is tolerated.
    if (err instanceof Error && err.message.includes("Dirty working tree")) throw err;
    warn("Could not check git status.");
  }

  const db = getDb(root);
  const swarmRun = createSwarmRun(db, goal, parallelCount);
  header(`Swarm Mode \u2014 ${goal}`);
  info(`Generating ${parallelCount} diverse hypotheses...`);

  // The planner may return more hypotheses than requested; cap the list so
  // the clamped parallel count (and MAX_PARALLEL) is actually honoured.
  const hypotheses = (await deriveMultipleHypotheses(goal, root, parallelCount)).slice(0, parallelCount);
  if (hypotheses.length === 0) {
    success("Planner says the goal has been met. Nothing to swarm.");
    updateSwarmRun(db, swarmRun.id, "completed", 0, null);
    return;
  }
  info(`Got ${hypotheses.length} hypotheses:`);
  for (let i = 0; i < hypotheses.length; i++) {
    info(` ${i + 1}. ${hypotheses[i]}`);
  }

  const worktrees = [];
  for (let i = 0; i < hypotheses.length; i++) {
    const paddedNum = String(i + 1).padStart(3, "0");
    const slug = await generateSlug(hypotheses[i], root);
    try {
      const wt = createWorktree(root, slug, paddedNum);
      wt.hypothesis = hypotheses[i];
      initializeWorktree(root, wt.path);
      worktrees.push(wt);
      addSwarmMember(db, swarmRun.id, slug, wt.path);
      info(`Created worktree ${paddedNum}: ${slug}`);
    } catch (err) {
      // One failed worktree does not abort the swarm; the rest still run.
      const msg = err instanceof Error ? err.message : String(err);
      warn(`Failed to create worktree for hypothesis ${i + 1}: ${msg}`);
    }
  }
  if (worktrees.length === 0) {
    warn("No worktrees created. Aborting swarm.");
    updateSwarmRun(db, swarmRun.id, "failed", 0, null);
    return;
  }

  info(`Running ${worktrees.length} experiments in parallel...`);
  info("");
  const settled = await Promise.allSettled(
    worktrees.map((wt) => runExperimentInWorktree(wt))
  );
  // allSettled preserves input order, so settled[i] belongs to worktrees[i].
  const results = settled.map((s, i) => {
    if (s.status === "fulfilled") return s.value;
    return {
      worktree: worktrees[i],
      experiment: null,
      finalStatus: "error",
      overallGrade: null,
      costUsd: 0,
      stepCount: 0,
      error: s.reason instanceof Error ? s.reason.message : String(s.reason)
    };
  });
  for (const r of results) {
    updateSwarmMember(
      db,
      swarmRun.id,
      r.worktree.slug,
      r.finalStatus,
      r.overallGrade,
      r.costUsd,
      r.error ?? null
    );
  }

  info("");
  header("Aggregation");
  const summary = aggregateSwarmResults(root, db, results);
  summary.goal = goal;

  if (summary.bestExperiment && isMergeable(summary.bestExperiment.overallGrade)) {
    const best = summary.bestExperiment;
    info(`Best experiment: ${best.worktree.slug} (${best.overallGrade})`);
    try {
      // Argument vector instead of a shell string: the branch name and slug
      // can never be interpreted by a shell.
      import_node_child_process7.execFileSync(
        "git",
        ["merge", best.worktree.branch, "--no-ff", "-m", `Merge swarm winner: ${best.worktree.slug}`],
        { cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }
      );
      success(`Merged ${best.worktree.slug} into main.`);
    } catch {
      warn(`Git merge of ${best.worktree.slug} failed. Merge manually with:`);
      info(` git merge ${best.worktree.branch} --no-ff`);
    }
  } else {
    info("No experiment achieved sound/good grade. Nothing merged.");
  }

  // Every finished experiment other than the best one is closed as a dead end.
  for (const r of results) {
    if (r === summary.bestExperiment || r.error || !r.experiment) continue;
    const mainExp = getExperimentBySlug(db, r.worktree.slug);
    if (mainExp && mainExp.status !== "dead_end") {
      updateExperimentStatus(db, mainExp.id, "dead_end");
    }
  }

  updateSwarmRun(
    db,
    swarmRun.id,
    summary.errorCount === results.length ? "failed" : "completed",
    summary.totalCostUsd,
    summary.bestExperiment?.worktree.slug ?? null
  );

  info("Cleaning up worktrees...");
  for (const wt of worktrees) {
    cleanupWorktree(root, wt);
  }

  info("");
  header("Swarm Summary");
  info(`Goal: ${goal}`);
  info(`Parallel: ${worktrees.length}`);
  info(`Results:`);
  for (const r of results) {
    const grade = r.overallGrade ?? "n/a";
    const status2 = r.error ? `ERROR: ${r.error.slice(0, 60)}` : r.finalStatus;
    const marker = r === summary.bestExperiment ? " <-- BEST" : "";
    info(` ${r.worktree.paddedNum} ${r.worktree.slug}: ${grade} (${status2})${marker}`);
  }
  info(`Merged: ${summary.mergedCount} | Dead-ends: ${summary.deadEndCount} | Errors: ${summary.errorCount}`);
}
5113
/**
 * Tell whether an overall grade is good enough to merge.
 *
 * @param {*} grade - Overall grade of an experiment (may be null).
 * @returns {boolean} `true` only for the grades "sound" and "good".
 */
function isMergeable(grade) {
  switch (grade) {
    case "sound":
    case "good":
      return true;
    default:
      return false;
  }
}
5116
+ async function deriveMultipleHypotheses(goal, root, count) {
5117
+ const synthesis = truncateContext(
5118
+ readFileOrEmpty(path14.join(root, "docs", "synthesis", "current.md")),
5119
+ CONTEXT_LIMITS.synthesis
5120
+ );
5121
+ const fragility = truncateContext(
5122
+ readFileOrEmpty(path14.join(root, "docs", "synthesis", "fragility.md")),
5123
+ CONTEXT_LIMITS.fragility
5124
+ );
5125
+ const deadEndsDoc = truncateContext(
5126
+ readFileOrEmpty(path14.join(root, "docs", "synthesis", "dead-ends.md")),
5127
+ CONTEXT_LIMITS.deadEnds
5128
+ );
5129
+ const db = getDb(root);
3955
5130
  const deadEnds = listAllDeadEnds(db);
3956
5131
  const config = loadConfig(root);
3957
5132
  let metricsOutput = "";
3958
5133
  if (config.metrics?.command) {
3959
5134
  try {
3960
- metricsOutput = (0, import_node_child_process5.execSync)(config.metrics.command, {
5135
+ metricsOutput = (0, import_node_child_process7.execSync)(config.metrics.command, {
3961
5136
  cwd: root,
3962
5137
  encoding: "utf-8",
3963
5138
  timeout: 6e4,
@@ -3968,7 +5143,7 @@ async function deriveNextHypothesis(goal, root, db) {
3968
5143
  }
3969
5144
  }
3970
5145
  const result = await spawnSynthesiser({
3971
- taskPrompt: `You are the Planner for an autonomous Majlis run.
5146
+ taskPrompt: `You are the Planner for a parallel Majlis swarm.
3972
5147
 
3973
5148
  ## Goal
3974
5149
  ${goal}
@@ -3988,119 +5163,195 @@ ${deadEndsDoc || "(none)"}
3988
5163
  ## Dead Ends (from DB \u2014 ${deadEnds.length} total)
3989
5164
  ${deadEnds.map((d) => `- [${d.category ?? "structural"}] ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}
3990
5165
 
3991
- Note: [structural] dead ends are HARD CONSTRAINTS \u2014 your hypothesis MUST NOT repeat these approaches.
5166
+ Note: [structural] dead ends are HARD CONSTRAINTS \u2014 hypotheses MUST NOT repeat these approaches.
3992
5167
  [procedural] dead ends are process failures \u2014 the approach may still be valid if executed differently.
3993
5168
 
3994
5169
  ## Your Task
3995
5170
  1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
3996
5171
  2. If YES \u2014 output the JSON block below with goal_met: true.
3997
- 3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
3998
- - It must NOT repeat a dead-ended approach (check the dead-end registry!)
3999
- - It should attack the weakest point revealed by synthesis/fragility
4000
- - It must be specific and actionable \u2014 name the function or mechanism to change
4001
- - Do NOT reference specific line numbers \u2014 they shift between experiments
4002
- - The hypothesis should be a single sentence describing what to do, e.g.:
4003
- "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
5172
+ 3. If NO \u2014 generate exactly ${count} DIVERSE hypotheses for parallel testing.
5173
+
5174
+ Requirements for hypotheses:
5175
+ - Each must attack the problem from a DIFFERENT angle
5176
+ - They must NOT share the same mechanism, function target, or strategy
5177
+ - At least one should be an unconventional or indirect approach
5178
+ - None may repeat a dead-ended structural approach
5179
+ - Each must be specific and actionable \u2014 name the function or mechanism to change
5180
+ - Do NOT reference specific line numbers \u2014 they shift between experiments
4004
5181
 
4005
5182
  CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
4006
- <!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->
5183
+ <!-- majlis-json {"goal_met": false, "hypotheses": ["hypothesis 1", "hypothesis 2", "hypothesis 3"]} -->
4007
5184
 
4008
5185
  If the goal is met:
4009
- <!-- majlis-json {"goal_met": true, "hypothesis": null} -->`
5186
+ <!-- majlis-json {"goal_met": true, "hypotheses": []} -->`
4010
5187
  }, root);
4011
- const structured = result.structured;
4012
- if (structured?.goal_met === true) {
4013
- return null;
4014
- }
4015
- if (structured?.hypothesis) {
4016
- return structured.hypothesis;
5188
+ if (result.structured?.goal_met === true) return [];
5189
+ if (result.structured?.hypotheses && Array.isArray(result.structured.hypotheses)) {
5190
+ return result.structured.hypotheses.filter(
5191
+ (h) => typeof h === "string" && h.length > 10
5192
+ );
4017
5193
  }
4018
- const jsonMatch = result.output.match(/"hypothesis"\s*:\s*"([^"]+)"/);
4019
- if (jsonMatch && jsonMatch[1].length > 10) return jsonMatch[1].trim();
4020
5194
  const blockMatch = result.output.match(/<!--\s*majlis-json\s*(\{[\s\S]*?\})\s*-->/);
4021
5195
  if (blockMatch) {
4022
5196
  try {
4023
5197
  const parsed = JSON.parse(blockMatch[1]);
4024
- if (parsed.goal_met === true) return null;
4025
- if (parsed.hypothesis) return parsed.hypothesis;
5198
+ if (parsed.goal_met === true) return [];
5199
+ if (Array.isArray(parsed.hypotheses)) {
5200
+ return parsed.hypotheses.filter(
5201
+ (h) => typeof h === "string" && h.length > 10
5202
+ );
5203
+ }
4026
5204
  } catch {
4027
5205
  }
4028
5206
  }
4029
- warn("Planner did not return structured output. Retrying with focused prompt...");
4030
- const retry = await spawnSynthesiser({
4031
- taskPrompt: `Based on this analysis, output ONLY a single-line JSON block:
5207
+ warn("Planner did not return structured hypotheses. Using goal as single hypothesis.");
5208
+ return [goal];
5209
+ }
5210
+ var path14, import_node_child_process7, MAX_PARALLEL, DEFAULT_PARALLEL;
5211
+ var init_swarm = __esm({
5212
+ "src/commands/swarm.ts"() {
5213
+ "use strict";
5214
+ path14 = __toESM(require("path"));
5215
+ import_node_child_process7 = require("child_process");
5216
+ init_connection();
5217
+ init_queries();
5218
+ init_spawn();
5219
+ init_config();
5220
+ init_worktree();
5221
+ init_runner();
5222
+ init_aggregate();
5223
+ init_format();
5224
+ MAX_PARALLEL = 8;
5225
+ DEFAULT_PARALLEL = 3;
5226
+ }
5227
+ });
4032
5228
 
4033
- ${result.output.slice(-2e3)}
5229
+ // src/commands/diagnose.ts
5230
+ var diagnose_exports = {};
5231
+ __export(diagnose_exports, {
5232
+ diagnose: () => diagnose
5233
+ });
5234
/**
 * `majlis diagnose ["focus area"] [--keep-scripts]` — spawn the diagnostician
 * agent over the full project state and save its report.
 *
 * The task prompt is assembled from the database export, the synthesis,
 * fragility and dead-end documents, the output of the configured metrics
 * command, the project objective and the optional focus text. The agent's
 * output is written to `docs/diagnosis/diagnosis-<timestamp>.md`. Unless
 * `--keep-scripts` is given, `.majlis/scripts/` is removed afterwards.
 *
 * @param {string[]} args - Command-line arguments following `diagnose`.
 *   Tokens not starting with `--` are joined into the focus text.
 * @returns {Promise<void>}
 * @throws {Error} When not inside a Majlis project.
 */
async function diagnose(args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  const focus = args.filter((a) => !a.startsWith("--")).join(" ");
  const keepScripts = args.includes("--keep-scripts");

  // Scratch directory for the agent's analysis scripts. With `recursive`,
  // mkdirSync is a no-op when the directory already exists.
  const scriptsDir = path15.join(root, ".majlis", "scripts");
  fs14.mkdirSync(scriptsDir, { recursive: true });

  header("Deep Diagnosis");
  if (focus) info(`Focus: ${focus}`);

  const dbExport = exportForDiagnostician(db);
  const synthesis = readFileOrEmpty(path15.join(root, "docs", "synthesis", "current.md"));
  const fragility = readFileOrEmpty(path15.join(root, "docs", "synthesis", "fragility.md"));
  const deadEndsDoc = readFileOrEmpty(path15.join(root, "docs", "synthesis", "dead-ends.md"));
  const config = loadConfig(root);

  let metricsOutput = "";
  if (config.metrics?.command) {
    try {
      metricsOutput = import_node_child_process8.execSync(config.metrics.command, {
        cwd: root,
        encoding: "utf-8",
        timeout: 6e4,
        stdio: ["pipe", "pipe", "pipe"]
      }).trim();
    } catch {
      // A failing metrics command must not block the diagnosis; the agent
      // is told about the failure instead.
      metricsOutput = "(metrics command failed)";
    }
  }

  let taskPrompt = `## Full Database Export (CANONICAL \u2014 source of truth)\n${dbExport}\n\n`;
  taskPrompt += `## Current Synthesis\n${synthesis || "(empty \u2014 no experiments yet)"}\n\n`;
  taskPrompt += `## Fragility Map\n${fragility || "(none)"}\n\n`;
  taskPrompt += `## Dead-End Registry\n${deadEndsDoc || "(none)"}\n\n`;
  taskPrompt += `## Current Metrics\n${metricsOutput || "(no metrics configured)"}\n\n`;
  taskPrompt += `## Project Objective\n${config.project?.objective || "(not specified)"}\n\n`;
  if (focus) {
    taskPrompt += `## Focus Area\nThe user has asked you to focus your diagnosis on: ${focus}\n\n`;
  }
  taskPrompt += `## Your Task\nPerform a deep diagnostic analysis of this project. Identify root causes, recurring patterns, evidence gaps, and investigation directions. You have 60 turns \u2014 use them for depth. Write analysis scripts to .majlis/scripts/ as needed.\n\nRemember: you may write files ONLY to .majlis/scripts/. You cannot modify project code.`;

  info("Spawning diagnostician (60 turns, full DB access)...");
  const result = await spawnAgent("diagnostician", { taskPrompt }, root);

  const diagnosisDir = path15.join(root, "docs", "diagnosis");
  fs14.mkdirSync(diagnosisDir, { recursive: true });
  // ISO timestamp with ":" and "." replaced, cut to seconds, so it is
  // usable as a file name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const artifactPath = path15.join(diagnosisDir, `diagnosis-${timestamp}.md`);
  fs14.writeFileSync(artifactPath, result.output);
  info(`Diagnostic report: docs/diagnosis/diagnosis-${timestamp}.md`);

  if (result.structured?.diagnosis) {
    const d = result.structured.diagnosis;
    if (d.root_causes?.length) {
      info(`Root causes identified: ${d.root_causes.length}`);
    }
    if (d.investigation_directions?.length) {
      info(`Investigation directions: ${d.investigation_directions.length}`);
    }
  }

  if (keepScripts) {
    info("Scripts preserved in .majlis/scripts/ (--keep-scripts)");
  } else {
    try {
      // Recursive removal also handles sub-directories the agent may have
      // created, which a per-entry unlink cannot delete.
      fs14.rmSync(scriptsDir, { recursive: true, force: true });
      info("Cleaned up .majlis/scripts/");
    } catch (err) {
      // Cleanup stays best-effort, but the failure is reported, not hidden.
      const msg = err instanceof Error ? err.message : String(err);
      warn(`Could not clean up .majlis/scripts/: ${msg}`);
    }
  }

  if (result.truncated) {
    warn("Diagnostician was truncated (hit 60 turn limit).");
  }
  success("Diagnosis complete.");
}
4080
- var fs11, path11, import_node_child_process5;
4081
- var init_run = __esm({
4082
- "src/commands/run.ts"() {
5335
+ var fs14, path15, import_node_child_process8;
5336
+ var init_diagnose = __esm({
5337
+ "src/commands/diagnose.ts"() {
4083
5338
  "use strict";
4084
- fs11 = __toESM(require("fs"));
4085
- path11 = __toESM(require("path"));
4086
- import_node_child_process5 = require("child_process");
5339
+ fs14 = __toESM(require("fs"));
5340
+ path15 = __toESM(require("path"));
5341
+ import_node_child_process8 = require("child_process");
4087
5342
  init_connection();
4088
5343
  init_queries();
4089
- init_machine();
4090
- init_next();
4091
- init_cycle();
4092
5344
  init_spawn();
4093
5345
  init_config();
4094
- init_shutdown();
4095
5346
  init_format();
4096
5347
  }
4097
5348
  });
4098
5349
 
4099
5350
  // src/cli.ts
4100
- var fs12 = __toESM(require("fs"));
4101
- var path12 = __toESM(require("path"));
5351
+ var fs15 = __toESM(require("fs"));
5352
+ var path16 = __toESM(require("path"));
4102
5353
  var VERSION = JSON.parse(
4103
- fs12.readFileSync(path12.join(__dirname, "..", "package.json"), "utf-8")
5354
+ fs15.readFileSync(path16.join(__dirname, "..", "package.json"), "utf-8")
4104
5355
  ).version;
4105
5356
  async function main() {
4106
5357
  let sigintCount = 0;
@@ -4210,11 +5461,21 @@ async function main() {
4210
5461
  await run2(rest);
4211
5462
  break;
4212
5463
  }
5464
+ case "swarm": {
5465
+ const { swarm: swarm2 } = await Promise.resolve().then(() => (init_swarm(), swarm_exports));
5466
+ await swarm2(rest);
5467
+ break;
5468
+ }
4213
5469
  case "audit": {
4214
5470
  const { audit: audit2 } = await Promise.resolve().then(() => (init_audit(), audit_exports));
4215
5471
  await audit2(rest);
4216
5472
  break;
4217
5473
  }
5474
+ case "diagnose": {
5475
+ const { diagnose: diagnose2 } = await Promise.resolve().then(() => (init_diagnose(), diagnose_exports));
5476
+ await diagnose2(rest);
5477
+ break;
5478
+ }
4218
5479
  default:
4219
5480
  console.error(`Unknown command: ${command}`);
4220
5481
  printHelp();
@@ -4267,6 +5528,7 @@ Queries:
4267
5528
 
4268
5529
  Audit:
4269
5530
  audit "objective" Maqasid check \u2014 is the frame right?
5531
+ diagnose ["focus area"] Deep diagnosis \u2014 root causes, patterns, gaps
4270
5532
 
4271
5533
  Sessions:
4272
5534
  session start "intent" Declare session intent
@@ -4274,6 +5536,7 @@ Sessions:
4274
5536
 
4275
5537
  Orchestration:
4276
5538
  run "goal" Autonomous orchestration until goal met
5539
+ swarm "goal" [--parallel N] Run N experiments in parallel worktrees
4277
5540
 
4278
5541
  Flags:
4279
5542
  --json Output as JSON