majlis 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +1104 -966
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -575,6 +575,10 @@ Read as much code as you need to understand the problem. Reading is free \u2014
575
575
  as many turns as necessary on Read, Grep, and Glob to build full context before
576
576
  you touch anything.
577
577
 
578
+ Do NOT read raw data files (fixtures/, ground truth JSON/STL). The synthesis
579
+ has the relevant facts. Reading raw data wastes turns re-deriving what the
580
+ doubt/challenge/verify cycle already established.
581
+
578
582
  ## The Rule: ONE Change, Then Document
579
583
 
580
584
  You make ONE code change per cycle. Not two, not "one more quick fix." ONE.
@@ -738,6 +742,9 @@ If the builder claims improvement but the framework metrics show regression, fla
738
742
  - Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
739
743
  - Do NOT run exhaustive diagnostics on every claim.
740
744
 
745
+ Framework-captured metrics are ground truth \u2014 if they show regression, that
746
+ alone justifies a "rejected" grade. Do not re-derive from raw fixture data.
747
+
741
748
  Grade each component: sound / good / weak / rejected
742
749
  Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
743
750
 
@@ -817,6 +824,13 @@ the database export.
817
824
  The framework does NOT auto-save your output for these files.
818
825
  7. Review classification: new sub-types? resolved sub-types?
819
826
 
827
+ You may ONLY write to these three files:
828
+ - docs/synthesis/current.md
829
+ - docs/synthesis/fragility.md
830
+ - docs/synthesis/dead-ends.md
831
+
832
+ Do NOT modify MEMORY.md, .claude/, classification/, experiments/, or any other paths.
833
+
820
834
  You may NOT write code, make decisions, or run experiments.
821
835
 
822
836
  ## Structured Output Format
@@ -1805,1076 +1819,1197 @@ var init_status = __esm({
1805
1819
  }
1806
1820
  });
1807
1821
 
1808
- // src/metrics.ts
1809
- function compareMetrics(db, experimentId, config) {
1810
- const before = getMetricsByExperimentAndPhase(db, experimentId, "before");
1811
- const after = getMetricsByExperimentAndPhase(db, experimentId, "after");
1812
- const fixtures = new Set([...before, ...after].map((m) => m.fixture));
1813
- const trackedMetrics = Object.keys(config.metrics.tracked);
1814
- const comparisons = [];
1815
- for (const fixture of fixtures) {
1816
- for (const metric of trackedMetrics) {
1817
- const b = before.find((m) => m.fixture === fixture && m.metric_name === metric);
1818
- const a = after.find((m) => m.fixture === fixture && m.metric_name === metric);
1819
- if (b && a) {
1820
- const direction = config.metrics.tracked[metric]?.direction ?? "lower_is_better";
1821
- const regression = isRegression(b.metric_value, a.metric_value, direction);
1822
- comparisons.push({
1823
- fixture,
1824
- metric,
1825
- before: b.metric_value,
1826
- after: a.metric_value,
1827
- delta: a.metric_value - b.metric_value,
1828
- regression
1829
- });
1830
- }
1831
- }
1832
- }
1833
- return comparisons;
1834
- }
1835
- function isRegression(before, after, direction) {
1836
- switch (direction) {
1837
- case "lower_is_better":
1838
- return after > before;
1839
- case "higher_is_better":
1840
- return after < before;
1841
- case "closer_to_gt":
1842
- return false;
1822
+ // src/agents/types.ts
1823
+ function getExtractionSchema(role) {
1824
+ switch (role) {
1825
+ case "builder":
1826
+ return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}';
1827
+ case "critic":
1828
+ return '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}';
1829
+ case "adversary":
1830
+ return '{"challenges": [{"description": "string", "reasoning": "string"}]}';
1831
+ case "verifier":
1832
+ return '{"grades": [{"component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string"}], "doubt_resolutions": [{"doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive"}]}';
1833
+ case "gatekeeper":
1834
+ return '{"gate_decision": "approve|reject|flag", "reason": "string", "stale_references": ["string"], "overlapping_dead_ends": [0]}';
1835
+ case "reframer":
1836
+ return '{"reframe": {"decomposition": "string", "divergences": ["string"], "recommendation": "string"}}';
1837
+ case "scout":
1838
+ return '{"findings": [{"approach": "string", "source": "string", "relevance": "string", "contradicts_current": true}]}';
1839
+ case "compressor":
1840
+ return '{"compression_report": {"synthesis_delta": "string", "new_dead_ends": ["string"], "fragility_changes": ["string"]}}';
1843
1841
  default:
1844
- return false;
1845
- }
1846
- }
1847
- function parseMetricsOutput(jsonStr) {
1848
- const data = JSON.parse(jsonStr);
1849
- const results = [];
1850
- if (data.fixtures && typeof data.fixtures === "object") {
1851
- for (const [fixture, metrics] of Object.entries(data.fixtures)) {
1852
- for (const [metricName, metricValue] of Object.entries(metrics)) {
1853
- if (typeof metricValue === "number") {
1854
- results.push({ fixture, metric_name: metricName, metric_value: metricValue });
1855
- }
1856
- }
1857
- }
1842
+ return EXTRACTION_SCHEMA;
1858
1843
  }
1859
- return results;
1860
1844
  }
1861
- var init_metrics = __esm({
1862
- "src/metrics.ts"() {
1845
+ var EXTRACTION_SCHEMA, ROLE_REQUIRED_FIELDS;
1846
+ var init_types = __esm({
1847
+ "src/agents/types.ts"() {
1863
1848
  "use strict";
1864
- init_queries();
1849
+ EXTRACTION_SCHEMA = `{
1850
+ "decisions": [{ "description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string" }],
1851
+ "grades": [{ "component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string" }],
1852
+ "doubts": [{ "claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical" }],
1853
+ "guidance": "string (actionable builder guidance)",
1854
+ "doubt_resolutions": [{ "doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive" }]
1855
+ }`;
1856
+ ROLE_REQUIRED_FIELDS = {
1857
+ builder: ["decisions"],
1858
+ critic: ["doubts"],
1859
+ adversary: ["challenges"],
1860
+ verifier: ["grades"],
1861
+ gatekeeper: ["gate_decision"],
1862
+ reframer: ["reframe"],
1863
+ scout: ["findings"],
1864
+ compressor: ["compression_report"]
1865
+ };
1865
1866
  }
1866
1867
  });
1867
1868
 
1868
- // src/commands/measure.ts
1869
- var measure_exports = {};
1870
- __export(measure_exports, {
1871
- baseline: () => baseline,
1872
- compare: () => compare,
1873
- measure: () => measure
1874
- });
1875
- async function baseline(args) {
1876
- await captureMetrics("before", args);
1877
- }
1878
- async function measure(args) {
1879
- await captureMetrics("after", args);
1880
- }
1881
- async function captureMetrics(phase, args) {
1882
- const root = findProjectRoot();
1883
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
1884
- const db = getDb(root);
1885
- const config = loadConfig(root);
1886
- const expIdStr = getFlagValue(args, "--experiment");
1887
- let exp;
1888
- if (expIdStr !== void 0) {
1889
- exp = getExperimentById(db, Number(expIdStr));
1869
+ // src/agents/parse.ts
1870
+ async function extractStructuredData(role, markdown) {
1871
+ const tier1 = extractMajlisJsonBlock(markdown);
1872
+ if (tier1) {
1873
+ const parsed = tryParseJson(tier1);
1874
+ if (parsed) return parsed;
1875
+ console.warn(`[majlis] Malformed JSON in <!-- majlis-json --> block for ${role}. Falling back.`);
1890
1876
  } else {
1891
- exp = getLatestExperiment(db);
1892
- }
1893
- if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
1894
- if (config.build.pre_measure) {
1895
- info(`Running pre-measure: ${config.build.pre_measure}`);
1896
- try {
1897
- (0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
1898
- } catch {
1899
- warn("Pre-measure command failed \u2014 continuing anyway.");
1900
- }
1877
+ console.warn(`[majlis] No <!-- majlis-json --> block found in ${role} output. Falling back.`);
1901
1878
  }
1902
- if (!config.metrics.command) {
1903
- throw new Error("No metrics.command configured in .majlis/config.json");
1879
+ const tier2 = extractViaPatterns(role, markdown);
1880
+ if (tier2 && hasData(tier2)) {
1881
+ console.warn(`[majlis] Used regex fallback for ${role}. Review extracted data.`);
1882
+ return tier2;
1904
1883
  }
1905
- info(`Running metrics: ${config.metrics.command}`);
1906
- let metricsOutput;
1884
+ console.warn(`[majlis] Regex fallback insufficient for ${role}. Using Haiku extraction.`);
1885
+ const tier3 = await extractViaHaiku(role, markdown);
1886
+ if (tier3) return tier3;
1887
+ console.error(
1888
+ `[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
1889
+ );
1890
+ return null;
1891
+ }
1892
+ function extractMajlisJsonBlock(markdown) {
1893
+ const match = markdown.match(/<!--\s*majlis-json\s*\n([\s\S]*?)-->/);
1894
+ if (!match) return null;
1895
+ return match[1].trim();
1896
+ }
1897
+ function tryParseJson(jsonStr) {
1907
1898
  try {
1908
- metricsOutput = (0, import_node_child_process.execSync)(config.metrics.command, {
1909
- cwd: root,
1910
- encoding: "utf-8",
1911
- stdio: ["pipe", "pipe", "pipe"]
1912
- });
1913
- } catch (err) {
1914
- throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
1915
- }
1916
- const parsed = parseMetricsOutput(metricsOutput);
1917
- if (parsed.length === 0) {
1918
- warn("Metrics command returned no data.");
1919
- return;
1899
+ return JSON.parse(jsonStr);
1900
+ } catch {
1901
+ return null;
1920
1902
  }
1921
- for (const m of parsed) {
1922
- insertMetric(db, exp.id, phase, m.fixture, m.metric_name, m.metric_value);
1903
+ }
1904
+ function extractViaPatterns(role, markdown) {
1905
+ const result = {};
1906
+ const decisionPattern = /\[(?:decision|Decision)\].*?(?:description|Description):\s*(.+?)(?:\n|$).*?(?:evidence.?level|Evidence.?Level|level):\s*(proof|test|strong_consensus|consensus|analogy|judgment).*?(?:justification|Justification):\s*(.+?)(?:\n|$)/gis;
1907
+ const decisions = [];
1908
+ const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
1909
+ let match;
1910
+ while ((match = evidenceMarkers.exec(markdown)) !== null) {
1911
+ decisions.push({
1912
+ description: match[1].trim(),
1913
+ evidence_level: match[2].toLowerCase().trim(),
1914
+ justification: "Extracted via regex \u2014 review"
1915
+ });
1923
1916
  }
1924
- success(`Captured ${parsed.length} metric(s) for ${exp.slug} (phase: ${phase})`);
1925
- if (config.build.post_measure) {
1926
- try {
1927
- (0, import_node_child_process.execSync)(config.build.post_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
1928
- } catch {
1929
- warn("Post-measure command failed.");
1917
+ const inlineTagPattern = /\[(proof|test|strong_consensus|consensus|analogy|judgment)\]\s*(.+?)(?:\n|$)/gi;
1918
+ while ((match = inlineTagPattern.exec(markdown)) !== null) {
1919
+ const desc = match[2].trim();
1920
+ if (!decisions.some((d) => d.description === desc)) {
1921
+ decisions.push({
1922
+ description: desc,
1923
+ evidence_level: match[1].toLowerCase(),
1924
+ justification: "Extracted via regex \u2014 review"
1925
+ });
1930
1926
  }
1931
1927
  }
1932
- }
1933
- async function compare(args, isJson) {
1934
- const root = findProjectRoot();
1935
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
1936
- const db = getDb(root);
1937
- const config = loadConfig(root);
1938
- const expIdStr = getFlagValue(args, "--experiment");
1939
- let exp;
1940
- if (expIdStr !== void 0) {
1941
- exp = getExperimentById(db, Number(expIdStr));
1942
- } else {
1943
- exp = getLatestExperiment(db);
1928
+ if (decisions.length > 0) result.decisions = decisions;
1929
+ const grades = [];
1930
+ const gradePattern = /(?:^|\n)\s*[-*]?\s*\*?\*?(?:Grade|GRADE|Component)\*?\*?.*?(?:component|Component)?\s*[:=]\s*(.+?)(?:\n|,).*?(?:grade|Grade)\s*[:=]\s*(sound|good|weak|rejected)/gim;
1931
+ while ((match = gradePattern.exec(markdown)) !== null) {
1932
+ grades.push({
1933
+ component: match[1].trim(),
1934
+ grade: match[2].toLowerCase().trim()
1935
+ });
1944
1936
  }
1945
- if (!exp) throw new Error("No active experiment.");
1946
- const comparisons = compareMetrics(db, exp.id, config);
1947
- if (comparisons.length === 0) {
1948
- warn(`No before/after metrics to compare for ${exp.slug}. Run baseline and measure first.`);
1949
- return;
1937
+ const simpleGradePattern = /(?:^|\n)\s*[-*]\s*\*?\*?(.+?)\*?\*?\s*[:—–-]\s*\*?\*?(sound|good|weak|rejected)\*?\*?/gim;
1938
+ while ((match = simpleGradePattern.exec(markdown)) !== null) {
1939
+ const comp = match[1].trim();
1940
+ if (!grades.some((g) => g.component === comp)) {
1941
+ grades.push({
1942
+ component: comp,
1943
+ grade: match[2].toLowerCase().trim()
1944
+ });
1945
+ }
1950
1946
  }
1951
- if (isJson) {
1952
- console.log(JSON.stringify({ experiment: exp.slug, comparisons }, null, 2));
1953
- return;
1947
+ if (grades.length > 0) result.grades = grades;
1948
+ const doubts = [];
1949
+ const doubtPattern = /(?:Doubt|DOUBT|Claim doubted|CLAIM)\s*(?:\d+)?[:.]?\s*(.+?)(?:\n|$)[\s\S]*?(?:Severity|SEVERITY)\s*[:=]\s*(minor|moderate|critical)/gim;
1950
+ while ((match = doubtPattern.exec(markdown)) !== null) {
1951
+ doubts.push({
1952
+ claim_doubted: match[1].trim(),
1953
+ evidence_level_of_claim: "unknown",
1954
+ // Don't fabricate — mark as unknown for review
1955
+ evidence_for_doubt: "Extracted via regex \u2014 review original document",
1956
+ severity: match[2].toLowerCase().trim()
1957
+ });
1954
1958
  }
1955
- header(`Metric Comparison \u2014 ${exp.slug}`);
1956
- const regressions = comparisons.filter((c) => c.regression);
1957
- const rows = comparisons.map((c) => [
1958
- c.fixture,
1959
- c.metric,
1960
- String(c.before),
1961
- String(c.after),
1962
- formatDelta(c.delta),
1963
- c.regression ? red("REGRESSION") : green("OK")
1964
- ]);
1965
- console.log(table(["Fixture", "Metric", "Before", "After", "Delta", "Status"], rows));
1966
- if (regressions.length > 0) {
1967
- console.log();
1968
- warn(`${regressions.length} regression(s) detected!`);
1969
- } else {
1970
- console.log();
1971
- success("No regressions detected.");
1959
+ if (doubts.length > 0) result.doubts = doubts;
1960
+ return result;
1961
+ }
1962
+ async function extractViaHaiku(role, markdown) {
1963
+ try {
1964
+ const truncated = markdown.length > 8e3 ? markdown.slice(0, 8e3) + "\n[truncated]" : markdown;
1965
+ const schema = getExtractionSchema(role);
1966
+ const prompt = `Extract structured data from this ${role} document as JSON. Follow this schema exactly: ${schema}
1967
+
1968
+ Document:
1969
+ ${truncated}`;
1970
+ const conversation = (0, import_claude_agent_sdk.query)({
1971
+ prompt,
1972
+ options: {
1973
+ model: "haiku",
1974
+ tools: [],
1975
+ systemPrompt: "You are a JSON extraction assistant. Output only valid JSON matching the requested schema. No markdown, no explanation, just JSON.",
1976
+ permissionMode: "bypassPermissions",
1977
+ allowDangerouslySkipPermissions: true,
1978
+ maxTurns: 1,
1979
+ persistSession: false
1980
+ }
1981
+ });
1982
+ let resultText = "";
1983
+ for await (const message of conversation) {
1984
+ if (message.type === "assistant") {
1985
+ for (const block of message.message.content) {
1986
+ if (block.type === "text") {
1987
+ resultText += block.text;
1988
+ }
1989
+ }
1990
+ }
1991
+ }
1992
+ return tryParseJson(resultText.trim());
1993
+ } catch (err) {
1994
+ console.warn(`[majlis] Haiku extraction failed for ${role}: ${err instanceof Error ? err.message : String(err)}`);
1995
+ return null;
1972
1996
  }
1973
1997
  }
1974
- function formatDelta(delta) {
1975
- const prefix = delta > 0 ? "+" : "";
1976
- return `${prefix}${delta.toFixed(4)}`;
1998
+ function hasData(output) {
1999
+ return !!(output.decisions && output.decisions.length > 0 || output.grades && output.grades.length > 0 || output.doubts && output.doubts.length > 0 || output.challenges && output.challenges.length > 0 || output.findings && output.findings.length > 0 || output.guidance || output.reframe || output.compression_report || output.gate_decision);
1977
2000
  }
1978
- var import_node_child_process;
1979
- var init_measure = __esm({
1980
- "src/commands/measure.ts"() {
2001
+ function validateForRole(role, output) {
2002
+ const required = ROLE_REQUIRED_FIELDS[role];
2003
+ if (!required) return { valid: true, missing: [] };
2004
+ const missing = required.filter((field) => {
2005
+ const value = output[field];
2006
+ if (value === void 0 || value === null) return true;
2007
+ if (Array.isArray(value) && value.length === 0) return true;
2008
+ return false;
2009
+ });
2010
+ return { valid: missing.length === 0, missing };
2011
+ }
2012
+ var import_claude_agent_sdk;
2013
+ var init_parse = __esm({
2014
+ "src/agents/parse.ts"() {
1981
2015
  "use strict";
1982
- import_node_child_process = require("child_process");
1983
- init_connection();
1984
- init_queries();
1985
- init_metrics();
1986
- init_config();
1987
- init_format();
2016
+ init_types();
2017
+ import_claude_agent_sdk = require("@anthropic-ai/claude-agent-sdk");
1988
2018
  }
1989
2019
  });
1990
2020
 
1991
- // src/commands/experiment.ts
1992
- var experiment_exports = {};
1993
- __export(experiment_exports, {
1994
- newExperiment: () => newExperiment,
1995
- revert: () => revert
1996
- });
1997
- async function newExperiment(args) {
1998
- const root = findProjectRoot();
1999
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2000
- const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
2001
- if (!hypothesis) {
2002
- throw new Error('Usage: majlis new "hypothesis"');
2003
- }
2004
- const db = getDb(root);
2005
- const config = loadConfig(root);
2006
- const slug = slugify(hypothesis);
2007
- if (getExperimentBySlug(db, slug)) {
2008
- throw new Error(`Experiment with slug "${slug}" already exists.`);
2009
- }
2010
- const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
2011
- const num = allExps.count + 1;
2012
- const paddedNum = String(num).padStart(3, "0");
2013
- const branch = `exp/${paddedNum}-${slug}`;
2014
- try {
2015
- (0, import_node_child_process2.execSync)(`git checkout -b ${branch}`, {
2016
- cwd: root,
2017
- encoding: "utf-8",
2018
- stdio: ["pipe", "pipe", "pipe"]
2019
- });
2020
- info(`Created branch: ${branch}`);
2021
- } catch (err) {
2022
- warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
2021
+ // src/agents/spawn.ts
2022
+ function loadAgentDefinition(role, projectRoot) {
2023
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2024
+ const filePath = path4.join(root, ".majlis", "agents", `${role}.md`);
2025
+ if (!fs4.existsSync(filePath)) {
2026
+ throw new Error(`Agent definition not found: ${filePath}`);
2023
2027
  }
2024
- const subType = getFlagValue(args, "--sub-type") ?? null;
2025
- const exp = createExperiment(db, slug, branch, hypothesis, subType, null);
2026
- success(`Created experiment #${exp.id}: ${exp.slug}`);
2027
- const docsDir = path4.join(root, "docs", "experiments");
2028
- const templatePath = path4.join(docsDir, "_TEMPLATE.md");
2029
- if (fs4.existsSync(templatePath)) {
2030
- const template = fs4.readFileSync(templatePath, "utf-8");
2031
- const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, subType ?? "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
2032
- const logPath = path4.join(docsDir, `${paddedNum}-${slug}.md`);
2033
- fs4.writeFileSync(logPath, logContent);
2034
- info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
2028
+ const content = fs4.readFileSync(filePath, "utf-8");
2029
+ const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
2030
+ if (!frontmatterMatch) {
2031
+ throw new Error(`Invalid agent definition (missing YAML frontmatter): ${filePath}`);
2035
2032
  }
2036
- if (config.cycle.auto_baseline_on_new_experiment && config.metrics.command) {
2037
- info("Auto-baselining... (run `majlis baseline` to do this manually)");
2038
- try {
2039
- const { baseline: baseline2 } = await Promise.resolve().then(() => (init_measure(), measure_exports));
2040
- await baseline2(["--experiment", String(exp.id)]);
2041
- } catch (err) {
2042
- warn("Auto-baseline failed \u2014 run `majlis baseline` manually.");
2043
- }
2033
+ const frontmatter = frontmatterMatch[1];
2034
+ const body = frontmatterMatch[2].trim();
2035
+ const name = extractYamlField(frontmatter, "name") ?? role;
2036
+ const model = extractYamlField(frontmatter, "model") ?? "opus";
2037
+ const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
2038
+ const tools = toolsStr.replace(/[\[\]]/g, "").split(",").map((t) => t.trim()).filter(Boolean);
2039
+ return { name, model, tools, systemPrompt: body };
2040
+ }
2041
+ function buildCheckpointMessage(role, toolUseCount, maxTurns) {
2042
+ const approxTurn = Math.round(toolUseCount / 2);
2043
+ const header2 = `[MAJLIS CHECKPOINT \u2014 ~${approxTurn} of ${maxTurns} turns used]`;
2044
+ switch (role) {
2045
+ case "builder":
2046
+ return `${header2}
2047
+ Reminder: ONE code change per cycle.
2048
+ - Have you run the benchmark? YES \u2192 document results + output JSON + STOP.
2049
+ - If NO \u2192 run it now, then wrap up.
2050
+ Do NOT start a second change or investigate unrelated failures.`;
2051
+ case "verifier":
2052
+ return `${header2}
2053
+ AT MOST 3 diagnostic scripts total.
2054
+ - If \u22653 scripts run \u2192 produce grades + output JSON now.
2055
+ - Trust framework metrics. Do not re-derive from raw data.`;
2056
+ case "critic":
2057
+ return `${header2}
2058
+ Focus on the SINGLE weakest assumption.
2059
+ - Have you identified the core doubt? YES \u2192 write it up + output JSON.
2060
+ - Do not enumerate every possible concern \u2014 pick the most dangerous one.`;
2061
+ case "adversary":
2062
+ return `${header2}
2063
+ Design ONE targeted challenge, not a test suite.
2064
+ - Have you defined the challenge? YES \u2192 write it up + output JSON.
2065
+ - Focus on what would DISPROVE the hypothesis, not general testing.`;
2066
+ case "compressor":
2067
+ return `${header2}
2068
+ You may ONLY write to docs/synthesis/.
2069
+ - Have you updated current.md, fragility.md, dead-ends.md?
2070
+ - If yes \u2192 output compression report JSON.
2071
+ - Do NOT write to MEMORY.md or files outside docs/synthesis/.`;
2072
+ default:
2073
+ return `${header2}
2074
+ Check: is your core task done? If yes, wrap up and output JSON.`;
2075
+ }
2076
+ }
2077
+ function buildPreToolUseGuards(role) {
2078
+ if (role === "compressor") {
2079
+ const guardHook = async (input) => {
2080
+ const toolInput = input.tool_input ?? {};
2081
+ const filePath = toolInput.file_path ?? "";
2082
+ if (filePath && !filePath.includes("/docs/synthesis/")) {
2083
+ return {
2084
+ decision: "block",
2085
+ reason: `Compressor may only write to docs/synthesis/. Blocked: ${filePath}`
2086
+ };
2087
+ }
2088
+ return {};
2089
+ };
2090
+ return [
2091
+ { matcher: "Write", hooks: [guardHook] },
2092
+ { matcher: "Edit", hooks: [guardHook] }
2093
+ ];
2044
2094
  }
2095
+ return void 0;
2045
2096
  }
2046
- async function revert(args) {
2047
- const root = findProjectRoot();
2048
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2049
- const db = getDb(root);
2050
- let exp;
2051
- const slugArg = args.filter((a) => !a.startsWith("--"))[0];
2052
- if (slugArg) {
2053
- exp = getExperimentBySlug(db, slugArg);
2054
- if (!exp) throw new Error(`Experiment not found: ${slugArg}`);
2055
- } else {
2056
- exp = getLatestExperiment(db);
2057
- if (!exp) throw new Error("No active experiments to revert.");
2097
+ function buildAgentHooks(role, maxTurns) {
2098
+ const result = {};
2099
+ let hasHooks = false;
2100
+ const interval = CHECKPOINT_INTERVAL[role];
2101
+ if (interval) {
2102
+ let toolUseCount = 0;
2103
+ const checkpointHook = async () => {
2104
+ toolUseCount++;
2105
+ if (toolUseCount % interval === 0) {
2106
+ const msg = buildCheckpointMessage(role, toolUseCount, maxTurns);
2107
+ return {
2108
+ hookSpecificOutput: {
2109
+ hookEventName: "PostToolUse",
2110
+ additionalContext: msg
2111
+ }
2112
+ };
2113
+ }
2114
+ return {};
2115
+ };
2116
+ result.PostToolUse = [{ hooks: [checkpointHook] }];
2117
+ hasHooks = true;
2058
2118
  }
2059
- const reason = getFlagValue(args, "--reason") ?? "Manually reverted";
2060
- const category = args.includes("--structural") ? "structural" : "procedural";
2061
- insertDeadEnd(
2062
- db,
2063
- exp.id,
2064
- exp.hypothesis ?? exp.slug,
2065
- reason,
2066
- `Reverted: ${reason}`,
2067
- exp.sub_type,
2068
- category
2069
- );
2070
- updateExperimentStatus(db, exp.id, "dead_end");
2071
- try {
2072
- const currentBranch = (0, import_node_child_process2.execSync)("git rev-parse --abbrev-ref HEAD", {
2073
- cwd: root,
2074
- encoding: "utf-8"
2075
- }).trim();
2076
- if (currentBranch === exp.branch) {
2077
- (0, import_node_child_process2.execSync)("git checkout main 2>/dev/null || git checkout master", {
2078
- cwd: root,
2079
- encoding: "utf-8",
2080
- stdio: ["pipe", "pipe", "pipe"]
2081
- });
2082
- }
2083
- } catch {
2084
- warn("Could not switch git branches \u2014 do this manually.");
2119
+ const guards = buildPreToolUseGuards(role);
2120
+ if (guards) {
2121
+ result.PreToolUse = guards;
2122
+ hasHooks = true;
2085
2123
  }
2086
- info(`Experiment ${exp.slug} reverted to dead-end. Reason: ${reason}`);
2124
+ return hasHooks ? result : void 0;
2087
2125
  }
2088
- function slugify(text) {
2089
- return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
2126
+ function extractYamlField(yaml, field) {
2127
+ const match = yaml.match(new RegExp(`^${field}:\\s*(.+)$`, "m"));
2128
+ return match ? match[1].trim() : null;
2090
2129
  }
2091
- var fs4, path4, import_node_child_process2;
2092
- var init_experiment = __esm({
2093
- "src/commands/experiment.ts"() {
2094
- "use strict";
2095
- fs4 = __toESM(require("fs"));
2096
- path4 = __toESM(require("path"));
2097
- import_node_child_process2 = require("child_process");
2098
- init_connection();
2099
- init_queries();
2100
- init_config();
2101
- init_format();
2102
- }
2103
- });
2130
+ async function spawnAgent(role, context, projectRoot) {
2131
+ const agentDef = loadAgentDefinition(role, projectRoot);
2132
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2133
+ const taskPrompt = context.taskPrompt ?? `Perform your role as ${agentDef.name}.`;
2134
+ const contextJson = JSON.stringify(context);
2135
+ const prompt = `Here is your context:
2104
2136
 
2105
- // src/commands/session.ts
2106
- var session_exports = {};
2107
- __export(session_exports, {
2108
- session: () => session
2109
- });
2110
- async function session(args) {
2111
- const subcommand = args[0];
2112
- if (!subcommand || subcommand !== "start" && subcommand !== "end") {
2113
- throw new Error('Usage: majlis session start "intent" | majlis session end');
2137
+ \`\`\`json
2138
+ ${contextJson}
2139
+ \`\`\`
2140
+
2141
+ ${taskPrompt}`;
2142
+ const turns = ROLE_MAX_TURNS[role] ?? 15;
2143
+ console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
2144
+ const { text: markdown, costUsd, truncated } = await runQuery({
2145
+ prompt,
2146
+ model: agentDef.model,
2147
+ tools: agentDef.tools,
2148
+ systemPrompt: agentDef.systemPrompt,
2149
+ cwd: root,
2150
+ maxTurns: turns,
2151
+ label: role,
2152
+ role
2153
+ });
2154
+ console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
2155
+ const artifactPath = writeArtifact(role, context, markdown, root);
2156
+ if (artifactPath) {
2157
+ console.log(`[${role}] Artifact written to ${artifactPath}`);
2114
2158
  }
2115
- const root = findProjectRoot();
2116
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2117
- const db = getDb(root);
2118
- if (subcommand === "start") {
2119
- const intent = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
2120
- if (!intent) {
2121
- throw new Error('Usage: majlis session start "intent"');
2159
+ const structured = await extractStructuredData(role, markdown);
2160
+ if (structured) {
2161
+ const { valid, missing } = validateForRole(role, structured);
2162
+ if (!valid) {
2163
+ console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
2122
2164
  }
2123
- const existing = getActiveSession(db);
2124
- if (existing) {
2125
- warn(`Session already active: "${existing.intent}" (started ${existing.started_at})`);
2126
- warn("End it first with `majlis session end`.");
2127
- return;
2128
- }
2129
- const latestExp = getLatestExperiment(db);
2130
- const sess = startSession(db, intent, latestExp?.id ?? null);
2131
- success(`Session started: "${intent}" (id: ${sess.id})`);
2132
- if (latestExp) {
2133
- info(`Linked to experiment: ${latestExp.slug} (${latestExp.status})`);
2134
- }
2135
- } else {
2136
- const active = getActiveSession(db);
2137
- if (!active) {
2138
- throw new Error("No active session to end.");
2139
- }
2140
- const accomplished = getFlagValue(args, "--accomplished") ?? null;
2141
- const unfinished = getFlagValue(args, "--unfinished") ?? null;
2142
- const fragility = getFlagValue(args, "--fragility") ?? null;
2143
- endSession(db, active.id, accomplished, unfinished, fragility);
2144
- success(`Session ended: "${active.intent}"`);
2145
- if (accomplished) info(`Accomplished: ${accomplished}`);
2146
- if (unfinished) info(`Unfinished: ${unfinished}`);
2147
- if (fragility) warn(`New fragility: ${fragility}`);
2148
2165
  }
2166
+ return { output: markdown, structured, truncated };
2149
2167
  }
2150
- var init_session = __esm({
2151
- "src/commands/session.ts"() {
2152
- "use strict";
2153
- init_connection();
2154
- init_queries();
2155
- init_config();
2156
- init_format();
2157
- }
2158
- });
2168
+ async function spawnSynthesiser(context, projectRoot) {
2169
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2170
+ const contextJson = JSON.stringify(context);
2171
+ const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
2172
+ const prompt = `Here is your context:
2159
2173
 
2160
- // src/commands/query.ts
2161
- var query_exports = {};
2162
- __export(query_exports, {
2163
- query: () => query
2164
- });
2165
- async function query(command, args, isJson) {
2166
- const root = findProjectRoot();
2167
- if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2168
- const db = getDb(root);
2169
- switch (command) {
2170
- case "decisions":
2171
- return queryDecisions(db, args, isJson);
2172
- case "dead-ends":
2173
- return queryDeadEnds(db, args, isJson);
2174
- case "fragility":
2175
- return queryFragility(root, isJson);
2176
- case "history":
2177
- return queryHistory(db, args, isJson);
2178
- case "circuit-breakers":
2179
- return queryCircuitBreakers(db, root, isJson);
2180
- case "check-commit":
2181
- return checkCommit(db);
2182
- }
2174
+ \`\`\`json
2175
+ ${contextJson}
2176
+ \`\`\`
2177
+
2178
+ ${taskPrompt}`;
2179
+ const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2180
+ console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
2181
+ const { text: markdown, costUsd, truncated } = await runQuery({
2182
+ prompt,
2183
+ model: "sonnet",
2184
+ tools: ["Read", "Glob", "Grep"],
2185
+ systemPrompt,
2186
+ cwd: root,
2187
+ maxTurns: 5,
2188
+ label: "synthesiser",
2189
+ role: "synthesiser"
2190
+ });
2191
+ console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
2192
+ return { output: markdown, structured: { guidance: markdown }, truncated };
2183
2193
  }
2184
- function queryDecisions(db, args, isJson) {
2185
- const level = getFlagValue(args, "--level");
2186
- const expIdStr = getFlagValue(args, "--experiment");
2187
- const experimentId = expIdStr !== void 0 ? Number(expIdStr) : void 0;
2188
- const decisions = listAllDecisions(db, level, experimentId);
2189
- if (isJson) {
2190
- console.log(JSON.stringify(decisions, null, 2));
2191
- return;
2192
- }
2193
- if (decisions.length === 0) {
2194
- info("No decisions found.");
2195
- return;
2196
- }
2197
- header("Decisions");
2198
- const rows = decisions.map((d) => [
2199
- String(d.id),
2200
- String(d.experiment_id),
2201
- evidenceColor(d.evidence_level),
2202
- d.description.slice(0, 60) + (d.description.length > 60 ? "..." : ""),
2203
- d.status
2204
- ]);
2205
- console.log(table(["ID", "Exp", "Level", "Description", "Status"], rows));
2194
+ async function spawnRecovery(role, partialOutput, context, projectRoot) {
2195
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2196
+ const expSlug = context.experiment?.slug ?? "unknown";
2197
+ console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
2198
+ const expDocPath = path4.join(
2199
+ root,
2200
+ "docs",
2201
+ "experiments",
2202
+ `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
2203
+ );
2204
+ const templatePath = path4.join(root, "docs", "experiments", "_TEMPLATE.md");
2205
+ const template = fs4.existsSync(templatePath) ? fs4.readFileSync(templatePath, "utf-8") : "";
2206
+ const currentDoc = fs4.existsSync(expDocPath) ? fs4.readFileSync(expDocPath, "utf-8") : "";
2207
+ const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
2208
+
2209
+ Here is the partial agent output (reasoning + tool calls):
2210
+ <partial_output>
2211
+ ${partialOutput.slice(-3e3)}
2212
+ </partial_output>
2213
+
2214
+ Here is the current experiment doc:
2215
+ <current_doc>
2216
+ ${currentDoc}
2217
+ </current_doc>
2218
+
2219
+ Here is the template that the experiment doc should follow:
2220
+ <template>
2221
+ ${template}
2222
+ </template>
2223
+
2224
+ Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
2225
+ - Keep any valid content from the current doc
2226
+ - Fill in what you can infer from the partial output
2227
+ - Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
2228
+ - The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
2229
+ - Do NOT include agent reasoning or thinking \u2014 only structured experiment content
2230
+ - Be concise. This is cleanup, not new work.`;
2231
+ const { text: _markdown } = await runQuery({
2232
+ prompt,
2233
+ model: "haiku",
2234
+ tools: ["Read", "Write"],
2235
+ systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
2236
+ cwd: root,
2237
+ maxTurns: 5,
2238
+ label: "recovery",
2239
+ role: "recovery"
2240
+ });
2241
+ console.log(`[recovery] Cleanup complete for ${expSlug}.`);
2206
2242
  }
2207
- function queryDeadEnds(db, args, isJson) {
2208
- const subType = getFlagValue(args, "--sub-type");
2209
- const searchTerm = getFlagValue(args, "--search");
2210
- let deadEnds;
2211
- if (subType) {
2212
- deadEnds = listDeadEndsBySubType(db, subType);
2213
- } else if (searchTerm) {
2214
- deadEnds = searchDeadEnds(db, searchTerm);
2215
- } else {
2216
- deadEnds = listAllDeadEnds(db);
2217
- }
2218
- if (isJson) {
2219
- console.log(JSON.stringify(deadEnds, null, 2));
2220
- return;
2221
- }
2222
- if (deadEnds.length === 0) {
2223
- info("No dead-ends recorded.");
2224
- return;
2243
+ async function runQuery(opts) {
2244
+ let truncated = false;
2245
+ const tag = opts.label ?? "majlis";
2246
+ const hooks = opts.role ? buildAgentHooks(opts.role, opts.maxTurns ?? 15) : void 0;
2247
+ const conversation = (0, import_claude_agent_sdk2.query)({
2248
+ prompt: opts.prompt,
2249
+ options: {
2250
+ model: opts.model,
2251
+ tools: opts.tools,
2252
+ systemPrompt: {
2253
+ type: "preset",
2254
+ preset: "claude_code",
2255
+ append: opts.systemPrompt
2256
+ },
2257
+ cwd: opts.cwd,
2258
+ permissionMode: "bypassPermissions",
2259
+ allowDangerouslySkipPermissions: true,
2260
+ maxTurns: opts.maxTurns ?? 15,
2261
+ persistSession: false,
2262
+ settingSources: ["project"],
2263
+ hooks
2264
+ }
2265
+ });
2266
+ const textParts = [];
2267
+ let costUsd = 0;
2268
+ let turnCount = 0;
2269
+ for await (const message of conversation) {
2270
+ if (message.type === "assistant") {
2271
+ turnCount++;
2272
+ let hasText = false;
2273
+ for (const block of message.message.content) {
2274
+ if (block.type === "text") {
2275
+ textParts.push(block.text);
2276
+ hasText = true;
2277
+ } else if (block.type === "tool_use") {
2278
+ const toolName = block.name ?? "tool";
2279
+ const input = block.input ?? {};
2280
+ const detail = formatToolDetail(toolName, input);
2281
+ process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2282
+ `);
2283
+ }
2284
+ }
2285
+ if (hasText) {
2286
+ const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
2287
+ if (preview) {
2288
+ process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2289
+ `);
2290
+ }
2291
+ }
2292
+ } else if (message.type === "tool_progress") {
2293
+ const elapsed = Math.round(message.elapsed_time_seconds);
2294
+ if (elapsed > 0 && elapsed % 5 === 0) {
2295
+ process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
2296
+ `);
2297
+ }
2298
+ } else if (message.type === "result") {
2299
+ if (message.subtype === "success") {
2300
+ costUsd = message.total_cost_usd;
2301
+ } else if (message.subtype === "error_max_turns") {
2302
+ truncated = true;
2303
+ costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
2304
+ console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
2305
+ } else {
2306
+ const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
2307
+ throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
2308
+ }
2309
+ }
2225
2310
  }
2226
- header("Dead-End Registry");
2227
- const rows = deadEnds.map((d) => [
2228
- String(d.id),
2229
- d.sub_type ?? "\u2014",
2230
- d.approach.slice(0, 40) + (d.approach.length > 40 ? "..." : ""),
2231
- d.structural_constraint.slice(0, 40) + (d.structural_constraint.length > 40 ? "..." : "")
2232
- ]);
2233
- console.log(table(["ID", "Sub-Type", "Approach", "Constraint"], rows));
2311
+ return { text: textParts.join("\n\n"), costUsd, truncated };
2234
2312
  }
2235
- function queryFragility(root, isJson) {
2236
- const fragPath = path5.join(root, "docs", "synthesis", "fragility.md");
2237
- if (!fs5.existsSync(fragPath)) {
2238
- info("No fragility map found.");
2239
- return;
2240
- }
2241
- const content = fs5.readFileSync(fragPath, "utf-8");
2242
- if (isJson) {
2243
- console.log(JSON.stringify({ content }, null, 2));
2244
- return;
2313
+ async function generateSlug(hypothesis, projectRoot) {
2314
+ const fallback = hypothesis.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 30).replace(/-$/, "");
2315
+ try {
2316
+ const { text } = await runQuery({
2317
+ prompt: `Generate a short, descriptive git branch slug (2-4 words, lowercase, hyphen-separated) for this experiment hypothesis:
2318
+
2319
+ "${hypothesis.slice(0, 500)}"
2320
+
2321
+ Output ONLY the slug, nothing else. Examples: uv-containment-filter, skip-degenerate-faces, fix-edge-sewing-order`,
2322
+ model: "haiku",
2323
+ tools: [],
2324
+ systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
2325
+ cwd: projectRoot,
2326
+ maxTurns: 1,
2327
+ label: "slug",
2328
+ role: "slug"
2329
+ });
2330
+ const slug = text.trim().toLowerCase().replace(/[^a-z0-9-]+/g, "").replace(/^-|-$/g, "").slice(0, 40);
2331
+ return slug.length >= 3 ? slug : fallback;
2332
+ } catch {
2333
+ return fallback;
2245
2334
  }
2246
- header("Fragility Map");
2247
- console.log(content);
2248
2335
  }
2249
- function queryHistory(db, args, isJson) {
2250
- const fixture = args.filter((a) => !a.startsWith("--"))[0];
2251
- if (!fixture) {
2252
- throw new Error("Usage: majlis history <fixture>");
2253
- }
2254
- const history = getMetricHistoryByFixture(db, fixture);
2255
- if (isJson) {
2256
- console.log(JSON.stringify(history, null, 2));
2257
- return;
2336
+ function formatToolDetail(toolName, input) {
2337
+ switch (toolName) {
2338
+ case "Read":
2339
+ return input.file_path ? ` ${input.file_path}` : "";
2340
+ case "Write":
2341
+ return input.file_path ? ` \u2192 ${input.file_path}` : "";
2342
+ case "Edit":
2343
+ return input.file_path ? ` ${input.file_path}` : "";
2344
+ case "Glob":
2345
+ return input.pattern ? ` ${input.pattern}` : "";
2346
+ case "Grep":
2347
+ return input.pattern ? ` /${input.pattern}/` : "";
2348
+ case "Bash":
2349
+ return input.command ? ` $ ${input.command.slice(0, 80)}` : "";
2350
+ case "WebSearch":
2351
+ return input.query ? ` "${input.query}"` : "";
2352
+ default:
2353
+ return "";
2258
2354
  }
2259
- if (history.length === 0) {
2260
- info(`No metric history for fixture: ${fixture}`);
2261
- return;
2355
+ }
2356
+ function writeArtifact(role, context, markdown, projectRoot) {
2357
+ const dirMap = {
2358
+ builder: "docs/experiments",
2359
+ critic: "docs/doubts",
2360
+ adversary: "docs/challenges",
2361
+ verifier: "docs/verification",
2362
+ reframer: "docs/reframes",
2363
+ compressor: "docs/synthesis",
2364
+ scout: "docs/rihla"
2365
+ };
2366
+ const dir = dirMap[role];
2367
+ if (!dir) return null;
2368
+ if (role === "builder" || role === "compressor") return null;
2369
+ const fullDir = path4.join(projectRoot, dir);
2370
+ if (!fs4.existsSync(fullDir)) {
2371
+ fs4.mkdirSync(fullDir, { recursive: true });
2262
2372
  }
2263
- header(`Metric History \u2014 ${fixture}`);
2264
- const rows = history.map((h) => [
2265
- String(h.experiment_id),
2266
- h.experiment_slug ?? "\u2014",
2267
- h.phase,
2268
- h.metric_name,
2269
- String(h.metric_value),
2270
- h.captured_at
2271
- ]);
2272
- console.log(table(["Exp", "Slug", "Phase", "Metric", "Value", "Captured"], rows));
2373
+ const expSlug = context.experiment?.slug ?? "general";
2374
+ const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
2375
+ const filename = `${nextNum}-${role}-${expSlug}.md`;
2376
+ const target = path4.join(fullDir, filename);
2377
+ fs4.writeFileSync(target, markdown);
2378
+ return target;
2273
2379
  }
2274
- function queryCircuitBreakers(db, root, isJson) {
2275
- const config = loadConfig(root);
2276
- const states = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
2277
- if (isJson) {
2278
- console.log(JSON.stringify(states, null, 2));
2279
- return;
2380
+ var fs4, path4, import_claude_agent_sdk2, ROLE_MAX_TURNS, CHECKPOINT_INTERVAL, DIM2, RESET2, CYAN2;
2381
+ var init_spawn = __esm({
2382
+ "src/agents/spawn.ts"() {
2383
+ "use strict";
2384
+ fs4 = __toESM(require("fs"));
2385
+ path4 = __toESM(require("path"));
2386
+ import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
2387
+ init_parse();
2388
+ init_connection();
2389
+ ROLE_MAX_TURNS = {
2390
+ builder: 50,
2391
+ critic: 30,
2392
+ adversary: 30,
2393
+ verifier: 50,
2394
+ compressor: 30,
2395
+ reframer: 20,
2396
+ scout: 20,
2397
+ gatekeeper: 10
2398
+ };
2399
+ CHECKPOINT_INTERVAL = {
2400
+ builder: 15,
2401
+ verifier: 12,
2402
+ critic: 15,
2403
+ adversary: 15,
2404
+ compressor: 15
2405
+ };
2406
+ DIM2 = "\x1B[2m";
2407
+ RESET2 = "\x1B[0m";
2408
+ CYAN2 = "\x1B[36m";
2280
2409
  }
2281
- if (states.length === 0) {
2282
- info("No circuit breaker data.");
2283
- return;
2410
+ });
2411
+
2412
+ // src/metrics.ts
2413
+ function compareMetrics(db, experimentId, config) {
2414
+ const before = getMetricsByExperimentAndPhase(db, experimentId, "before");
2415
+ const after = getMetricsByExperimentAndPhase(db, experimentId, "after");
2416
+ const fixtures = new Set([...before, ...after].map((m) => m.fixture));
2417
+ const trackedMetrics = Object.keys(config.metrics.tracked);
2418
+ const comparisons = [];
2419
+ for (const fixture of fixtures) {
2420
+ for (const metric of trackedMetrics) {
2421
+ const b = before.find((m) => m.fixture === fixture && m.metric_name === metric);
2422
+ const a = after.find((m) => m.fixture === fixture && m.metric_name === metric);
2423
+ if (b && a) {
2424
+ const direction = config.metrics.tracked[metric]?.direction ?? "lower_is_better";
2425
+ const regression = isRegression(b.metric_value, a.metric_value, direction);
2426
+ comparisons.push({
2427
+ fixture,
2428
+ metric,
2429
+ before: b.metric_value,
2430
+ after: a.metric_value,
2431
+ delta: a.metric_value - b.metric_value,
2432
+ regression
2433
+ });
2434
+ }
2435
+ }
2284
2436
  }
2285
- header("Circuit Breakers");
2286
- const rows = states.map((s) => [
2287
- s.sub_type,
2288
- String(s.failure_count),
2289
- String(config.cycle.circuit_breaker_threshold),
2290
- s.tripped ? red("TRIPPED") : green("OK")
2291
- ]);
2292
- console.log(table(["Sub-Type", "Failures", "Threshold", "Status"], rows));
2437
+ return comparisons;
2293
2438
  }
2294
- function checkCommit(db) {
2295
- let stdinData = "";
2296
- try {
2297
- stdinData = fs5.readFileSync(0, "utf-8");
2298
- } catch {
2439
+ function isRegression(before, after, direction) {
2440
+ switch (direction) {
2441
+ case "lower_is_better":
2442
+ return after > before;
2443
+ case "higher_is_better":
2444
+ return after < before;
2445
+ case "closer_to_gt":
2446
+ return false;
2447
+ default:
2448
+ return false;
2299
2449
  }
2300
- if (stdinData) {
2301
- try {
2302
- const hookInput = JSON.parse(stdinData);
2303
- const command = hookInput?.tool_input?.command ?? "";
2304
- if (!command.includes("git commit")) {
2305
- return;
2450
+ }
2451
+ function parseMetricsOutput(jsonStr) {
2452
+ const data = JSON.parse(jsonStr);
2453
+ const results = [];
2454
+ if (data.fixtures && typeof data.fixtures === "object") {
2455
+ for (const [fixture, metrics] of Object.entries(data.fixtures)) {
2456
+ for (const [metricName, metricValue] of Object.entries(metrics)) {
2457
+ if (typeof metricValue === "number") {
2458
+ results.push({ fixture, metric_name: metricName, metric_value: metricValue });
2459
+ }
2306
2460
  }
2307
- } catch {
2308
- }
2309
- }
2310
- const active = listActiveExperiments(db);
2311
- const unverified = active.filter(
2312
- (e) => !["merged", "dead_end", "verified", "resolved", "compressed"].includes(e.status)
2313
- );
2314
- if (unverified.length > 0) {
2315
- console.error(`[majlis] ${unverified.length} unverified experiment(s):`);
2316
- for (const e of unverified) {
2317
- console.error(` - ${e.slug} (${e.status})`);
2318
2461
  }
2319
- process.exit(1);
2320
2462
  }
2463
+ return results;
2321
2464
  }
2322
- var fs5, path5;
2323
- var init_query = __esm({
2324
- "src/commands/query.ts"() {
2465
+ var init_metrics = __esm({
2466
+ "src/metrics.ts"() {
2325
2467
  "use strict";
2326
- fs5 = __toESM(require("fs"));
2327
- path5 = __toESM(require("path"));
2328
- init_connection();
2329
2468
  init_queries();
2330
- init_config();
2331
- init_format();
2332
2469
  }
2333
2470
  });
2334
2471
 
2335
- // src/state/types.ts
2336
- var TRANSITIONS, GRADE_ORDER;
2337
- var init_types = __esm({
2338
- "src/state/types.ts"() {
2339
- "use strict";
2340
- TRANSITIONS = {
2341
- ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
2342
- ["reframed" /* REFRAMED */]: ["gated" /* GATED */],
2343
- ["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
2344
- // self-loop for rejected hypotheses
2345
- ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
2346
- // self-loop for retry after truncation
2347
- ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
2348
- ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
2349
- ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
2350
- ["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
2351
- ["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
2352
- ["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
2353
- ["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
2354
- // cycle-back skips gate
2355
- ["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
2356
- // cycle-back skips gate
2357
- ["merged" /* MERGED */]: [],
2358
- ["dead_end" /* DEAD_END */]: []
2359
- };
2360
- GRADE_ORDER = ["rejected", "weak", "good", "sound"];
2361
- }
2472
+ // src/commands/measure.ts
2473
+ var measure_exports = {};
2474
+ __export(measure_exports, {
2475
+ baseline: () => baseline,
2476
+ compare: () => compare,
2477
+ measure: () => measure
2362
2478
  });
2363
-
2364
- // src/state/machine.ts
2365
- function transition(current, target) {
2366
- const valid = TRANSITIONS[current];
2367
- if (!valid.includes(target)) {
2368
- throw new Error(
2369
- `Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
2370
- );
2371
- }
2372
- return target;
2373
- }
2374
- function validNext(current) {
2375
- return TRANSITIONS[current];
2479
+ async function baseline(args) {
2480
+ await captureMetrics("before", args);
2376
2481
  }
2377
- function isTerminal(status2) {
2378
- return TRANSITIONS[status2].length === 0;
2482
+ async function measure(args) {
2483
+ await captureMetrics("after", args);
2379
2484
  }
2380
- function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
2381
- if (valid.length === 0) {
2382
- throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
2485
+ async function captureMetrics(phase, args) {
2486
+ const root = findProjectRoot();
2487
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2488
+ const db = getDb(root);
2489
+ const config = loadConfig(root);
2490
+ const expIdStr = getFlagValue(args, "--experiment");
2491
+ let exp;
2492
+ if (expIdStr !== void 0) {
2493
+ exp = getExperimentById(db, Number(expIdStr));
2494
+ } else {
2495
+ exp = getLatestExperiment(db);
2383
2496
  }
2384
- const status2 = exp.status;
2385
- if (status2 === "classified" /* CLASSIFIED */ || status2 === "reframed" /* REFRAMED */) {
2386
- return valid.includes("gated" /* GATED */) ? "gated" /* GATED */ : valid[0];
2497
+ if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
2498
+ if (config.build.pre_measure) {
2499
+ info(`Running pre-measure: ${config.build.pre_measure}`);
2500
+ try {
2501
+ (0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
2502
+ } catch {
2503
+ warn("Pre-measure command failed \u2014 continuing anyway.");
2504
+ }
2387
2505
  }
2388
- if (status2 === "gated" /* GATED */) {
2389
- return valid.includes("building" /* BUILDING */) ? "building" /* BUILDING */ : valid[0];
2506
+ if (!config.metrics.command) {
2507
+ throw new Error("No metrics.command configured in .majlis/config.json");
2390
2508
  }
2391
- if (status2 === "built" /* BUILT */ && !hasDoubts2) {
2392
- return valid.includes("doubted" /* DOUBTED */) ? "doubted" /* DOUBTED */ : valid[0];
2509
+ info(`Running metrics: ${config.metrics.command}`);
2510
+ let metricsOutput;
2511
+ try {
2512
+ metricsOutput = (0, import_node_child_process.execSync)(config.metrics.command, {
2513
+ cwd: root,
2514
+ encoding: "utf-8",
2515
+ stdio: ["pipe", "pipe", "pipe"]
2516
+ });
2517
+ } catch (err) {
2518
+ throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
2393
2519
  }
2394
- if (status2 === "doubted" /* DOUBTED */ && !hasChallenges2) {
2395
- return valid.includes("challenged" /* CHALLENGED */) ? "challenged" /* CHALLENGED */ : valid[0];
2520
+ const parsed = parseMetricsOutput(metricsOutput);
2521
+ if (parsed.length === 0) {
2522
+ warn("Metrics command returned no data.");
2523
+ return;
2396
2524
  }
2397
- if (status2 === "doubted" /* DOUBTED */ || status2 === "challenged" /* CHALLENGED */) {
2398
- if (valid.includes("verifying" /* VERIFYING */)) {
2399
- return "verifying" /* VERIFYING */;
2525
+ for (const m of parsed) {
2526
+ insertMetric(db, exp.id, phase, m.fixture, m.metric_name, m.metric_value);
2527
+ }
2528
+ success(`Captured ${parsed.length} metric(s) for ${exp.slug} (phase: ${phase})`);
2529
+ if (config.build.post_measure) {
2530
+ try {
2531
+ (0, import_node_child_process.execSync)(config.build.post_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
2532
+ } catch {
2533
+ warn("Post-measure command failed.");
2400
2534
  }
2401
2535
  }
2402
- return valid[0];
2403
2536
  }
2404
- var init_machine = __esm({
2405
- "src/state/machine.ts"() {
2406
- "use strict";
2407
- init_types();
2537
+ async function compare(args, isJson) {
2538
+ const root = findProjectRoot();
2539
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2540
+ const db = getDb(root);
2541
+ const config = loadConfig(root);
2542
+ const expIdStr = getFlagValue(args, "--experiment");
2543
+ let exp;
2544
+ if (expIdStr !== void 0) {
2545
+ exp = getExperimentById(db, Number(expIdStr));
2546
+ } else {
2547
+ exp = getLatestExperiment(db);
2408
2548
  }
2409
- });
2410
-
2411
- // src/agents/types.ts
2412
- function getExtractionSchema(role) {
2413
- switch (role) {
2414
- case "builder":
2415
- return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}';
2416
- case "critic":
2417
- return '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}';
2418
- case "adversary":
2419
- return '{"challenges": [{"description": "string", "reasoning": "string"}]}';
2420
- case "verifier":
2421
- return '{"grades": [{"component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string"}], "doubt_resolutions": [{"doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive"}]}';
2422
- case "gatekeeper":
2423
- return '{"gate_decision": "approve|reject|flag", "reason": "string", "stale_references": ["string"], "overlapping_dead_ends": [0]}';
2424
- case "reframer":
2425
- return '{"reframe": {"decomposition": "string", "divergences": ["string"], "recommendation": "string"}}';
2426
- case "scout":
2427
- return '{"findings": [{"approach": "string", "source": "string", "relevance": "string", "contradicts_current": true}]}';
2428
- case "compressor":
2429
- return '{"compression_report": {"synthesis_delta": "string", "new_dead_ends": ["string"], "fragility_changes": ["string"]}}';
2430
- default:
2431
- return EXTRACTION_SCHEMA;
2549
+ if (!exp) throw new Error("No active experiment.");
2550
+ const comparisons = compareMetrics(db, exp.id, config);
2551
+ if (comparisons.length === 0) {
2552
+ warn(`No before/after metrics to compare for ${exp.slug}. Run baseline and measure first.`);
2553
+ return;
2554
+ }
2555
+ if (isJson) {
2556
+ console.log(JSON.stringify({ experiment: exp.slug, comparisons }, null, 2));
2557
+ return;
2558
+ }
2559
+ header(`Metric Comparison \u2014 ${exp.slug}`);
2560
+ const regressions = comparisons.filter((c) => c.regression);
2561
+ const rows = comparisons.map((c) => [
2562
+ c.fixture,
2563
+ c.metric,
2564
+ String(c.before),
2565
+ String(c.after),
2566
+ formatDelta(c.delta),
2567
+ c.regression ? red("REGRESSION") : green("OK")
2568
+ ]);
2569
+ console.log(table(["Fixture", "Metric", "Before", "After", "Delta", "Status"], rows));
2570
+ if (regressions.length > 0) {
2571
+ console.log();
2572
+ warn(`${regressions.length} regression(s) detected!`);
2573
+ } else {
2574
+ console.log();
2575
+ success("No regressions detected.");
2432
2576
  }
2433
2577
  }
2434
- var EXTRACTION_SCHEMA, ROLE_REQUIRED_FIELDS;
2435
- var init_types2 = __esm({
2436
- "src/agents/types.ts"() {
2578
+ function formatDelta(delta) {
2579
+ const prefix = delta > 0 ? "+" : "";
2580
+ return `${prefix}${delta.toFixed(4)}`;
2581
+ }
2582
+ var import_node_child_process;
2583
+ var init_measure = __esm({
2584
+ "src/commands/measure.ts"() {
2437
2585
  "use strict";
2438
- EXTRACTION_SCHEMA = `{
2439
- "decisions": [{ "description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string" }],
2440
- "grades": [{ "component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string" }],
2441
- "doubts": [{ "claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical" }],
2442
- "guidance": "string (actionable builder guidance)",
2443
- "doubt_resolutions": [{ "doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive" }]
2444
- }`;
2445
- ROLE_REQUIRED_FIELDS = {
2446
- builder: ["decisions"],
2447
- critic: ["doubts"],
2448
- adversary: ["challenges"],
2449
- verifier: ["grades"],
2450
- gatekeeper: ["gate_decision"],
2451
- reframer: ["reframe"],
2452
- scout: ["findings"],
2453
- compressor: ["compression_report"]
2454
- };
2586
+ import_node_child_process = require("child_process");
2587
+ init_connection();
2588
+ init_queries();
2589
+ init_metrics();
2590
+ init_config();
2591
+ init_format();
2455
2592
  }
2456
2593
  });
2457
2594
 
2458
- // src/agents/parse.ts
2459
- async function extractStructuredData(role, markdown) {
2460
- const tier1 = extractMajlisJsonBlock(markdown);
2461
- if (tier1) {
2462
- const parsed = tryParseJson(tier1);
2463
- if (parsed) return parsed;
2464
- console.warn(`[majlis] Malformed JSON in <!-- majlis-json --> block for ${role}. Falling back.`);
2465
- } else {
2466
- console.warn(`[majlis] No <!-- majlis-json --> block found in ${role} output. Falling back.`);
2595
+ // src/commands/experiment.ts
2596
+ var experiment_exports = {};
2597
+ __export(experiment_exports, {
2598
+ newExperiment: () => newExperiment,
2599
+ revert: () => revert
2600
+ });
2601
+ async function newExperiment(args) {
2602
+ const root = findProjectRoot();
2603
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2604
+ const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
2605
+ if (!hypothesis) {
2606
+ throw new Error('Usage: majlis new "hypothesis"');
2467
2607
  }
2468
- const tier2 = extractViaPatterns(role, markdown);
2469
- if (tier2 && hasData(tier2)) {
2470
- console.warn(`[majlis] Used regex fallback for ${role}. Review extracted data.`);
2471
- return tier2;
2608
+ const db = getDb(root);
2609
+ const config = loadConfig(root);
2610
+ const slug = getFlagValue(args, "--slug") ?? await generateSlug(hypothesis, root);
2611
+ if (getExperimentBySlug(db, slug)) {
2612
+ throw new Error(`Experiment with slug "${slug}" already exists.`);
2472
2613
  }
2473
- console.warn(`[majlis] Regex fallback insufficient for ${role}. Using Haiku extraction.`);
2474
- const tier3 = await extractViaHaiku(role, markdown);
2475
- if (tier3) return tier3;
2476
- console.error(
2477
- `[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
2478
- );
2479
- return null;
2480
- }
2481
- function extractMajlisJsonBlock(markdown) {
2482
- const match = markdown.match(/<!--\s*majlis-json\s*\n([\s\S]*?)-->/);
2483
- if (!match) return null;
2484
- return match[1].trim();
2485
- }
2486
- function tryParseJson(jsonStr) {
2614
+ const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
2615
+ const num = allExps.count + 1;
2616
+ const paddedNum = String(num).padStart(3, "0");
2617
+ const branch = `exp/${paddedNum}-${slug}`;
2487
2618
  try {
2488
- return JSON.parse(jsonStr);
2489
- } catch {
2490
- return null;
2491
- }
2492
- }
2493
- function extractViaPatterns(role, markdown) {
2494
- const result = {};
2495
- const decisionPattern = /\[(?:decision|Decision)\].*?(?:description|Description):\s*(.+?)(?:\n|$).*?(?:evidence.?level|Evidence.?Level|level):\s*(proof|test|strong_consensus|consensus|analogy|judgment).*?(?:justification|Justification):\s*(.+?)(?:\n|$)/gis;
2496
- const decisions = [];
2497
- const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
2498
- let match;
2499
- while ((match = evidenceMarkers.exec(markdown)) !== null) {
2500
- decisions.push({
2501
- description: match[1].trim(),
2502
- evidence_level: match[2].toLowerCase().trim(),
2503
- justification: "Extracted via regex \u2014 review"
2619
+ (0, import_node_child_process2.execSync)(`git checkout -b ${branch}`, {
2620
+ cwd: root,
2621
+ encoding: "utf-8",
2622
+ stdio: ["pipe", "pipe", "pipe"]
2504
2623
  });
2624
+ info(`Created branch: ${branch}`);
2625
+ } catch (err) {
2626
+ warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
2505
2627
  }
2506
- const inlineTagPattern = /\[(proof|test|strong_consensus|consensus|analogy|judgment)\]\s*(.+?)(?:\n|$)/gi;
2507
- while ((match = inlineTagPattern.exec(markdown)) !== null) {
2508
- const desc = match[2].trim();
2509
- if (!decisions.some((d) => d.description === desc)) {
2510
- decisions.push({
2511
- description: desc,
2512
- evidence_level: match[1].toLowerCase(),
2513
- justification: "Extracted via regex \u2014 review"
2514
- });
2628
+ const subType = getFlagValue(args, "--sub-type") ?? null;
2629
+ const exp = createExperiment(db, slug, branch, hypothesis, subType, null);
2630
+ success(`Created experiment #${exp.id}: ${exp.slug}`);
2631
+ const docsDir = path5.join(root, "docs", "experiments");
2632
+ const templatePath = path5.join(docsDir, "_TEMPLATE.md");
2633
+ if (fs5.existsSync(templatePath)) {
2634
+ const template = fs5.readFileSync(templatePath, "utf-8");
2635
+ const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, subType ?? "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
2636
+ const logPath = path5.join(docsDir, `${paddedNum}-${slug}.md`);
2637
+ fs5.writeFileSync(logPath, logContent);
2638
+ info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
2639
+ }
2640
+ if (config.cycle.auto_baseline_on_new_experiment && config.metrics.command) {
2641
+ info("Auto-baselining... (run `majlis baseline` to do this manually)");
2642
+ try {
2643
+ const { baseline: baseline2 } = await Promise.resolve().then(() => (init_measure(), measure_exports));
2644
+ await baseline2(["--experiment", String(exp.id)]);
2645
+ } catch (err) {
2646
+ warn("Auto-baseline failed \u2014 run `majlis baseline` manually.");
2515
2647
  }
2516
2648
  }
2517
- if (decisions.length > 0) result.decisions = decisions;
2518
- const grades = [];
2519
- const gradePattern = /(?:^|\n)\s*[-*]?\s*\*?\*?(?:Grade|GRADE|Component)\*?\*?.*?(?:component|Component)?\s*[:=]\s*(.+?)(?:\n|,).*?(?:grade|Grade)\s*[:=]\s*(sound|good|weak|rejected)/gim;
2520
- while ((match = gradePattern.exec(markdown)) !== null) {
2521
- grades.push({
2522
- component: match[1].trim(),
2523
- grade: match[2].toLowerCase().trim()
2524
- });
2649
+ }
2650
+ async function revert(args) {
2651
+ const root = findProjectRoot();
2652
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2653
+ const db = getDb(root);
2654
+ let exp;
2655
+ const slugArg = args.filter((a) => !a.startsWith("--"))[0];
2656
+ if (slugArg) {
2657
+ exp = getExperimentBySlug(db, slugArg);
2658
+ if (!exp) throw new Error(`Experiment not found: ${slugArg}`);
2659
+ } else {
2660
+ exp = getLatestExperiment(db);
2661
+ if (!exp) throw new Error("No active experiments to revert.");
2525
2662
  }
2526
- const simpleGradePattern = /(?:^|\n)\s*[-*]\s*\*?\*?(.+?)\*?\*?\s*[:—–-]\s*\*?\*?(sound|good|weak|rejected)\*?\*?/gim;
2527
- while ((match = simpleGradePattern.exec(markdown)) !== null) {
2528
- const comp = match[1].trim();
2529
- if (!grades.some((g) => g.component === comp)) {
2530
- grades.push({
2531
- component: comp,
2532
- grade: match[2].toLowerCase().trim()
2663
+ const reason = getFlagValue(args, "--reason") ?? "Manually reverted";
2664
+ const category = args.includes("--structural") ? "structural" : "procedural";
2665
+ insertDeadEnd(
2666
+ db,
2667
+ exp.id,
2668
+ exp.hypothesis ?? exp.slug,
2669
+ reason,
2670
+ `Reverted: ${reason}`,
2671
+ exp.sub_type,
2672
+ category
2673
+ );
2674
+ updateExperimentStatus(db, exp.id, "dead_end");
2675
+ try {
2676
+ const currentBranch = (0, import_node_child_process2.execSync)("git rev-parse --abbrev-ref HEAD", {
2677
+ cwd: root,
2678
+ encoding: "utf-8"
2679
+ }).trim();
2680
+ if (currentBranch === exp.branch) {
2681
+ (0, import_node_child_process2.execSync)("git checkout main 2>/dev/null || git checkout master", {
2682
+ cwd: root,
2683
+ encoding: "utf-8",
2684
+ stdio: ["pipe", "pipe", "pipe"]
2533
2685
  });
2534
2686
  }
2687
+ } catch {
2688
+ warn("Could not switch git branches \u2014 do this manually.");
2535
2689
  }
2536
- if (grades.length > 0) result.grades = grades;
2537
- const doubts = [];
2538
- const doubtPattern = /(?:Doubt|DOUBT|Claim doubted|CLAIM)\s*(?:\d+)?[:.]?\s*(.+?)(?:\n|$)[\s\S]*?(?:Severity|SEVERITY)\s*[:=]\s*(minor|moderate|critical)/gim;
2539
- while ((match = doubtPattern.exec(markdown)) !== null) {
2540
- doubts.push({
2541
- claim_doubted: match[1].trim(),
2542
- evidence_level_of_claim: "unknown",
2543
- // Don't fabricate — mark as unknown for review
2544
- evidence_for_doubt: "Extracted via regex \u2014 review original document",
2545
- severity: match[2].toLowerCase().trim()
2546
- });
2547
- }
2548
- if (doubts.length > 0) result.doubts = doubts;
2549
- return result;
2690
+ info(`Experiment ${exp.slug} reverted to dead-end. Reason: ${reason}`);
2550
2691
  }
2551
- async function extractViaHaiku(role, markdown) {
2552
- try {
2553
- const truncated = markdown.length > 8e3 ? markdown.slice(0, 8e3) + "\n[truncated]" : markdown;
2554
- const schema = getExtractionSchema(role);
2555
- const prompt = `Extract structured data from this ${role} document as JSON. Follow this schema exactly: ${schema}
2692
+ var fs5, path5, import_node_child_process2;
2693
+ var init_experiment = __esm({
2694
+ "src/commands/experiment.ts"() {
2695
+ "use strict";
2696
+ fs5 = __toESM(require("fs"));
2697
+ path5 = __toESM(require("path"));
2698
+ import_node_child_process2 = require("child_process");
2699
+ init_connection();
2700
+ init_queries();
2701
+ init_config();
2702
+ init_spawn();
2703
+ init_format();
2704
+ }
2705
+ });
2556
2706
 
2557
- Document:
2558
- ${truncated}`;
2559
- const conversation = (0, import_claude_agent_sdk.query)({
2560
- prompt,
2561
- options: {
2562
- model: "haiku",
2563
- tools: [],
2564
- systemPrompt: "You are a JSON extraction assistant. Output only valid JSON matching the requested schema. No markdown, no explanation, just JSON.",
2565
- permissionMode: "bypassPermissions",
2566
- allowDangerouslySkipPermissions: true,
2567
- maxTurns: 1,
2568
- persistSession: false
2569
- }
2570
- });
2571
- let resultText = "";
2572
- for await (const message of conversation) {
2573
- if (message.type === "assistant") {
2574
- for (const block of message.message.content) {
2575
- if (block.type === "text") {
2576
- resultText += block.text;
2577
- }
2578
- }
2579
- }
2707
+ // src/commands/session.ts
2708
+ var session_exports = {};
2709
+ __export(session_exports, {
2710
+ session: () => session
2711
+ });
2712
+ async function session(args) {
2713
+ const subcommand = args[0];
2714
+ if (!subcommand || subcommand !== "start" && subcommand !== "end") {
2715
+ throw new Error('Usage: majlis session start "intent" | majlis session end');
2716
+ }
2717
+ const root = findProjectRoot();
2718
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2719
+ const db = getDb(root);
2720
+ if (subcommand === "start") {
2721
+ const intent = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
2722
+ if (!intent) {
2723
+ throw new Error('Usage: majlis session start "intent"');
2724
+ }
2725
+ const existing = getActiveSession(db);
2726
+ if (existing) {
2727
+ warn(`Session already active: "${existing.intent}" (started ${existing.started_at})`);
2728
+ warn("End it first with `majlis session end`.");
2729
+ return;
2730
+ }
2731
+ const latestExp = getLatestExperiment(db);
2732
+ const sess = startSession(db, intent, latestExp?.id ?? null);
2733
+ success(`Session started: "${intent}" (id: ${sess.id})`);
2734
+ if (latestExp) {
2735
+ info(`Linked to experiment: ${latestExp.slug} (${latestExp.status})`);
2736
+ }
2737
+ } else {
2738
+ const active = getActiveSession(db);
2739
+ if (!active) {
2740
+ throw new Error("No active session to end.");
2580
2741
  }
2581
- return tryParseJson(resultText.trim());
2582
- } catch (err) {
2583
- console.warn(`[majlis] Haiku extraction failed for ${role}: ${err instanceof Error ? err.message : String(err)}`);
2584
- return null;
2742
+ const accomplished = getFlagValue(args, "--accomplished") ?? null;
2743
+ const unfinished = getFlagValue(args, "--unfinished") ?? null;
2744
+ const fragility = getFlagValue(args, "--fragility") ?? null;
2745
+ endSession(db, active.id, accomplished, unfinished, fragility);
2746
+ success(`Session ended: "${active.intent}"`);
2747
+ if (accomplished) info(`Accomplished: ${accomplished}`);
2748
+ if (unfinished) info(`Unfinished: ${unfinished}`);
2749
+ if (fragility) warn(`New fragility: ${fragility}`);
2585
2750
  }
2586
2751
  }
2587
- function hasData(output) {
2588
- return !!(output.decisions && output.decisions.length > 0 || output.grades && output.grades.length > 0 || output.doubts && output.doubts.length > 0 || output.challenges && output.challenges.length > 0 || output.findings && output.findings.length > 0 || output.guidance || output.reframe || output.compression_report || output.gate_decision);
2589
- }
2590
- function validateForRole(role, output) {
2591
- const required = ROLE_REQUIRED_FIELDS[role];
2592
- if (!required) return { valid: true, missing: [] };
2593
- const missing = required.filter((field) => {
2594
- const value = output[field];
2595
- if (value === void 0 || value === null) return true;
2596
- if (Array.isArray(value) && value.length === 0) return true;
2597
- return false;
2598
- });
2599
- return { valid: missing.length === 0, missing };
2600
- }
2601
- var import_claude_agent_sdk;
2602
- var init_parse = __esm({
2603
- "src/agents/parse.ts"() {
2752
+ var init_session = __esm({
2753
+ "src/commands/session.ts"() {
2604
2754
  "use strict";
2605
- init_types2();
2606
- import_claude_agent_sdk = require("@anthropic-ai/claude-agent-sdk");
2755
+ init_connection();
2756
+ init_queries();
2757
+ init_config();
2758
+ init_format();
2607
2759
  }
2608
2760
  });
2609
2761
 
2610
- // src/agents/spawn.ts
2611
- function loadAgentDefinition(role, projectRoot) {
2612
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2613
- const filePath = path6.join(root, ".majlis", "agents", `${role}.md`);
2614
- if (!fs6.existsSync(filePath)) {
2615
- throw new Error(`Agent definition not found: ${filePath}`);
2616
- }
2617
- const content = fs6.readFileSync(filePath, "utf-8");
2618
- const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
2619
- if (!frontmatterMatch) {
2620
- throw new Error(`Invalid agent definition (missing YAML frontmatter): ${filePath}`);
2762
+ // src/commands/query.ts
2763
+ var query_exports = {};
2764
+ __export(query_exports, {
2765
+ query: () => query3
2766
+ });
2767
+ async function query3(command, args, isJson) {
2768
+ const root = findProjectRoot();
2769
+ if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
2770
+ const db = getDb(root);
2771
+ switch (command) {
2772
+ case "decisions":
2773
+ return queryDecisions(db, args, isJson);
2774
+ case "dead-ends":
2775
+ return queryDeadEnds(db, args, isJson);
2776
+ case "fragility":
2777
+ return queryFragility(root, isJson);
2778
+ case "history":
2779
+ return queryHistory(db, args, isJson);
2780
+ case "circuit-breakers":
2781
+ return queryCircuitBreakers(db, root, isJson);
2782
+ case "check-commit":
2783
+ return checkCommit(db);
2621
2784
  }
2622
- const frontmatter = frontmatterMatch[1];
2623
- const body = frontmatterMatch[2].trim();
2624
- const name = extractYamlField(frontmatter, "name") ?? role;
2625
- const model = extractYamlField(frontmatter, "model") ?? "opus";
2626
- const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
2627
- const tools = toolsStr.replace(/[\[\]]/g, "").split(",").map((t) => t.trim()).filter(Boolean);
2628
- return { name, model, tools, systemPrompt: body };
2629
- }
2630
- function extractYamlField(yaml, field) {
2631
- const match = yaml.match(new RegExp(`^${field}:\\s*(.+)$`, "m"));
2632
- return match ? match[1].trim() : null;
2633
2785
  }
2634
- async function spawnAgent(role, context, projectRoot) {
2635
- const agentDef = loadAgentDefinition(role, projectRoot);
2636
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2637
- const taskPrompt = context.taskPrompt ?? `Perform your role as ${agentDef.name}.`;
2638
- const contextJson = JSON.stringify(context);
2639
- const prompt = `Here is your context:
2640
-
2641
- \`\`\`json
2642
- ${contextJson}
2643
- \`\`\`
2644
-
2645
- ${taskPrompt}`;
2646
- const turns = ROLE_MAX_TURNS[role] ?? 15;
2647
- console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
2648
- const { text: markdown, costUsd, truncated } = await runQuery({
2649
- prompt,
2650
- model: agentDef.model,
2651
- tools: agentDef.tools,
2652
- systemPrompt: agentDef.systemPrompt,
2653
- cwd: root,
2654
- maxTurns: turns,
2655
- label: role
2656
- });
2657
- console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
2658
- const artifactPath = writeArtifact(role, context, markdown, root);
2659
- if (artifactPath) {
2660
- console.log(`[${role}] Artifact written to ${artifactPath}`);
2786
+ function queryDecisions(db, args, isJson) {
2787
+ const level = getFlagValue(args, "--level");
2788
+ const expIdStr = getFlagValue(args, "--experiment");
2789
+ const experimentId = expIdStr !== void 0 ? Number(expIdStr) : void 0;
2790
+ const decisions = listAllDecisions(db, level, experimentId);
2791
+ if (isJson) {
2792
+ console.log(JSON.stringify(decisions, null, 2));
2793
+ return;
2661
2794
  }
2662
- const structured = await extractStructuredData(role, markdown);
2663
- if (structured) {
2664
- const { valid, missing } = validateForRole(role, structured);
2665
- if (!valid) {
2666
- console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
2667
- }
2795
+ if (decisions.length === 0) {
2796
+ info("No decisions found.");
2797
+ return;
2668
2798
  }
2669
- return { output: markdown, structured, truncated };
2670
- }
2671
- async function spawnSynthesiser(context, projectRoot) {
2672
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2673
- const contextJson = JSON.stringify(context);
2674
- const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
2675
- const prompt = `Here is your context:
2676
-
2677
- \`\`\`json
2678
- ${contextJson}
2679
- \`\`\`
2680
-
2681
- ${taskPrompt}`;
2682
- const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2683
- console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
2684
- const { text: markdown, costUsd, truncated } = await runQuery({
2685
- prompt,
2686
- model: "sonnet",
2687
- tools: ["Read", "Glob", "Grep"],
2688
- systemPrompt,
2689
- cwd: root,
2690
- maxTurns: 5,
2691
- label: "synthesiser"
2692
- });
2693
- console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
2694
- return { output: markdown, structured: { guidance: markdown }, truncated };
2695
- }
2696
- async function spawnRecovery(role, partialOutput, context, projectRoot) {
2697
- const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2698
- const expSlug = context.experiment?.slug ?? "unknown";
2699
- console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
2700
- const expDocPath = path6.join(
2701
- root,
2702
- "docs",
2703
- "experiments",
2704
- `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
2705
- );
2706
- const templatePath = path6.join(root, "docs", "experiments", "_TEMPLATE.md");
2707
- const template = fs6.existsSync(templatePath) ? fs6.readFileSync(templatePath, "utf-8") : "";
2708
- const currentDoc = fs6.existsSync(expDocPath) ? fs6.readFileSync(expDocPath, "utf-8") : "";
2709
- const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
2710
-
2711
- Here is the partial agent output (reasoning + tool calls):
2712
- <partial_output>
2713
- ${partialOutput.slice(-3e3)}
2714
- </partial_output>
2715
-
2716
- Here is the current experiment doc:
2717
- <current_doc>
2718
- ${currentDoc}
2719
- </current_doc>
2720
-
2721
- Here is the template that the experiment doc should follow:
2722
- <template>
2723
- ${template}
2724
- </template>
2725
-
2726
- Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
2727
- - Keep any valid content from the current doc
2728
- - Fill in what you can infer from the partial output
2729
- - Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
2730
- - The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
2731
- - Do NOT include agent reasoning or thinking \u2014 only structured experiment content
2732
- - Be concise. This is cleanup, not new work.`;
2733
- const { text: _markdown } = await runQuery({
2734
- prompt,
2735
- model: "haiku",
2736
- tools: ["Read", "Write"],
2737
- systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
2738
- cwd: root,
2739
- maxTurns: 5,
2740
- label: "recovery"
2741
- });
2742
- console.log(`[recovery] Cleanup complete for ${expSlug}.`);
2743
- }
2744
- async function runQuery(opts) {
2745
- let truncated = false;
2746
- const tag = opts.label ?? "majlis";
2747
- const conversation = (0, import_claude_agent_sdk2.query)({
2748
- prompt: opts.prompt,
2749
- options: {
2750
- model: opts.model,
2751
- tools: opts.tools,
2752
- systemPrompt: {
2753
- type: "preset",
2754
- preset: "claude_code",
2755
- append: opts.systemPrompt
2756
- },
2757
- cwd: opts.cwd,
2758
- permissionMode: "bypassPermissions",
2759
- allowDangerouslySkipPermissions: true,
2760
- maxTurns: opts.maxTurns ?? 15,
2761
- persistSession: false,
2762
- settingSources: ["project"]
2763
- }
2764
- });
2765
- const textParts = [];
2766
- let costUsd = 0;
2767
- let turnCount = 0;
2768
- for await (const message of conversation) {
2769
- if (message.type === "assistant") {
2770
- turnCount++;
2771
- let hasText = false;
2772
- for (const block of message.message.content) {
2773
- if (block.type === "text") {
2774
- textParts.push(block.text);
2775
- hasText = true;
2776
- } else if (block.type === "tool_use") {
2777
- const toolName = block.name ?? "tool";
2778
- const input = block.input ?? {};
2779
- const detail = formatToolDetail(toolName, input);
2780
- process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2781
- `);
2782
- }
2783
- }
2784
- if (hasText) {
2785
- const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
2786
- if (preview) {
2787
- process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2788
- `);
2789
- }
2790
- }
2791
- } else if (message.type === "tool_progress") {
2792
- const elapsed = Math.round(message.elapsed_time_seconds);
2793
- if (elapsed > 0 && elapsed % 5 === 0) {
2794
- process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
2795
- `);
2796
- }
2797
- } else if (message.type === "result") {
2798
- if (message.subtype === "success") {
2799
- costUsd = message.total_cost_usd;
2800
- } else if (message.subtype === "error_max_turns") {
2801
- truncated = true;
2802
- costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
2803
- console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
2804
- } else {
2805
- const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
2806
- throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
2807
- }
2808
- }
2799
+ header("Decisions");
2800
+ const rows = decisions.map((d) => [
2801
+ String(d.id),
2802
+ String(d.experiment_id),
2803
+ evidenceColor(d.evidence_level),
2804
+ d.description.slice(0, 60) + (d.description.length > 60 ? "..." : ""),
2805
+ d.status
2806
+ ]);
2807
+ console.log(table(["ID", "Exp", "Level", "Description", "Status"], rows));
2808
+ }
2809
+ function queryDeadEnds(db, args, isJson) {
2810
+ const subType = getFlagValue(args, "--sub-type");
2811
+ const searchTerm = getFlagValue(args, "--search");
2812
+ let deadEnds;
2813
+ if (subType) {
2814
+ deadEnds = listDeadEndsBySubType(db, subType);
2815
+ } else if (searchTerm) {
2816
+ deadEnds = searchDeadEnds(db, searchTerm);
2817
+ } else {
2818
+ deadEnds = listAllDeadEnds(db);
2809
2819
  }
2810
- return { text: textParts.join("\n\n"), costUsd, truncated };
2820
+ if (isJson) {
2821
+ console.log(JSON.stringify(deadEnds, null, 2));
2822
+ return;
2823
+ }
2824
+ if (deadEnds.length === 0) {
2825
+ info("No dead-ends recorded.");
2826
+ return;
2827
+ }
2828
+ header("Dead-End Registry");
2829
+ const rows = deadEnds.map((d) => [
2830
+ String(d.id),
2831
+ d.sub_type ?? "\u2014",
2832
+ d.approach.slice(0, 40) + (d.approach.length > 40 ? "..." : ""),
2833
+ d.structural_constraint.slice(0, 40) + (d.structural_constraint.length > 40 ? "..." : "")
2834
+ ]);
2835
+ console.log(table(["ID", "Sub-Type", "Approach", "Constraint"], rows));
2811
2836
  }
2812
- function formatToolDetail(toolName, input) {
2813
- switch (toolName) {
2814
- case "Read":
2815
- return input.file_path ? ` ${input.file_path}` : "";
2816
- case "Write":
2817
- return input.file_path ? ` \u2192 ${input.file_path}` : "";
2818
- case "Edit":
2819
- return input.file_path ? ` ${input.file_path}` : "";
2820
- case "Glob":
2821
- return input.pattern ? ` ${input.pattern}` : "";
2822
- case "Grep":
2823
- return input.pattern ? ` /${input.pattern}/` : "";
2824
- case "Bash":
2825
- return input.command ? ` $ ${input.command.slice(0, 80)}` : "";
2826
- case "WebSearch":
2827
- return input.query ? ` "${input.query}"` : "";
2828
- default:
2829
- return "";
2837
+ function queryFragility(root, isJson) {
2838
+ const fragPath = path6.join(root, "docs", "synthesis", "fragility.md");
2839
+ if (!fs6.existsSync(fragPath)) {
2840
+ info("No fragility map found.");
2841
+ return;
2842
+ }
2843
+ const content = fs6.readFileSync(fragPath, "utf-8");
2844
+ if (isJson) {
2845
+ console.log(JSON.stringify({ content }, null, 2));
2846
+ return;
2830
2847
  }
2848
+ header("Fragility Map");
2849
+ console.log(content);
2831
2850
  }
2832
- function writeArtifact(role, context, markdown, projectRoot) {
2833
- const dirMap = {
2834
- builder: "docs/experiments",
2835
- critic: "docs/doubts",
2836
- adversary: "docs/challenges",
2837
- verifier: "docs/verification",
2838
- reframer: "docs/reframes",
2839
- compressor: "docs/synthesis",
2840
- scout: "docs/rihla"
2841
- };
2842
- const dir = dirMap[role];
2843
- if (!dir) return null;
2844
- if (role === "builder" || role === "compressor") return null;
2845
- const fullDir = path6.join(projectRoot, dir);
2846
- if (!fs6.existsSync(fullDir)) {
2847
- fs6.mkdirSync(fullDir, { recursive: true });
2851
+ function queryHistory(db, args, isJson) {
2852
+ const fixture = args.filter((a) => !a.startsWith("--"))[0];
2853
+ if (!fixture) {
2854
+ throw new Error("Usage: majlis history <fixture>");
2848
2855
  }
2849
- const expSlug = context.experiment?.slug ?? "general";
2850
- const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
2851
- const filename = `${nextNum}-${role}-${expSlug}.md`;
2852
- const target = path6.join(fullDir, filename);
2853
- fs6.writeFileSync(target, markdown);
2854
- return target;
2856
+ const history = getMetricHistoryByFixture(db, fixture);
2857
+ if (isJson) {
2858
+ console.log(JSON.stringify(history, null, 2));
2859
+ return;
2860
+ }
2861
+ if (history.length === 0) {
2862
+ info(`No metric history for fixture: ${fixture}`);
2863
+ return;
2864
+ }
2865
+ header(`Metric History \u2014 ${fixture}`);
2866
+ const rows = history.map((h) => [
2867
+ String(h.experiment_id),
2868
+ h.experiment_slug ?? "\u2014",
2869
+ h.phase,
2870
+ h.metric_name,
2871
+ String(h.metric_value),
2872
+ h.captured_at
2873
+ ]);
2874
+ console.log(table(["Exp", "Slug", "Phase", "Metric", "Value", "Captured"], rows));
2855
2875
  }
2856
- var fs6, path6, import_claude_agent_sdk2, ROLE_MAX_TURNS, DIM2, RESET2, CYAN2;
2857
- var init_spawn = __esm({
2858
- "src/agents/spawn.ts"() {
2876
+ function queryCircuitBreakers(db, root, isJson) {
2877
+ const config = loadConfig(root);
2878
+ const states = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
2879
+ if (isJson) {
2880
+ console.log(JSON.stringify(states, null, 2));
2881
+ return;
2882
+ }
2883
+ if (states.length === 0) {
2884
+ info("No circuit breaker data.");
2885
+ return;
2886
+ }
2887
+ header("Circuit Breakers");
2888
+ const rows = states.map((s) => [
2889
+ s.sub_type,
2890
+ String(s.failure_count),
2891
+ String(config.cycle.circuit_breaker_threshold),
2892
+ s.tripped ? red("TRIPPED") : green("OK")
2893
+ ]);
2894
+ console.log(table(["Sub-Type", "Failures", "Threshold", "Status"], rows));
2895
+ }
2896
+ function checkCommit(db) {
2897
+ let stdinData = "";
2898
+ try {
2899
+ stdinData = fs6.readFileSync(0, "utf-8");
2900
+ } catch {
2901
+ }
2902
+ if (stdinData) {
2903
+ try {
2904
+ const hookInput = JSON.parse(stdinData);
2905
+ const command = hookInput?.tool_input?.command ?? "";
2906
+ if (!command.includes("git commit")) {
2907
+ return;
2908
+ }
2909
+ } catch {
2910
+ }
2911
+ }
2912
+ const active = listActiveExperiments(db);
2913
+ const unverified = active.filter(
2914
+ (e) => !["merged", "dead_end", "verified", "resolved", "compressed"].includes(e.status)
2915
+ );
2916
+ if (unverified.length > 0) {
2917
+ console.error(`[majlis] ${unverified.length} unverified experiment(s):`);
2918
+ for (const e of unverified) {
2919
+ console.error(` - ${e.slug} (${e.status})`);
2920
+ }
2921
+ process.exit(1);
2922
+ }
2923
+ }
2924
+ var fs6, path6;
2925
+ var init_query = __esm({
2926
+ "src/commands/query.ts"() {
2859
2927
  "use strict";
2860
2928
  fs6 = __toESM(require("fs"));
2861
2929
  path6 = __toESM(require("path"));
2862
- import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
2863
- init_parse();
2864
2930
  init_connection();
2865
- ROLE_MAX_TURNS = {
2866
- builder: 50,
2867
- critic: 30,
2868
- adversary: 30,
2869
- verifier: 50,
2870
- compressor: 30,
2871
- reframer: 20,
2872
- scout: 20,
2873
- gatekeeper: 10
2931
+ init_queries();
2932
+ init_config();
2933
+ init_format();
2934
+ }
2935
+ });
2936
+
2937
+ // src/state/types.ts
2938
+ var TRANSITIONS, GRADE_ORDER;
2939
+ var init_types2 = __esm({
2940
+ "src/state/types.ts"() {
2941
+ "use strict";
2942
+ TRANSITIONS = {
2943
+ ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
2944
+ ["reframed" /* REFRAMED */]: ["gated" /* GATED */],
2945
+ ["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
2946
+ // self-loop for rejected hypotheses
2947
+ ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
2948
+ // self-loop for retry after truncation
2949
+ ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
2950
+ ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
2951
+ ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
2952
+ ["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
2953
+ ["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
2954
+ ["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
2955
+ ["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
2956
+ // cycle-back skips gate
2957
+ ["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
2958
+ // cycle-back skips gate
2959
+ ["merged" /* MERGED */]: [],
2960
+ ["dead_end" /* DEAD_END */]: []
2874
2961
  };
2875
- DIM2 = "\x1B[2m";
2876
- RESET2 = "\x1B[0m";
2877
- CYAN2 = "\x1B[36m";
2962
+ GRADE_ORDER = ["rejected", "weak", "good", "sound"];
2963
+ }
2964
+ });
2965
+
2966
+ // src/state/machine.ts
2967
+ function transition(current, target) {
2968
+ const valid = TRANSITIONS[current];
2969
+ if (!valid.includes(target)) {
2970
+ throw new Error(
2971
+ `Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
2972
+ );
2973
+ }
2974
+ return target;
2975
+ }
2976
+ function validNext(current) {
2977
+ return TRANSITIONS[current];
2978
+ }
2979
+ function isTerminal(status2) {
2980
+ return TRANSITIONS[status2].length === 0;
2981
+ }
2982
+ function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
2983
+ if (valid.length === 0) {
2984
+ throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
2985
+ }
2986
+ const status2 = exp.status;
2987
+ if (status2 === "classified" /* CLASSIFIED */ || status2 === "reframed" /* REFRAMED */) {
2988
+ return valid.includes("gated" /* GATED */) ? "gated" /* GATED */ : valid[0];
2989
+ }
2990
+ if (status2 === "gated" /* GATED */) {
2991
+ return valid.includes("building" /* BUILDING */) ? "building" /* BUILDING */ : valid[0];
2992
+ }
2993
+ if (status2 === "built" /* BUILT */ && !hasDoubts2) {
2994
+ return valid.includes("doubted" /* DOUBTED */) ? "doubted" /* DOUBTED */ : valid[0];
2995
+ }
2996
+ if (status2 === "doubted" /* DOUBTED */ && !hasChallenges2) {
2997
+ return valid.includes("challenged" /* CHALLENGED */) ? "challenged" /* CHALLENGED */ : valid[0];
2998
+ }
2999
+ if (status2 === "doubted" /* DOUBTED */ || status2 === "challenged" /* CHALLENGED */) {
3000
+ if (valid.includes("verifying" /* VERIFYING */)) {
3001
+ return "verifying" /* VERIFYING */;
3002
+ }
3003
+ }
3004
+ if (status2 === "compressed" /* COMPRESSED */) {
3005
+ return valid.includes("merged" /* MERGED */) ? "merged" /* MERGED */ : valid[0];
3006
+ }
3007
+ return valid[0];
3008
+ }
3009
+ var init_machine = __esm({
3010
+ "src/state/machine.ts"() {
3011
+ "use strict";
3012
+ init_types2();
2878
3013
  }
2879
3014
  });
2880
3015
 
@@ -3016,7 +3151,7 @@ var init_resolve = __esm({
3016
3151
  "use strict";
3017
3152
  fs7 = __toESM(require("fs"));
3018
3153
  path7 = __toESM(require("path"));
3019
- init_types();
3154
+ init_types2();
3020
3155
  init_queries();
3021
3156
  init_spawn();
3022
3157
  import_node_child_process3 = require("child_process");
@@ -3059,7 +3194,6 @@ async function resolveCmd(args) {
3059
3194
  const exp = resolveExperimentArg(db, args);
3060
3195
  transition(exp.status, "resolved" /* RESOLVED */);
3061
3196
  await resolve(db, exp, root);
3062
- updateExperimentStatus(db, exp.id, "resolved");
3063
3197
  }
3064
3198
  async function doGate(db, exp, root) {
3065
3199
  transition(exp.status, "gated" /* GATED */);
@@ -3507,7 +3641,7 @@ var init_cycle = __esm({
3507
3641
  init_connection();
3508
3642
  init_queries();
3509
3643
  init_machine();
3510
- init_types();
3644
+ init_types2();
3511
3645
  init_spawn();
3512
3646
  init_resolve();
3513
3647
  init_config();
@@ -3830,6 +3964,13 @@ async function executeStep(step, exp, root) {
3830
3964
  updateExperimentStatus(getDb(root), exp.id, "reframed");
3831
3965
  info(`Reframe acknowledged for ${exp.slug}. Proceeding to gate.`);
3832
3966
  break;
3967
+ case "merged" /* MERGED */:
3968
+ updateExperimentStatus(getDb(root), exp.id, "merged");
3969
+ success(`Experiment ${exp.slug} merged.`);
3970
+ break;
3971
+ case "dead_end" /* DEAD_END */:
3972
+ info(`Experiment ${exp.slug} is dead-ended. No further action.`);
3973
+ break;
3833
3974
  default:
3834
3975
  warn(`Don't know how to execute step: ${step}`);
3835
3976
  }
@@ -3840,7 +3981,7 @@ var init_next = __esm({
3840
3981
  init_connection();
3841
3982
  init_queries();
3842
3983
  init_machine();
3843
- init_types();
3984
+ init_types2();
3844
3985
  init_queries();
3845
3986
  init_config();
3846
3987
  init_cycle();
@@ -3900,7 +4041,7 @@ async function run(args) {
3900
4041
  }
3901
4042
  usedHypotheses.add(hypothesis);
3902
4043
  info(`Next hypothesis: ${hypothesis}`);
3903
- exp = createNewExperiment(db, root, hypothesis);
4044
+ exp = await createNewExperiment(db, root, hypothesis);
3904
4045
  success(`Created experiment #${exp.id}: ${exp.slug}`);
3905
4046
  }
3906
4047
  if (isTerminal(exp.status)) {
@@ -4038,8 +4179,8 @@ ${result.output.slice(-2e3)}
4038
4179
  warn("Could not extract hypothesis. Using goal as fallback.");
4039
4180
  return goal;
4040
4181
  }
4041
- function createNewExperiment(db, root, hypothesis) {
4042
- const slug = slugify2(hypothesis);
4182
+ async function createNewExperiment(db, root, hypothesis) {
4183
+ const slug = await generateSlug(hypothesis, root);
4043
4184
  let finalSlug = slug;
4044
4185
  let attempt = 0;
4045
4186
  while (getExperimentBySlug(db, finalSlug)) {
@@ -4074,9 +4215,6 @@ function createNewExperiment(db, root, hypothesis) {
4074
4215
  }
4075
4216
  return exp;
4076
4217
  }
4077
- function slugify2(text) {
4078
- return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
4079
- }
4080
4218
  var fs11, path11, import_node_child_process5;
4081
4219
  var init_run = __esm({
4082
4220
  "src/commands/run.ts"() {