majlis 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1104 -966
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -575,6 +575,10 @@ Read as much code as you need to understand the problem. Reading is free \u2014
|
|
|
575
575
|
as many turns as necessary on Read, Grep, and Glob to build full context before
|
|
576
576
|
you touch anything.
|
|
577
577
|
|
|
578
|
+
Do NOT read raw data files (fixtures/, ground truth JSON/STL). The synthesis
|
|
579
|
+
has the relevant facts. Reading raw data wastes turns re-deriving what the
|
|
580
|
+
doubt/challenge/verify cycle already established.
|
|
581
|
+
|
|
578
582
|
## The Rule: ONE Change, Then Document
|
|
579
583
|
|
|
580
584
|
You make ONE code change per cycle. Not two, not "one more quick fix." ONE.
|
|
@@ -738,6 +742,9 @@ If the builder claims improvement but the framework metrics show regression, fla
|
|
|
738
742
|
- Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
|
|
739
743
|
- Do NOT run exhaustive diagnostics on every claim.
|
|
740
744
|
|
|
745
|
+
Framework-captured metrics are ground truth \u2014 if they show regression, that
|
|
746
|
+
alone justifies a "rejected" grade. Do not re-derive from raw fixture data.
|
|
747
|
+
|
|
741
748
|
Grade each component: sound / good / weak / rejected
|
|
742
749
|
Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
|
|
743
750
|
|
|
@@ -817,6 +824,13 @@ the database export.
|
|
|
817
824
|
The framework does NOT auto-save your output for these files.
|
|
818
825
|
7. Review classification: new sub-types? resolved sub-types?
|
|
819
826
|
|
|
827
|
+
You may ONLY write to these three files:
|
|
828
|
+
- docs/synthesis/current.md
|
|
829
|
+
- docs/synthesis/fragility.md
|
|
830
|
+
- docs/synthesis/dead-ends.md
|
|
831
|
+
|
|
832
|
+
Do NOT modify MEMORY.md, .claude/, classification/, experiments/, or any other paths.
|
|
833
|
+
|
|
820
834
|
You may NOT write code, make decisions, or run experiments.
|
|
821
835
|
|
|
822
836
|
## Structured Output Format
|
|
@@ -1805,1076 +1819,1197 @@ var init_status = __esm({
|
|
|
1805
1819
|
}
|
|
1806
1820
|
});
|
|
1807
1821
|
|
|
1808
|
-
// src/
|
|
1809
|
-
function
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
delta: a.metric_value - b.metric_value,
|
|
1828
|
-
regression
|
|
1829
|
-
});
|
|
1830
|
-
}
|
|
1831
|
-
}
|
|
1832
|
-
}
|
|
1833
|
-
return comparisons;
|
|
1834
|
-
}
|
|
1835
|
-
function isRegression(before, after, direction) {
|
|
1836
|
-
switch (direction) {
|
|
1837
|
-
case "lower_is_better":
|
|
1838
|
-
return after > before;
|
|
1839
|
-
case "higher_is_better":
|
|
1840
|
-
return after < before;
|
|
1841
|
-
case "closer_to_gt":
|
|
1842
|
-
return false;
|
|
1822
|
+
// src/agents/types.ts
|
|
1823
|
+
function getExtractionSchema(role) {
|
|
1824
|
+
switch (role) {
|
|
1825
|
+
case "builder":
|
|
1826
|
+
return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}';
|
|
1827
|
+
case "critic":
|
|
1828
|
+
return '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}';
|
|
1829
|
+
case "adversary":
|
|
1830
|
+
return '{"challenges": [{"description": "string", "reasoning": "string"}]}';
|
|
1831
|
+
case "verifier":
|
|
1832
|
+
return '{"grades": [{"component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string"}], "doubt_resolutions": [{"doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive"}]}';
|
|
1833
|
+
case "gatekeeper":
|
|
1834
|
+
return '{"gate_decision": "approve|reject|flag", "reason": "string", "stale_references": ["string"], "overlapping_dead_ends": [0]}';
|
|
1835
|
+
case "reframer":
|
|
1836
|
+
return '{"reframe": {"decomposition": "string", "divergences": ["string"], "recommendation": "string"}}';
|
|
1837
|
+
case "scout":
|
|
1838
|
+
return '{"findings": [{"approach": "string", "source": "string", "relevance": "string", "contradicts_current": true}]}';
|
|
1839
|
+
case "compressor":
|
|
1840
|
+
return '{"compression_report": {"synthesis_delta": "string", "new_dead_ends": ["string"], "fragility_changes": ["string"]}}';
|
|
1843
1841
|
default:
|
|
1844
|
-
return
|
|
1845
|
-
}
|
|
1846
|
-
}
|
|
1847
|
-
function parseMetricsOutput(jsonStr) {
|
|
1848
|
-
const data = JSON.parse(jsonStr);
|
|
1849
|
-
const results = [];
|
|
1850
|
-
if (data.fixtures && typeof data.fixtures === "object") {
|
|
1851
|
-
for (const [fixture, metrics] of Object.entries(data.fixtures)) {
|
|
1852
|
-
for (const [metricName, metricValue] of Object.entries(metrics)) {
|
|
1853
|
-
if (typeof metricValue === "number") {
|
|
1854
|
-
results.push({ fixture, metric_name: metricName, metric_value: metricValue });
|
|
1855
|
-
}
|
|
1856
|
-
}
|
|
1857
|
-
}
|
|
1842
|
+
return EXTRACTION_SCHEMA;
|
|
1858
1843
|
}
|
|
1859
|
-
return results;
|
|
1860
1844
|
}
|
|
1861
|
-
var
|
|
1862
|
-
|
|
1845
|
+
var EXTRACTION_SCHEMA, ROLE_REQUIRED_FIELDS;
|
|
1846
|
+
var init_types = __esm({
|
|
1847
|
+
"src/agents/types.ts"() {
|
|
1863
1848
|
"use strict";
|
|
1864
|
-
|
|
1849
|
+
EXTRACTION_SCHEMA = `{
|
|
1850
|
+
"decisions": [{ "description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string" }],
|
|
1851
|
+
"grades": [{ "component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string" }],
|
|
1852
|
+
"doubts": [{ "claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical" }],
|
|
1853
|
+
"guidance": "string (actionable builder guidance)",
|
|
1854
|
+
"doubt_resolutions": [{ "doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive" }]
|
|
1855
|
+
}`;
|
|
1856
|
+
ROLE_REQUIRED_FIELDS = {
|
|
1857
|
+
builder: ["decisions"],
|
|
1858
|
+
critic: ["doubts"],
|
|
1859
|
+
adversary: ["challenges"],
|
|
1860
|
+
verifier: ["grades"],
|
|
1861
|
+
gatekeeper: ["gate_decision"],
|
|
1862
|
+
reframer: ["reframe"],
|
|
1863
|
+
scout: ["findings"],
|
|
1864
|
+
compressor: ["compression_report"]
|
|
1865
|
+
};
|
|
1865
1866
|
}
|
|
1866
1867
|
});
|
|
1867
1868
|
|
|
1868
|
-
// src/
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
});
|
|
1875
|
-
async function baseline(args) {
|
|
1876
|
-
await captureMetrics("before", args);
|
|
1877
|
-
}
|
|
1878
|
-
async function measure(args) {
|
|
1879
|
-
await captureMetrics("after", args);
|
|
1880
|
-
}
|
|
1881
|
-
async function captureMetrics(phase, args) {
|
|
1882
|
-
const root = findProjectRoot();
|
|
1883
|
-
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
1884
|
-
const db = getDb(root);
|
|
1885
|
-
const config = loadConfig(root);
|
|
1886
|
-
const expIdStr = getFlagValue(args, "--experiment");
|
|
1887
|
-
let exp;
|
|
1888
|
-
if (expIdStr !== void 0) {
|
|
1889
|
-
exp = getExperimentById(db, Number(expIdStr));
|
|
1869
|
+
// src/agents/parse.ts
|
|
1870
|
+
async function extractStructuredData(role, markdown) {
|
|
1871
|
+
const tier1 = extractMajlisJsonBlock(markdown);
|
|
1872
|
+
if (tier1) {
|
|
1873
|
+
const parsed = tryParseJson(tier1);
|
|
1874
|
+
if (parsed) return parsed;
|
|
1875
|
+
console.warn(`[majlis] Malformed JSON in <!-- majlis-json --> block for ${role}. Falling back.`);
|
|
1890
1876
|
} else {
|
|
1891
|
-
|
|
1892
|
-
}
|
|
1893
|
-
if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
|
|
1894
|
-
if (config.build.pre_measure) {
|
|
1895
|
-
info(`Running pre-measure: ${config.build.pre_measure}`);
|
|
1896
|
-
try {
|
|
1897
|
-
(0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
|
|
1898
|
-
} catch {
|
|
1899
|
-
warn("Pre-measure command failed \u2014 continuing anyway.");
|
|
1900
|
-
}
|
|
1877
|
+
console.warn(`[majlis] No <!-- majlis-json --> block found in ${role} output. Falling back.`);
|
|
1901
1878
|
}
|
|
1902
|
-
|
|
1903
|
-
|
|
1879
|
+
const tier2 = extractViaPatterns(role, markdown);
|
|
1880
|
+
if (tier2 && hasData(tier2)) {
|
|
1881
|
+
console.warn(`[majlis] Used regex fallback for ${role}. Review extracted data.`);
|
|
1882
|
+
return tier2;
|
|
1904
1883
|
}
|
|
1905
|
-
|
|
1906
|
-
|
|
1884
|
+
console.warn(`[majlis] Regex fallback insufficient for ${role}. Using Haiku extraction.`);
|
|
1885
|
+
const tier3 = await extractViaHaiku(role, markdown);
|
|
1886
|
+
if (tier3) return tier3;
|
|
1887
|
+
console.error(
|
|
1888
|
+
`[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
|
|
1889
|
+
);
|
|
1890
|
+
return null;
|
|
1891
|
+
}
|
|
1892
|
+
function extractMajlisJsonBlock(markdown) {
|
|
1893
|
+
const match = markdown.match(/<!--\s*majlis-json\s*\n([\s\S]*?)-->/);
|
|
1894
|
+
if (!match) return null;
|
|
1895
|
+
return match[1].trim();
|
|
1896
|
+
}
|
|
1897
|
+
function tryParseJson(jsonStr) {
|
|
1907
1898
|
try {
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
1912
|
-
});
|
|
1913
|
-
} catch (err) {
|
|
1914
|
-
throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1915
|
-
}
|
|
1916
|
-
const parsed = parseMetricsOutput(metricsOutput);
|
|
1917
|
-
if (parsed.length === 0) {
|
|
1918
|
-
warn("Metrics command returned no data.");
|
|
1919
|
-
return;
|
|
1899
|
+
return JSON.parse(jsonStr);
|
|
1900
|
+
} catch {
|
|
1901
|
+
return null;
|
|
1920
1902
|
}
|
|
1921
|
-
|
|
1922
|
-
|
|
1903
|
+
}
|
|
1904
|
+
function extractViaPatterns(role, markdown) {
|
|
1905
|
+
const result = {};
|
|
1906
|
+
const decisionPattern = /\[(?:decision|Decision)\].*?(?:description|Description):\s*(.+?)(?:\n|$).*?(?:evidence.?level|Evidence.?Level|level):\s*(proof|test|strong_consensus|consensus|analogy|judgment).*?(?:justification|Justification):\s*(.+?)(?:\n|$)/gis;
|
|
1907
|
+
const decisions = [];
|
|
1908
|
+
const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
|
|
1909
|
+
let match;
|
|
1910
|
+
while ((match = evidenceMarkers.exec(markdown)) !== null) {
|
|
1911
|
+
decisions.push({
|
|
1912
|
+
description: match[1].trim(),
|
|
1913
|
+
evidence_level: match[2].toLowerCase().trim(),
|
|
1914
|
+
justification: "Extracted via regex \u2014 review"
|
|
1915
|
+
});
|
|
1923
1916
|
}
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
1929
|
-
|
|
1917
|
+
const inlineTagPattern = /\[(proof|test|strong_consensus|consensus|analogy|judgment)\]\s*(.+?)(?:\n|$)/gi;
|
|
1918
|
+
while ((match = inlineTagPattern.exec(markdown)) !== null) {
|
|
1919
|
+
const desc = match[2].trim();
|
|
1920
|
+
if (!decisions.some((d) => d.description === desc)) {
|
|
1921
|
+
decisions.push({
|
|
1922
|
+
description: desc,
|
|
1923
|
+
evidence_level: match[1].toLowerCase(),
|
|
1924
|
+
justification: "Extracted via regex \u2014 review"
|
|
1925
|
+
});
|
|
1930
1926
|
}
|
|
1931
1927
|
}
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
const
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
if (expIdStr !== void 0) {
|
|
1941
|
-
exp = getExperimentById(db, Number(expIdStr));
|
|
1942
|
-
} else {
|
|
1943
|
-
exp = getLatestExperiment(db);
|
|
1928
|
+
if (decisions.length > 0) result.decisions = decisions;
|
|
1929
|
+
const grades = [];
|
|
1930
|
+
const gradePattern = /(?:^|\n)\s*[-*]?\s*\*?\*?(?:Grade|GRADE|Component)\*?\*?.*?(?:component|Component)?\s*[:=]\s*(.+?)(?:\n|,).*?(?:grade|Grade)\s*[:=]\s*(sound|good|weak|rejected)/gim;
|
|
1931
|
+
while ((match = gradePattern.exec(markdown)) !== null) {
|
|
1932
|
+
grades.push({
|
|
1933
|
+
component: match[1].trim(),
|
|
1934
|
+
grade: match[2].toLowerCase().trim()
|
|
1935
|
+
});
|
|
1944
1936
|
}
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1937
|
+
const simpleGradePattern = /(?:^|\n)\s*[-*]\s*\*?\*?(.+?)\*?\*?\s*[:—–-]\s*\*?\*?(sound|good|weak|rejected)\*?\*?/gim;
|
|
1938
|
+
while ((match = simpleGradePattern.exec(markdown)) !== null) {
|
|
1939
|
+
const comp = match[1].trim();
|
|
1940
|
+
if (!grades.some((g) => g.component === comp)) {
|
|
1941
|
+
grades.push({
|
|
1942
|
+
component: comp,
|
|
1943
|
+
grade: match[2].toLowerCase().trim()
|
|
1944
|
+
});
|
|
1945
|
+
}
|
|
1950
1946
|
}
|
|
1951
|
-
if (
|
|
1952
|
-
|
|
1953
|
-
|
|
1947
|
+
if (grades.length > 0) result.grades = grades;
|
|
1948
|
+
const doubts = [];
|
|
1949
|
+
const doubtPattern = /(?:Doubt|DOUBT|Claim doubted|CLAIM)\s*(?:\d+)?[:.]?\s*(.+?)(?:\n|$)[\s\S]*?(?:Severity|SEVERITY)\s*[:=]\s*(minor|moderate|critical)/gim;
|
|
1950
|
+
while ((match = doubtPattern.exec(markdown)) !== null) {
|
|
1951
|
+
doubts.push({
|
|
1952
|
+
claim_doubted: match[1].trim(),
|
|
1953
|
+
evidence_level_of_claim: "unknown",
|
|
1954
|
+
// Don't fabricate — mark as unknown for review
|
|
1955
|
+
evidence_for_doubt: "Extracted via regex \u2014 review original document",
|
|
1956
|
+
severity: match[2].toLowerCase().trim()
|
|
1957
|
+
});
|
|
1954
1958
|
}
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1959
|
+
if (doubts.length > 0) result.doubts = doubts;
|
|
1960
|
+
return result;
|
|
1961
|
+
}
|
|
1962
|
+
async function extractViaHaiku(role, markdown) {
|
|
1963
|
+
try {
|
|
1964
|
+
const truncated = markdown.length > 8e3 ? markdown.slice(0, 8e3) + "\n[truncated]" : markdown;
|
|
1965
|
+
const schema = getExtractionSchema(role);
|
|
1966
|
+
const prompt = `Extract structured data from this ${role} document as JSON. Follow this schema exactly: ${schema}
|
|
1967
|
+
|
|
1968
|
+
Document:
|
|
1969
|
+
${truncated}`;
|
|
1970
|
+
const conversation = (0, import_claude_agent_sdk.query)({
|
|
1971
|
+
prompt,
|
|
1972
|
+
options: {
|
|
1973
|
+
model: "haiku",
|
|
1974
|
+
tools: [],
|
|
1975
|
+
systemPrompt: "You are a JSON extraction assistant. Output only valid JSON matching the requested schema. No markdown, no explanation, just JSON.",
|
|
1976
|
+
permissionMode: "bypassPermissions",
|
|
1977
|
+
allowDangerouslySkipPermissions: true,
|
|
1978
|
+
maxTurns: 1,
|
|
1979
|
+
persistSession: false
|
|
1980
|
+
}
|
|
1981
|
+
});
|
|
1982
|
+
let resultText = "";
|
|
1983
|
+
for await (const message of conversation) {
|
|
1984
|
+
if (message.type === "assistant") {
|
|
1985
|
+
for (const block of message.message.content) {
|
|
1986
|
+
if (block.type === "text") {
|
|
1987
|
+
resultText += block.text;
|
|
1988
|
+
}
|
|
1989
|
+
}
|
|
1990
|
+
}
|
|
1991
|
+
}
|
|
1992
|
+
return tryParseJson(resultText.trim());
|
|
1993
|
+
} catch (err) {
|
|
1994
|
+
console.warn(`[majlis] Haiku extraction failed for ${role}: ${err instanceof Error ? err.message : String(err)}`);
|
|
1995
|
+
return null;
|
|
1972
1996
|
}
|
|
1973
1997
|
}
|
|
1974
|
-
function
|
|
1975
|
-
|
|
1976
|
-
return `${prefix}${delta.toFixed(4)}`;
|
|
1998
|
+
function hasData(output) {
|
|
1999
|
+
return !!(output.decisions && output.decisions.length > 0 || output.grades && output.grades.length > 0 || output.doubts && output.doubts.length > 0 || output.challenges && output.challenges.length > 0 || output.findings && output.findings.length > 0 || output.guidance || output.reframe || output.compression_report || output.gate_decision);
|
|
1977
2000
|
}
|
|
1978
|
-
|
|
1979
|
-
|
|
1980
|
-
|
|
2001
|
+
function validateForRole(role, output) {
|
|
2002
|
+
const required = ROLE_REQUIRED_FIELDS[role];
|
|
2003
|
+
if (!required) return { valid: true, missing: [] };
|
|
2004
|
+
const missing = required.filter((field) => {
|
|
2005
|
+
const value = output[field];
|
|
2006
|
+
if (value === void 0 || value === null) return true;
|
|
2007
|
+
if (Array.isArray(value) && value.length === 0) return true;
|
|
2008
|
+
return false;
|
|
2009
|
+
});
|
|
2010
|
+
return { valid: missing.length === 0, missing };
|
|
2011
|
+
}
|
|
2012
|
+
var import_claude_agent_sdk;
|
|
2013
|
+
var init_parse = __esm({
|
|
2014
|
+
"src/agents/parse.ts"() {
|
|
1981
2015
|
"use strict";
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
init_queries();
|
|
1985
|
-
init_metrics();
|
|
1986
|
-
init_config();
|
|
1987
|
-
init_format();
|
|
2016
|
+
init_types();
|
|
2017
|
+
import_claude_agent_sdk = require("@anthropic-ai/claude-agent-sdk");
|
|
1988
2018
|
}
|
|
1989
2019
|
});
|
|
1990
2020
|
|
|
1991
|
-
// src/
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
});
|
|
1997
|
-
async function newExperiment(args) {
|
|
1998
|
-
const root = findProjectRoot();
|
|
1999
|
-
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
2000
|
-
const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
|
|
2001
|
-
if (!hypothesis) {
|
|
2002
|
-
throw new Error('Usage: majlis new "hypothesis"');
|
|
2003
|
-
}
|
|
2004
|
-
const db = getDb(root);
|
|
2005
|
-
const config = loadConfig(root);
|
|
2006
|
-
const slug = slugify(hypothesis);
|
|
2007
|
-
if (getExperimentBySlug(db, slug)) {
|
|
2008
|
-
throw new Error(`Experiment with slug "${slug}" already exists.`);
|
|
2009
|
-
}
|
|
2010
|
-
const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
|
|
2011
|
-
const num = allExps.count + 1;
|
|
2012
|
-
const paddedNum = String(num).padStart(3, "0");
|
|
2013
|
-
const branch = `exp/${paddedNum}-${slug}`;
|
|
2014
|
-
try {
|
|
2015
|
-
(0, import_node_child_process2.execSync)(`git checkout -b ${branch}`, {
|
|
2016
|
-
cwd: root,
|
|
2017
|
-
encoding: "utf-8",
|
|
2018
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
2019
|
-
});
|
|
2020
|
-
info(`Created branch: ${branch}`);
|
|
2021
|
-
} catch (err) {
|
|
2022
|
-
warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
|
|
2021
|
+
// src/agents/spawn.ts
|
|
2022
|
+
function loadAgentDefinition(role, projectRoot) {
|
|
2023
|
+
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2024
|
+
const filePath = path4.join(root, ".majlis", "agents", `${role}.md`);
|
|
2025
|
+
if (!fs4.existsSync(filePath)) {
|
|
2026
|
+
throw new Error(`Agent definition not found: ${filePath}`);
|
|
2023
2027
|
}
|
|
2024
|
-
const
|
|
2025
|
-
const
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
const templatePath = path4.join(docsDir, "_TEMPLATE.md");
|
|
2029
|
-
if (fs4.existsSync(templatePath)) {
|
|
2030
|
-
const template = fs4.readFileSync(templatePath, "utf-8");
|
|
2031
|
-
const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, subType ?? "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
|
|
2032
|
-
const logPath = path4.join(docsDir, `${paddedNum}-${slug}.md`);
|
|
2033
|
-
fs4.writeFileSync(logPath, logContent);
|
|
2034
|
-
info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
|
|
2028
|
+
const content = fs4.readFileSync(filePath, "utf-8");
|
|
2029
|
+
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
|
|
2030
|
+
if (!frontmatterMatch) {
|
|
2031
|
+
throw new Error(`Invalid agent definition (missing YAML frontmatter): ${filePath}`);
|
|
2035
2032
|
}
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2033
|
+
const frontmatter = frontmatterMatch[1];
|
|
2034
|
+
const body = frontmatterMatch[2].trim();
|
|
2035
|
+
const name = extractYamlField(frontmatter, "name") ?? role;
|
|
2036
|
+
const model = extractYamlField(frontmatter, "model") ?? "opus";
|
|
2037
|
+
const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
|
|
2038
|
+
const tools = toolsStr.replace(/[\[\]]/g, "").split(",").map((t) => t.trim()).filter(Boolean);
|
|
2039
|
+
return { name, model, tools, systemPrompt: body };
|
|
2040
|
+
}
|
|
2041
|
+
function buildCheckpointMessage(role, toolUseCount, maxTurns) {
|
|
2042
|
+
const approxTurn = Math.round(toolUseCount / 2);
|
|
2043
|
+
const header2 = `[MAJLIS CHECKPOINT \u2014 ~${approxTurn} of ${maxTurns} turns used]`;
|
|
2044
|
+
switch (role) {
|
|
2045
|
+
case "builder":
|
|
2046
|
+
return `${header2}
|
|
2047
|
+
Reminder: ONE code change per cycle.
|
|
2048
|
+
- Have you run the benchmark? YES \u2192 document results + output JSON + STOP.
|
|
2049
|
+
- If NO \u2192 run it now, then wrap up.
|
|
2050
|
+
Do NOT start a second change or investigate unrelated failures.`;
|
|
2051
|
+
case "verifier":
|
|
2052
|
+
return `${header2}
|
|
2053
|
+
AT MOST 3 diagnostic scripts total.
|
|
2054
|
+
- If \u22653 scripts run \u2192 produce grades + output JSON now.
|
|
2055
|
+
- Trust framework metrics. Do not re-derive from raw data.`;
|
|
2056
|
+
case "critic":
|
|
2057
|
+
return `${header2}
|
|
2058
|
+
Focus on the SINGLE weakest assumption.
|
|
2059
|
+
- Have you identified the core doubt? YES \u2192 write it up + output JSON.
|
|
2060
|
+
- Do not enumerate every possible concern \u2014 pick the most dangerous one.`;
|
|
2061
|
+
case "adversary":
|
|
2062
|
+
return `${header2}
|
|
2063
|
+
Design ONE targeted challenge, not a test suite.
|
|
2064
|
+
- Have you defined the challenge? YES \u2192 write it up + output JSON.
|
|
2065
|
+
- Focus on what would DISPROVE the hypothesis, not general testing.`;
|
|
2066
|
+
case "compressor":
|
|
2067
|
+
return `${header2}
|
|
2068
|
+
You may ONLY write to docs/synthesis/.
|
|
2069
|
+
- Have you updated current.md, fragility.md, dead-ends.md?
|
|
2070
|
+
- If yes \u2192 output compression report JSON.
|
|
2071
|
+
- Do NOT write to MEMORY.md or files outside docs/synthesis/.`;
|
|
2072
|
+
default:
|
|
2073
|
+
return `${header2}
|
|
2074
|
+
Check: is your core task done? If yes, wrap up and output JSON.`;
|
|
2075
|
+
}
|
|
2076
|
+
}
|
|
2077
|
+
function buildPreToolUseGuards(role) {
|
|
2078
|
+
if (role === "compressor") {
|
|
2079
|
+
const guardHook = async (input) => {
|
|
2080
|
+
const toolInput = input.tool_input ?? {};
|
|
2081
|
+
const filePath = toolInput.file_path ?? "";
|
|
2082
|
+
if (filePath && !filePath.includes("/docs/synthesis/")) {
|
|
2083
|
+
return {
|
|
2084
|
+
decision: "block",
|
|
2085
|
+
reason: `Compressor may only write to docs/synthesis/. Blocked: ${filePath}`
|
|
2086
|
+
};
|
|
2087
|
+
}
|
|
2088
|
+
return {};
|
|
2089
|
+
};
|
|
2090
|
+
return [
|
|
2091
|
+
{ matcher: "Write", hooks: [guardHook] },
|
|
2092
|
+
{ matcher: "Edit", hooks: [guardHook] }
|
|
2093
|
+
];
|
|
2044
2094
|
}
|
|
2095
|
+
return void 0;
|
|
2045
2096
|
}
|
|
2046
|
-
|
|
2047
|
-
const
|
|
2048
|
-
|
|
2049
|
-
const
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2097
|
+
function buildAgentHooks(role, maxTurns) {
|
|
2098
|
+
const result = {};
|
|
2099
|
+
let hasHooks = false;
|
|
2100
|
+
const interval = CHECKPOINT_INTERVAL[role];
|
|
2101
|
+
if (interval) {
|
|
2102
|
+
let toolUseCount = 0;
|
|
2103
|
+
const checkpointHook = async () => {
|
|
2104
|
+
toolUseCount++;
|
|
2105
|
+
if (toolUseCount % interval === 0) {
|
|
2106
|
+
const msg = buildCheckpointMessage(role, toolUseCount, maxTurns);
|
|
2107
|
+
return {
|
|
2108
|
+
hookSpecificOutput: {
|
|
2109
|
+
hookEventName: "PostToolUse",
|
|
2110
|
+
additionalContext: msg
|
|
2111
|
+
}
|
|
2112
|
+
};
|
|
2113
|
+
}
|
|
2114
|
+
return {};
|
|
2115
|
+
};
|
|
2116
|
+
result.PostToolUse = [{ hooks: [checkpointHook] }];
|
|
2117
|
+
hasHooks = true;
|
|
2058
2118
|
}
|
|
2059
|
-
const
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
exp.id,
|
|
2064
|
-
exp.hypothesis ?? exp.slug,
|
|
2065
|
-
reason,
|
|
2066
|
-
`Reverted: ${reason}`,
|
|
2067
|
-
exp.sub_type,
|
|
2068
|
-
category
|
|
2069
|
-
);
|
|
2070
|
-
updateExperimentStatus(db, exp.id, "dead_end");
|
|
2071
|
-
try {
|
|
2072
|
-
const currentBranch = (0, import_node_child_process2.execSync)("git rev-parse --abbrev-ref HEAD", {
|
|
2073
|
-
cwd: root,
|
|
2074
|
-
encoding: "utf-8"
|
|
2075
|
-
}).trim();
|
|
2076
|
-
if (currentBranch === exp.branch) {
|
|
2077
|
-
(0, import_node_child_process2.execSync)("git checkout main 2>/dev/null || git checkout master", {
|
|
2078
|
-
cwd: root,
|
|
2079
|
-
encoding: "utf-8",
|
|
2080
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
2081
|
-
});
|
|
2082
|
-
}
|
|
2083
|
-
} catch {
|
|
2084
|
-
warn("Could not switch git branches \u2014 do this manually.");
|
|
2119
|
+
const guards = buildPreToolUseGuards(role);
|
|
2120
|
+
if (guards) {
|
|
2121
|
+
result.PreToolUse = guards;
|
|
2122
|
+
hasHooks = true;
|
|
2085
2123
|
}
|
|
2086
|
-
|
|
2124
|
+
return hasHooks ? result : void 0;
|
|
2087
2125
|
}
|
|
2088
|
-
function
|
|
2089
|
-
|
|
2126
|
+
function extractYamlField(yaml, field) {
|
|
2127
|
+
const match = yaml.match(new RegExp(`^${field}:\\s*(.+)$`, "m"));
|
|
2128
|
+
return match ? match[1].trim() : null;
|
|
2090
2129
|
}
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
import_node_child_process2 = require("child_process");
|
|
2098
|
-
init_connection();
|
|
2099
|
-
init_queries();
|
|
2100
|
-
init_config();
|
|
2101
|
-
init_format();
|
|
2102
|
-
}
|
|
2103
|
-
});
|
|
2130
|
+
async function spawnAgent(role, context, projectRoot) {
|
|
2131
|
+
const agentDef = loadAgentDefinition(role, projectRoot);
|
|
2132
|
+
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2133
|
+
const taskPrompt = context.taskPrompt ?? `Perform your role as ${agentDef.name}.`;
|
|
2134
|
+
const contextJson = JSON.stringify(context);
|
|
2135
|
+
const prompt = `Here is your context:
|
|
2104
2136
|
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
}
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2137
|
+
\`\`\`json
|
|
2138
|
+
${contextJson}
|
|
2139
|
+
\`\`\`
|
|
2140
|
+
|
|
2141
|
+
${taskPrompt}`;
|
|
2142
|
+
const turns = ROLE_MAX_TURNS[role] ?? 15;
|
|
2143
|
+
console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
|
|
2144
|
+
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2145
|
+
prompt,
|
|
2146
|
+
model: agentDef.model,
|
|
2147
|
+
tools: agentDef.tools,
|
|
2148
|
+
systemPrompt: agentDef.systemPrompt,
|
|
2149
|
+
cwd: root,
|
|
2150
|
+
maxTurns: turns,
|
|
2151
|
+
label: role,
|
|
2152
|
+
role
|
|
2153
|
+
});
|
|
2154
|
+
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
|
|
2155
|
+
const artifactPath = writeArtifact(role, context, markdown, root);
|
|
2156
|
+
if (artifactPath) {
|
|
2157
|
+
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
2114
2158
|
}
|
|
2115
|
-
const
|
|
2116
|
-
if (
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
if (!intent) {
|
|
2121
|
-
throw new Error('Usage: majlis session start "intent"');
|
|
2159
|
+
const structured = await extractStructuredData(role, markdown);
|
|
2160
|
+
if (structured) {
|
|
2161
|
+
const { valid, missing } = validateForRole(role, structured);
|
|
2162
|
+
if (!valid) {
|
|
2163
|
+
console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
|
|
2122
2164
|
}
|
|
2123
|
-
const existing = getActiveSession(db);
|
|
2124
|
-
if (existing) {
|
|
2125
|
-
warn(`Session already active: "${existing.intent}" (started ${existing.started_at})`);
|
|
2126
|
-
warn("End it first with `majlis session end`.");
|
|
2127
|
-
return;
|
|
2128
|
-
}
|
|
2129
|
-
const latestExp = getLatestExperiment(db);
|
|
2130
|
-
const sess = startSession(db, intent, latestExp?.id ?? null);
|
|
2131
|
-
success(`Session started: "${intent}" (id: ${sess.id})`);
|
|
2132
|
-
if (latestExp) {
|
|
2133
|
-
info(`Linked to experiment: ${latestExp.slug} (${latestExp.status})`);
|
|
2134
|
-
}
|
|
2135
|
-
} else {
|
|
2136
|
-
const active = getActiveSession(db);
|
|
2137
|
-
if (!active) {
|
|
2138
|
-
throw new Error("No active session to end.");
|
|
2139
|
-
}
|
|
2140
|
-
const accomplished = getFlagValue(args, "--accomplished") ?? null;
|
|
2141
|
-
const unfinished = getFlagValue(args, "--unfinished") ?? null;
|
|
2142
|
-
const fragility = getFlagValue(args, "--fragility") ?? null;
|
|
2143
|
-
endSession(db, active.id, accomplished, unfinished, fragility);
|
|
2144
|
-
success(`Session ended: "${active.intent}"`);
|
|
2145
|
-
if (accomplished) info(`Accomplished: ${accomplished}`);
|
|
2146
|
-
if (unfinished) info(`Unfinished: ${unfinished}`);
|
|
2147
|
-
if (fragility) warn(`New fragility: ${fragility}`);
|
|
2148
2165
|
}
|
|
2166
|
+
return { output: markdown, structured, truncated };
|
|
2149
2167
|
}
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
init_config();
|
|
2156
|
-
init_format();
|
|
2157
|
-
}
|
|
2158
|
-
});
|
|
2168
|
+
async function spawnSynthesiser(context, projectRoot) {
|
|
2169
|
+
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2170
|
+
const contextJson = JSON.stringify(context);
|
|
2171
|
+
const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
|
|
2172
|
+
const prompt = `Here is your context:
|
|
2159
2173
|
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
}
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
|
|
2168
|
-
|
|
2169
|
-
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
return queryCircuitBreakers(db, root, isJson);
|
|
2180
|
-
case "check-commit":
|
|
2181
|
-
return checkCommit(db);
|
|
2182
|
-
}
|
|
2174
|
+
\`\`\`json
|
|
2175
|
+
${contextJson}
|
|
2176
|
+
\`\`\`
|
|
2177
|
+
|
|
2178
|
+
${taskPrompt}`;
|
|
2179
|
+
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
|
|
2180
|
+
console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
|
|
2181
|
+
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2182
|
+
prompt,
|
|
2183
|
+
model: "sonnet",
|
|
2184
|
+
tools: ["Read", "Glob", "Grep"],
|
|
2185
|
+
systemPrompt,
|
|
2186
|
+
cwd: root,
|
|
2187
|
+
maxTurns: 5,
|
|
2188
|
+
label: "synthesiser",
|
|
2189
|
+
role: "synthesiser"
|
|
2190
|
+
});
|
|
2191
|
+
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2192
|
+
return { output: markdown, structured: { guidance: markdown }, truncated };
|
|
2183
2193
|
}
|
|
2184
|
-
function
|
|
2185
|
-
const
|
|
2186
|
-
const
|
|
2187
|
-
|
|
2188
|
-
const
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2194
|
+
/**
 * Spawn a small "recovery" agent that rewrites an experiment doc after another
 * agent was cut off by its turn limit.
 *
 * The agent is given the tail of the truncated agent's output, the current
 * experiment doc (if the file exists) and the doc template (if the file
 * exists), and is asked to write a clean doc back to the experiment doc path.
 *
 * @param {string} role - Role name of the agent that was truncated; used in log lines and the prompt.
 * @param {string} partialOutput - Text produced by the truncated agent. Only the last 3000 characters are forwarded.
 * @param {object} context - Agent context; `context.experiment.slug` and `context.experiment.id` are read when present.
 * @param {string} [projectRoot] - Project root; falls back to `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<void>} Resolves once the recovery query has finished.
 */
async function spawnRecovery(role, partialOutput, context, projectRoot) {
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
  const expSlug = context.experiment?.slug ?? "unknown";
  console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
  const expDocPath = path4.join(
    root,
    "docs",
    "experiments",
    `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
  );
  const templatePath = path4.join(root, "docs", "experiments", "_TEMPLATE.md");
  const template = fs4.existsSync(templatePath) ? fs4.readFileSync(templatePath, "utf-8") : "";
  const currentDoc = fs4.existsSync(expDocPath) ? fs4.readFileSync(expDocPath, "utf-8") : "";
  // Coerce defensively so a missing/non-string partial output cannot crash the
  // cleanup step; only the tail is sent to keep the prompt small.
  const partialTail = String(partialOutput ?? "").slice(-3e3);
  const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".

Here is the partial agent output (reasoning + tool calls):
<partial_output>
${partialTail}
</partial_output>

Here is the current experiment doc:
<current_doc>
${currentDoc}
</current_doc>

Here is the template that the experiment doc should follow:
<template>
${template}
</template>

Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
- Keep any valid content from the current doc
- Fill in what you can infer from the partial output
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
- Be concise. This is cleanup, not new work.`;
  // The agent's text reply is not used here; the prompt asks it to write the
  // doc itself via the Write tool.
  await runQuery({
    prompt,
    model: "haiku",
    tools: ["Read", "Write"],
    systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
    cwd: root,
    maxTurns: 5,
    label: "recovery",
    role: "recovery"
  });
  console.log(`[recovery] Cleanup complete for ${expSlug}.`);
}
|
|
2207
|
-
function
|
|
2208
|
-
|
|
2209
|
-
const
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2243
|
+
/**
 * Run one agent conversation through the Claude agent SDK `query` call and
 * collect its text output.
 *
 * While the conversation streams, tool calls, text previews and long-running
 * tool progress are echoed to stderr, prefixed with `[label]`.
 *
 * @param {object} opts
 * @param {string} opts.prompt - User prompt for the agent.
 * @param {string} opts.model - Model name passed through to the SDK.
 * @param {string[]} opts.tools - Tool names passed through to the SDK.
 * @param {string} opts.systemPrompt - Text appended to the `claude_code` preset system prompt.
 * @param {string} opts.cwd - Working directory for the agent.
 * @param {number} [opts.maxTurns=15] - Turn limit.
 * @param {string} [opts.label="majlis"] - Tag used in log lines.
 * @param {string} [opts.role] - When set, hooks are built via `buildAgentHooks(role, maxTurns)`.
 * @returns {Promise<{text: string, costUsd: number, truncated: boolean}>}
 *   All text blocks joined by blank lines, the reported cost (0 when the SDK
 *   reports none), and whether the run stopped on the turn limit.
 * @throws {Error} When the result message is neither a success nor a max-turns stop.
 */
async function runQuery(opts) {
  const PREVIEW_LENGTH = 120;
  let truncated = false;
  const tag = opts.label ?? "majlis";
  // Single source of truth for the turn limit used by both hooks and the SDK.
  const maxTurns = opts.maxTurns ?? 15;
  const hooks = opts.role ? buildAgentHooks(opts.role, maxTurns) : void 0;
  const conversation = (0, import_claude_agent_sdk2.query)({
    prompt: opts.prompt,
    options: {
      model: opts.model,
      tools: opts.tools,
      systemPrompt: {
        type: "preset",
        preset: "claude_code",
        append: opts.systemPrompt
      },
      cwd: opts.cwd,
      permissionMode: "bypassPermissions",
      allowDangerouslySkipPermissions: true,
      maxTurns,
      persistSession: false,
      settingSources: ["project"],
      hooks
    }
  });
  const textParts = [];
  let costUsd = 0;
  let turnCount = 0;
  for await (const message of conversation) {
    if (message.type === "assistant") {
      turnCount++;
      let hasText = false;
      for (const block of message.message.content) {
        if (block.type === "text") {
          textParts.push(block.text);
          hasText = true;
        } else if (block.type === "tool_use") {
          const toolName = block.name ?? "tool";
          const input = block.input ?? {};
          const detail = formatToolDetail(toolName, input);
          process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}\n`);
        }
      }
      if (hasText) {
        const latest = textParts[textParts.length - 1];
        const preview = latest.slice(0, PREVIEW_LENGTH).replace(/\n/g, " ").trim();
        if (preview) {
          // Decide on the ellipsis from the untrimmed text: the trimmed preview
          // length says nothing about whether anything was actually cut off.
          const ellipsis = latest.length > PREVIEW_LENGTH ? "..." : "";
          process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${ellipsis}${RESET2}\n`);
        }
      }
    } else if (message.type === "tool_progress") {
      const elapsed = Math.round(message.elapsed_time_seconds);
      // Only report on multiples of five seconds to keep the log quiet.
      if (elapsed > 0 && elapsed % 5 === 0) {
        process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}\n`);
      }
    } else if (message.type === "result") {
      if (message.subtype === "success") {
        // Callers format the cost with toFixed(); never hand them undefined.
        costUsd = message.total_cost_usd ?? 0;
      } else if (message.subtype === "error_max_turns") {
        truncated = true;
        costUsd = message.total_cost_usd ?? 0;
        console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
      } else {
        const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
        throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
      }
    }
  }
  return { text: textParts.join("\n\n"), costUsd, truncated };
}
|
|
2235
|
-
function
|
|
2236
|
-
const
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2313
|
+
/**
 * Produce a short, hyphen-separated slug for an experiment hypothesis.
 *
 * A one-turn "haiku" query is asked for a slug; its answer is normalised and
 * used when it is at least 3 characters long. If the query throws, or the
 * answer is too short, a slug derived directly from the hypothesis text is
 * returned instead.
 *
 * @param {string} hypothesis - Free-text hypothesis.
 * @param {string} projectRoot - Working directory handed to the query.
 * @returns {Promise<string>} A non-empty slug made of `a-z`, `0-9` and single hyphens.
 */
async function generateSlug(hypothesis, projectRoot) {
  // "experiment" keeps the result non-empty when the hypothesis contains no
  // ASCII letters or digits at all.
  const fallback = toBranchSlug(hypothesis, 30) || "experiment";
  try {
    const { text } = await runQuery({
      prompt: `Generate a short, descriptive git branch slug (2-4 words, lowercase, hyphen-separated) for this experiment hypothesis:

"${hypothesis.slice(0, 500)}"

Output ONLY the slug, nothing else. Examples: uv-containment-filter, skip-degenerate-faces, fix-edge-sewing-order`,
      model: "haiku",
      tools: [],
      systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
      cwd: projectRoot,
      maxTurns: 1,
      label: "slug",
      role: "slug"
    });
    const slug = toBranchSlug(text, 40);
    return slug.length >= 3 ? slug : fallback;
  } catch {
    // Slug generation is best-effort: any failure falls back to the
    // locally derived slug rather than aborting experiment creation.
    return fallback;
  }
}

/**
 * Lower-case `raw`, turn every run of characters outside `a-z0-9` into a
 * single hyphen, drop leading/trailing hyphens, and cap the length without
 * leaving a dangling hyphen at the cut.
 */
function toBranchSlug(raw, maxLength) {
  return String(raw)
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, maxLength)
    .replace(/-+$/, "");
}
|
|
2249
|
-
function
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2336
|
+
/**
 * Build the short suffix shown after a tool name in the progress log.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} input - Tool input; only the field relevant to the tool is read.
 * @returns {string} A suffix beginning with a space, or "" when the tool is
 *   not recognised or the relevant field is missing/empty.
 */
function formatToolDetail(toolName, input) {
  switch (toolName) {
    case "Read":
    case "Edit":
      return input.file_path ? ` ${input.file_path}` : "";
    case "Write":
      return input.file_path ? ` \u2192 ${input.file_path}` : "";
    case "Glob":
      return input.pattern ? ` ${input.pattern}` : "";
    case "Grep":
      return input.pattern ? ` /${input.pattern}/` : "";
    case "Bash": {
      if (!input.command) return "";
      // Collapse newlines/indentation so a multi-line command still yields a
      // single log line, then cap the length.
      const oneLine = String(input.command).replace(/\s+/g, " ").trim();
      return oneLine ? ` $ ${oneLine.slice(0, 80)}` : "";
    }
    case "WebSearch":
      return input.query ? ` "${input.query}"` : "";
    default:
      return "";
  }
}
|
|
2356
|
+
/**
 * Persist an agent's markdown output under the docs directory for its role.
 *
 * Nothing is written for the "builder" and "compressor" roles, nor for roles
 * without a docs directory; `null` is returned in those cases.
 *
 * @param {string} role - Agent role.
 * @param {object} context - Agent context; `context.experiment.slug` / `.id` name the file when present.
 * @param {string} markdown - File contents.
 * @param {string} projectRoot - Project root the docs directory is resolved against.
 * @returns {string|null} Path of the written file, or `null` when nothing was written.
 */
function writeArtifact(role, context, markdown, projectRoot) {
  if (role === "builder" || role === "compressor") {
    return null;
  }
  const artifactDirs = {
    critic: "docs/doubts",
    adversary: "docs/challenges",
    verifier: "docs/verification",
    reframer: "docs/reframes",
    scout: "docs/rihla"
  };
  const relativeDir = artifactDirs[role];
  if (!relativeDir) {
    return null;
  }
  const outputDir = path4.join(projectRoot, relativeDir);
  if (!fs4.existsSync(outputDir)) {
    fs4.mkdirSync(outputDir, { recursive: true });
  }
  const experiment = context.experiment;
  const slugPart = experiment?.slug ?? "general";
  const numberPart = String(experiment?.id ?? 1).padStart(3, "0");
  const target = path4.join(outputDir, `${numberPart}-${role}-${slugPart}.md`);
  fs4.writeFileSync(target, markdown);
  return target;
}
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2380
|
+
var fs4, path4, import_claude_agent_sdk2, ROLE_MAX_TURNS, CHECKPOINT_INTERVAL, DIM2, RESET2, CYAN2;
|
|
2381
|
+
var init_spawn = __esm({
|
|
2382
|
+
"src/agents/spawn.ts"() {
|
|
2383
|
+
"use strict";
|
|
2384
|
+
fs4 = __toESM(require("fs"));
|
|
2385
|
+
path4 = __toESM(require("path"));
|
|
2386
|
+
import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
|
|
2387
|
+
init_parse();
|
|
2388
|
+
init_connection();
|
|
2389
|
+
ROLE_MAX_TURNS = {
|
|
2390
|
+
builder: 50,
|
|
2391
|
+
critic: 30,
|
|
2392
|
+
adversary: 30,
|
|
2393
|
+
verifier: 50,
|
|
2394
|
+
compressor: 30,
|
|
2395
|
+
reframer: 20,
|
|
2396
|
+
scout: 20,
|
|
2397
|
+
gatekeeper: 10
|
|
2398
|
+
};
|
|
2399
|
+
CHECKPOINT_INTERVAL = {
|
|
2400
|
+
builder: 15,
|
|
2401
|
+
verifier: 12,
|
|
2402
|
+
critic: 15,
|
|
2403
|
+
adversary: 15,
|
|
2404
|
+
compressor: 15
|
|
2405
|
+
};
|
|
2406
|
+
DIM2 = "\x1B[2m";
|
|
2407
|
+
RESET2 = "\x1B[0m";
|
|
2408
|
+
CYAN2 = "\x1B[36m";
|
|
2280
2409
|
}
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2410
|
+
});
|
|
2411
|
+
|
|
2412
|
+
// src/metrics.ts
|
|
2413
|
+
/**
 * Pair up the "before" and "after" metrics of an experiment and flag regressions.
 *
 * For every fixture that appears in either phase and every metric listed in
 * `config.metrics.tracked`, a comparison is produced when both phases have a
 * value. When a phase holds several rows for the same fixture/metric, the
 * first row returned by the query is used.
 *
 * @param {object} db - Database handle passed to `getMetricsByExperimentAndPhase`.
 * @param {number} experimentId - Experiment whose metrics are compared.
 * @param {object} config - Project config; `config.metrics.tracked[metric].direction`
 *   selects the regression rule and defaults to "lower_is_better".
 * @returns {Array<{fixture: string, metric: string, before: number, after: number, delta: number, regression: boolean}>}
 */
function compareMetrics(db, experimentId, config) {
  const before = getMetricsByExperimentAndPhase(db, experimentId, "before");
  const after = getMetricsByExperimentAndPhase(db, experimentId, "after");
  // Index rows once (fixture -> metric name -> first row) instead of scanning
  // both arrays for every fixture/metric pair.
  const indexFirstRows = (rows) => {
    const byFixture = new Map();
    for (const row of rows) {
      let byMetric = byFixture.get(row.fixture);
      if (!byMetric) {
        byMetric = new Map();
        byFixture.set(row.fixture, byMetric);
      }
      if (!byMetric.has(row.metric_name)) {
        byMetric.set(row.metric_name, row);
      }
    }
    return byFixture;
  };
  const beforeIndex = indexFirstRows(before);
  const afterIndex = indexFirstRows(after);
  const fixtures = new Set([...before, ...after].map((m) => m.fixture));
  const trackedMetrics = Object.keys(config.metrics.tracked);
  const comparisons = [];
  for (const fixture of fixtures) {
    for (const metric of trackedMetrics) {
      const b = beforeIndex.get(fixture)?.get(metric);
      const a = afterIndex.get(fixture)?.get(metric);
      if (!b || !a) continue;
      const direction = config.metrics.tracked[metric]?.direction ?? "lower_is_better";
      comparisons.push({
        fixture,
        metric,
        before: b.metric_value,
        after: a.metric_value,
        delta: a.metric_value - b.metric_value,
        regression: isRegression(b.metric_value, a.metric_value, direction)
      });
    }
  }
  return comparisons;
}
|
|
2294
|
-
function
|
|
2295
|
-
|
|
2296
|
-
|
|
2297
|
-
|
|
2298
|
-
|
|
2439
|
+
/**
 * Decide whether a metric moved in the wrong direction.
 *
 * @param {number} before - Value captured before the change.
 * @param {number} after - Value captured after the change.
 * @param {string} direction - "lower_is_better", "higher_is_better" or "closer_to_gt".
 * @param {number} [groundTruth] - Optional target value for "closer_to_gt".
 *   NOTE(review): "gt" is read as "ground truth" from the name alone — confirm.
 * @returns {boolean} `true` when `after` is worse than `before` under the
 *   given direction. Unknown directions, and "closer_to_gt" without a numeric
 *   `groundTruth`, never report a regression.
 */
function isRegression(before, after, direction, groundTruth) {
  switch (direction) {
    case "lower_is_better":
      return after > before;
    case "higher_is_better":
      return after < before;
    case "closer_to_gt":
      // Without a target there is nothing to measure distance against, so the
      // historical behaviour (never a regression) is kept.
      if (typeof groundTruth !== "number" || Number.isNaN(groundTruth)) {
        return false;
      }
      return Math.abs(after - groundTruth) > Math.abs(before - groundTruth);
    default:
      return false;
  }
}
|
|
2451
|
+
/**
 * Parse the JSON printed by the metrics command into flat metric rows.
 *
 * Expected shape: `{ "fixtures": { "<fixture>": { "<metric>": <number>, ... }, ... } }`.
 * Non-numeric metric values and fixture entries that are not objects are
 * skipped; a document without a `fixtures` object yields an empty list.
 *
 * @param {string} jsonStr - Raw stdout of the metrics command.
 * @returns {Array<{fixture: string, metric_name: string, metric_value: number}>}
 * @throws {SyntaxError} When `jsonStr` is not valid JSON.
 */
function parseMetricsOutput(jsonStr) {
  let data;
  try {
    data = JSON.parse(jsonStr);
  } catch (err) {
    // Same error type as before, but say which output failed to parse.
    throw new SyntaxError(`Metrics output is not valid JSON: ${err.message}`, { cause: err });
  }
  const results = [];
  // `data` may legitimately be null or a primitive (e.g. the command printed "null").
  const fixtures = data?.fixtures;
  if (!fixtures || typeof fixtures !== "object") {
    return results;
  }
  for (const [fixture, metrics] of Object.entries(fixtures)) {
    // A null/primitive fixture entry carries no metrics; skip it instead of
    // letting Object.entries throw.
    if (!metrics || typeof metrics !== "object") continue;
    for (const [metricName, metricValue] of Object.entries(metrics)) {
      if (typeof metricValue === "number") {
        results.push({ fixture, metric_name: metricName, metric_value: metricValue });
      }
    }
  }
  return results;
}
|
|
2322
|
-
var
|
|
2323
|
-
|
|
2324
|
-
"src/commands/query.ts"() {
|
|
2465
|
+
var init_metrics = __esm({
|
|
2466
|
+
"src/metrics.ts"() {
|
|
2325
2467
|
"use strict";
|
|
2326
|
-
fs5 = __toESM(require("fs"));
|
|
2327
|
-
path5 = __toESM(require("path"));
|
|
2328
|
-
init_connection();
|
|
2329
2468
|
init_queries();
|
|
2330
|
-
init_config();
|
|
2331
|
-
init_format();
|
|
2332
2469
|
}
|
|
2333
2470
|
});
|
|
2334
2471
|
|
|
2335
|
-
// src/
|
|
2336
|
-
var
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
|
|
2340
|
-
|
|
2341
|
-
["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
|
|
2342
|
-
["reframed" /* REFRAMED */]: ["gated" /* GATED */],
|
|
2343
|
-
["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
|
|
2344
|
-
// self-loop for rejected hypotheses
|
|
2345
|
-
["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
|
|
2346
|
-
// self-loop for retry after truncation
|
|
2347
|
-
["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
|
|
2348
|
-
["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
|
|
2349
|
-
["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
|
|
2350
|
-
["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
|
|
2351
|
-
["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
|
|
2352
|
-
["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
|
|
2353
|
-
["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
|
|
2354
|
-
// cycle-back skips gate
|
|
2355
|
-
["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
|
|
2356
|
-
// cycle-back skips gate
|
|
2357
|
-
["merged" /* MERGED */]: [],
|
|
2358
|
-
["dead_end" /* DEAD_END */]: []
|
|
2359
|
-
};
|
|
2360
|
-
GRADE_ORDER = ["rejected", "weak", "good", "sound"];
|
|
2361
|
-
}
|
|
2472
|
+
// src/commands/measure.ts
|
|
2473
|
+
var measure_exports = {};
|
|
2474
|
+
__export(measure_exports, {
|
|
2475
|
+
baseline: () => baseline,
|
|
2476
|
+
compare: () => compare,
|
|
2477
|
+
measure: () => measure
|
|
2362
2478
|
});
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
function transition(current, target) {
|
|
2366
|
-
const valid = TRANSITIONS[current];
|
|
2367
|
-
if (!valid.includes(target)) {
|
|
2368
|
-
throw new Error(
|
|
2369
|
-
`Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
|
|
2370
|
-
);
|
|
2371
|
-
}
|
|
2372
|
-
return target;
|
|
2373
|
-
}
|
|
2374
|
-
function validNext(current) {
|
|
2375
|
-
return TRANSITIONS[current];
|
|
2479
|
+
/**
 * CLI entry point: capture metrics for the "before" phase.
 *
 * @param {string[]} args - Command-line arguments, forwarded to `captureMetrics`.
 * @returns {Promise<void>}
 */
async function baseline(args) {
  const phase = "before";
  await captureMetrics(phase, args);
}
|
|
2377
|
-
function
|
|
2378
|
-
|
|
2482
|
+
/**
 * CLI entry point: capture metrics for the "after" phase.
 *
 * @param {string[]} args - Command-line arguments, forwarded to `captureMetrics`.
 * @returns {Promise<void>}
 */
async function measure(args) {
  const phase = "after";
  await captureMetrics(phase, args);
}
|
|
2380
|
-
function
|
|
2381
|
-
|
|
2382
|
-
|
|
2485
|
+
/**
 * Run the configured metrics command and store its results for one phase.
 *
 * Steps: resolve the experiment (`--experiment <id>` or the latest one), run
 * the optional `build.pre_measure` command, run `metrics.command`, parse its
 * stdout with `parseMetricsOutput`, insert every metric row, then run the
 * optional `build.post_measure` command.
 *
 * @param {string} phase - Phase label stored with each metric (callers in this file pass "before" or "after").
 * @param {string[]} args - Command-line arguments; `--experiment` is read from them.
 * @returns {Promise<void>}
 * @throws {Error} When not inside a project, the experiment cannot be
 *   resolved, no metrics command is configured, or the metrics command fails.
 */
async function captureMetrics(phase, args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  const config = loadConfig(root);
  const expIdStr = getFlagValue(args, "--experiment");
  let exp;
  if (expIdStr !== void 0) {
    // Reject garbage ids up front instead of querying with NaN and then
    // reporting a misleading "no active experiment".
    const expId = Number(expIdStr);
    if (!Number.isInteger(expId)) {
      throw new Error(`Invalid --experiment value "${expIdStr}": expected a numeric experiment id.`);
    }
    exp = getExperimentById(db, expId);
    if (!exp) throw new Error(`Experiment #${expId} not found.`);
  } else {
    exp = getLatestExperiment(db);
  }
  if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
  if (config.build.pre_measure) {
    info(`Running pre-measure: ${config.build.pre_measure}`);
    try {
      (0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
    } catch {
      // Deliberately best-effort: a failing pre-measure step does not block measurement.
      warn("Pre-measure command failed \u2014 continuing anyway.");
    }
  }
  if (!config.metrics.command) {
    throw new Error("No metrics.command configured in .majlis/config.json");
  }
  info(`Running metrics: ${config.metrics.command}`);
  let metricsOutput;
  try {
    metricsOutput = (0, import_node_child_process.execSync)(config.metrics.command, {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
  } catch (err) {
    throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
  }
  const parsed = parseMetricsOutput(metricsOutput);
  if (parsed.length === 0) {
    warn("Metrics command returned no data.");
    return;
  }
  for (const m of parsed) {
    insertMetric(db, exp.id, phase, m.fixture, m.metric_name, m.metric_value);
  }
  success(`Captured ${parsed.length} metric(s) for ${exp.slug} (phase: ${phase})`);
  if (config.build.post_measure) {
    try {
      (0, import_node_child_process.execSync)(config.build.post_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
    } catch {
      // Metrics are already stored at this point; only warn.
      warn("Post-measure command failed.");
    }
  }
}
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2537
|
+
/**
 * CLI entry point: show before/after metric comparisons for an experiment.
 *
 * The experiment is taken from `--experiment <id>` or, failing that, the
 * latest experiment. In JSON mode a `{ experiment, comparisons }` document is
 * printed (with an empty `comparisons` array when there is nothing to
 * compare); otherwise a table plus a regression summary is printed.
 *
 * @param {string[]} args - Command-line arguments.
 * @param {boolean} isJson - Emit JSON instead of a table.
 * @returns {Promise<void>}
 * @throws {Error} When not inside a project or the experiment cannot be resolved.
 */
async function compare(args, isJson) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  const config = loadConfig(root);
  const expIdStr = getFlagValue(args, "--experiment");
  let exp;
  if (expIdStr !== void 0) {
    // Reject garbage ids up front instead of querying with NaN and then
    // reporting a misleading "no active experiment".
    const expId = Number(expIdStr);
    if (!Number.isInteger(expId)) {
      throw new Error(`Invalid --experiment value "${expIdStr}": expected a numeric experiment id.`);
    }
    exp = getExperimentById(db, expId);
    if (!exp) throw new Error(`Experiment #${expId} not found.`);
  } else {
    exp = getLatestExperiment(db);
  }
  if (!exp) throw new Error("No active experiment.");
  const comparisons = compareMetrics(db, exp.id, config);
  if (isJson) {
    // Always emit a JSON document in JSON mode, even when it is empty, so
    // that consumers never have to parse a human-readable warning.
    console.log(JSON.stringify({ experiment: exp.slug, comparisons }, null, 2));
    return;
  }
  if (comparisons.length === 0) {
    warn(`No before/after metrics to compare for ${exp.slug}. Run baseline and measure first.`);
    return;
  }
  header(`Metric Comparison \u2014 ${exp.slug}`);
  const regressions = comparisons.filter((c) => c.regression);
  const rows = comparisons.map((c) => [
    c.fixture,
    c.metric,
    String(c.before),
    String(c.after),
    formatDelta(c.delta),
    c.regression ? red("REGRESSION") : green("OK")
  ]);
  console.log(table(["Fixture", "Metric", "Before", "After", "Delta", "Status"], rows));
  console.log();
  if (regressions.length > 0) {
    warn(`${regressions.length} regression(s) detected!`);
  } else {
    success("No regressions detected.");
  }
}
|
|
2434
|
-
|
|
2435
|
-
|
|
2436
|
-
|
|
2578
|
+
/**
 * Format a metric delta with four decimals and an explicit "+" for increases.
 *
 * Deltas that round to zero at four decimals are shown as "0.0000" rather
 * than "+0.0000" or "-0.0000".
 *
 * @param {number} delta - Difference `after - before`.
 * @returns {string} e.g. "+1.5000", "-2.0000", "0.0000".
 */
function formatDelta(delta) {
  const fixed = delta.toFixed(4);
  // Number("-0.0000") is -0, which compares equal to 0.
  if (Number(fixed) === 0) {
    return "0.0000";
  }
  return delta > 0 ? `+${fixed}` : fixed;
}
|
|
2582
|
+
var import_node_child_process;
|
|
2583
|
+
var init_measure = __esm({
|
|
2584
|
+
"src/commands/measure.ts"() {
|
|
2437
2585
|
"use strict";
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
|
|
2441
|
-
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
}`;
|
|
2445
|
-
ROLE_REQUIRED_FIELDS = {
|
|
2446
|
-
builder: ["decisions"],
|
|
2447
|
-
critic: ["doubts"],
|
|
2448
|
-
adversary: ["challenges"],
|
|
2449
|
-
verifier: ["grades"],
|
|
2450
|
-
gatekeeper: ["gate_decision"],
|
|
2451
|
-
reframer: ["reframe"],
|
|
2452
|
-
scout: ["findings"],
|
|
2453
|
-
compressor: ["compression_report"]
|
|
2454
|
-
};
|
|
2586
|
+
import_node_child_process = require("child_process");
|
|
2587
|
+
init_connection();
|
|
2588
|
+
init_queries();
|
|
2589
|
+
init_metrics();
|
|
2590
|
+
init_config();
|
|
2591
|
+
init_format();
|
|
2455
2592
|
}
|
|
2456
2593
|
});
|
|
2457
2594
|
|
|
2458
|
-
// src/
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2595
|
+
// src/commands/experiment.ts
|
|
2596
|
+
var experiment_exports = {};
|
|
2597
|
+
__export(experiment_exports, {
|
|
2598
|
+
newExperiment: () => newExperiment,
|
|
2599
|
+
revert: () => revert
|
|
2600
|
+
});
|
|
2601
|
+
/**
 * CLI entry point: create a new experiment from a hypothesis.
 *
 * Creates (best-effort) a git branch `exp/<NNN>-<slug>`, inserts the
 * experiment row, writes an experiment log from `docs/experiments/_TEMPLATE.md`
 * when that template exists, and optionally captures a baseline when
 * `cycle.auto_baseline_on_new_experiment` and `metrics.command` are configured.
 *
 * @param {string[]} args - Command-line arguments: the hypothesis words plus
 *   optional `--slug` and `--sub-type` flags.
 * @returns {Promise<void>}
 * @throws {Error} When not inside a project, no hypothesis is given, or the slug already exists.
 */
async function newExperiment(args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  // NOTE(review): every argument not starting with "--" becomes part of the
  // hypothesis; if flag values are passed as separate arguments they may end
  // up in the hypothesis text too — confirm against getFlagValue.
  const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
  if (!hypothesis) {
    throw new Error('Usage: majlis new "hypothesis"');
  }
  const db = getDb(root);
  const config = loadConfig(root);
  const slug = getFlagValue(args, "--slug") ?? await generateSlug(hypothesis, root);
  if (getExperimentBySlug(db, slug)) {
    throw new Error(`Experiment with slug "${slug}" already exists.`);
  }
  // NOTE(review): the number is derived from the row count, not from the id
  // the insert below will produce; the two can differ if rows were ever deleted.
  const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
  const num = allExps.count + 1;
  const paddedNum = String(num).padStart(3, "0");
  const branch = `exp/${paddedNum}-${slug}`;
  try {
    // Pass the branch name as an argument vector, not through a shell: the
    // slug can come straight from the --slug flag.
    (0, import_node_child_process2.execFileSync)("git", ["checkout", "-b", branch], {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    info(`Created branch: ${branch}`);
  } catch {
    warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
  }
  const subType = getFlagValue(args, "--sub-type") ?? null;
  const exp = createExperiment(db, slug, branch, hypothesis, subType, null);
  success(`Created experiment #${exp.id}: ${exp.slug}`);
  const docsDir = path5.join(root, "docs", "experiments");
  const templatePath = path5.join(docsDir, "_TEMPLATE.md");
  if (fs5.existsSync(templatePath)) {
    const template = fs5.readFileSync(templatePath, "utf-8");
    const today = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
    // Function replacers insert the values verbatim; with string replacements
    // a hypothesis containing "$&", "$1", "$'" etc. would be expanded as a
    // replacement pattern.
    const logContent = template
      .replace(/\{\{title\}\}/g, () => hypothesis)
      .replace(/\{\{hypothesis\}\}/g, () => hypothesis)
      .replace(/\{\{branch\}\}/g, () => branch)
      .replace(/\{\{status\}\}/g, () => "classified")
      .replace(/\{\{sub_type\}\}/g, () => subType ?? "unclassified")
      .replace(/\{\{date\}\}/g, () => today);
    const logPath = path5.join(docsDir, `${paddedNum}-${slug}.md`);
    fs5.writeFileSync(logPath, logContent);
    info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
  }
  if (config.cycle.auto_baseline_on_new_experiment && config.metrics.command) {
    info("Auto-baselining... (run `majlis baseline` to do this manually)");
    try {
      const { baseline: baseline2 } = await Promise.resolve().then(() => (init_measure(), measure_exports));
      await baseline2(["--experiment", String(exp.id)]);
    } catch {
      // Best-effort: the experiment itself has already been created.
      warn("Auto-baseline failed \u2014 run `majlis baseline` manually.");
    }
  }
}
|
|
2650
|
+
/**
 * CLI entry point: mark an experiment as a dead end and leave its branch.
 *
 * The experiment is looked up by the first positional argument (a slug) or,
 * when none is given, the latest experiment is used. A dead-end record is
 * inserted, the experiment status is set to "dead_end", and if the current
 * git branch is the experiment's branch, `main` (or, failing that, `master`)
 * is checked out on a best-effort basis.
 *
 * @param {string[]} args - Command-line arguments: optional slug, `--reason <text>`, `--structural`.
 * @returns {Promise<void>}
 * @throws {Error} When not inside a project or no matching experiment exists.
 */
async function revert(args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  let exp;
  // NOTE(review): the first argument not starting with "--" is taken as the
  // slug; if flag values are passed as separate arguments, a --reason value
  // could be picked up here — confirm against getFlagValue.
  const slugArg = args.filter((a) => !a.startsWith("--"))[0];
  if (slugArg) {
    exp = getExperimentBySlug(db, slugArg);
    if (!exp) throw new Error(`Experiment not found: ${slugArg}`);
  } else {
    exp = getLatestExperiment(db);
    if (!exp) throw new Error("No active experiments to revert.");
  }
  const reason = getFlagValue(args, "--reason") ?? "Manually reverted";
  const category = args.includes("--structural") ? "structural" : "procedural";
  insertDeadEnd(
    db,
    exp.id,
    exp.hypothesis ?? exp.slug,
    reason,
    `Reverted: ${reason}`,
    exp.sub_type,
    category
  );
  updateExperimentStatus(db, exp.id, "dead_end");
  try {
    const currentBranch = (0, import_node_child_process2.execFileSync)("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
      cwd: root,
      encoding: "utf-8"
    }).trim();
    if (currentBranch === exp.branch) {
      const quietGit = { cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] };
      // Try "main" first and fall back to "master" without relying on shell
      // redirection or "||", which are not portable across platforms.
      try {
        (0, import_node_child_process2.execFileSync)("git", ["checkout", "main"], quietGit);
      } catch {
        (0, import_node_child_process2.execFileSync)("git", ["checkout", "master"], quietGit);
      }
    }
  } catch {
    // The database is already updated; switching branches is a convenience.
    warn("Could not switch git branches \u2014 do this manually.");
  }
  info(`Experiment ${exp.slug} reverted to dead-end. Reason: ${reason}`);
}
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
|
|
2555
|
-
|
|
2692
|
+
var fs5, path5, import_node_child_process2;
|
|
2693
|
+
var init_experiment = __esm({
|
|
2694
|
+
"src/commands/experiment.ts"() {
|
|
2695
|
+
"use strict";
|
|
2696
|
+
fs5 = __toESM(require("fs"));
|
|
2697
|
+
path5 = __toESM(require("path"));
|
|
2698
|
+
import_node_child_process2 = require("child_process");
|
|
2699
|
+
init_connection();
|
|
2700
|
+
init_queries();
|
|
2701
|
+
init_config();
|
|
2702
|
+
init_spawn();
|
|
2703
|
+
init_format();
|
|
2704
|
+
}
|
|
2705
|
+
});
|
|
2556
2706
|
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
2578
|
-
|
|
2579
|
-
|
|
2707
|
+
// src/commands/session.ts
|
|
2708
|
+
var session_exports = {};
|
|
2709
|
+
__export(session_exports, {
|
|
2710
|
+
session: () => session
|
|
2711
|
+
});
|
|
2712
|
+
async function session(args) {
|
|
2713
|
+
const subcommand = args[0];
|
|
2714
|
+
if (!subcommand || subcommand !== "start" && subcommand !== "end") {
|
|
2715
|
+
throw new Error('Usage: majlis session start "intent" | majlis session end');
|
|
2716
|
+
}
|
|
2717
|
+
const root = findProjectRoot();
|
|
2718
|
+
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
2719
|
+
const db = getDb(root);
|
|
2720
|
+
if (subcommand === "start") {
|
|
2721
|
+
const intent = args.slice(1).filter((a) => !a.startsWith("--")).join(" ");
|
|
2722
|
+
if (!intent) {
|
|
2723
|
+
throw new Error('Usage: majlis session start "intent"');
|
|
2724
|
+
}
|
|
2725
|
+
const existing = getActiveSession(db);
|
|
2726
|
+
if (existing) {
|
|
2727
|
+
warn(`Session already active: "${existing.intent}" (started ${existing.started_at})`);
|
|
2728
|
+
warn("End it first with `majlis session end`.");
|
|
2729
|
+
return;
|
|
2730
|
+
}
|
|
2731
|
+
const latestExp = getLatestExperiment(db);
|
|
2732
|
+
const sess = startSession(db, intent, latestExp?.id ?? null);
|
|
2733
|
+
success(`Session started: "${intent}" (id: ${sess.id})`);
|
|
2734
|
+
if (latestExp) {
|
|
2735
|
+
info(`Linked to experiment: ${latestExp.slug} (${latestExp.status})`);
|
|
2736
|
+
}
|
|
2737
|
+
} else {
|
|
2738
|
+
const active = getActiveSession(db);
|
|
2739
|
+
if (!active) {
|
|
2740
|
+
throw new Error("No active session to end.");
|
|
2580
2741
|
}
|
|
2581
|
-
|
|
2582
|
-
|
|
2583
|
-
|
|
2584
|
-
|
|
2742
|
+
const accomplished = getFlagValue(args, "--accomplished") ?? null;
|
|
2743
|
+
const unfinished = getFlagValue(args, "--unfinished") ?? null;
|
|
2744
|
+
const fragility = getFlagValue(args, "--fragility") ?? null;
|
|
2745
|
+
endSession(db, active.id, accomplished, unfinished, fragility);
|
|
2746
|
+
success(`Session ended: "${active.intent}"`);
|
|
2747
|
+
if (accomplished) info(`Accomplished: ${accomplished}`);
|
|
2748
|
+
if (unfinished) info(`Unfinished: ${unfinished}`);
|
|
2749
|
+
if (fragility) warn(`New fragility: ${fragility}`);
|
|
2585
2750
|
}
|
|
2586
2751
|
}
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
}
|
|
2590
|
-
function validateForRole(role, output) {
|
|
2591
|
-
const required = ROLE_REQUIRED_FIELDS[role];
|
|
2592
|
-
if (!required) return { valid: true, missing: [] };
|
|
2593
|
-
const missing = required.filter((field) => {
|
|
2594
|
-
const value = output[field];
|
|
2595
|
-
if (value === void 0 || value === null) return true;
|
|
2596
|
-
if (Array.isArray(value) && value.length === 0) return true;
|
|
2597
|
-
return false;
|
|
2598
|
-
});
|
|
2599
|
-
return { valid: missing.length === 0, missing };
|
|
2600
|
-
}
|
|
2601
|
-
var import_claude_agent_sdk;
|
|
2602
|
-
var init_parse = __esm({
|
|
2603
|
-
"src/agents/parse.ts"() {
|
|
2752
|
+
var init_session = __esm({
|
|
2753
|
+
"src/commands/session.ts"() {
|
|
2604
2754
|
"use strict";
|
|
2605
|
-
|
|
2606
|
-
|
|
2755
|
+
init_connection();
|
|
2756
|
+
init_queries();
|
|
2757
|
+
init_config();
|
|
2758
|
+
init_format();
|
|
2607
2759
|
}
|
|
2608
2760
|
});
|
|
2609
2761
|
|
|
2610
|
-
// src/
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
|
|
2615
|
-
|
|
2616
|
-
|
|
2617
|
-
|
|
2618
|
-
const
|
|
2619
|
-
|
|
2620
|
-
|
|
2762
|
+
// src/commands/query.ts
|
|
2763
|
+
var query_exports = {};
|
|
2764
|
+
__export(query_exports, {
|
|
2765
|
+
query: () => query3
|
|
2766
|
+
});
|
|
2767
|
+
async function query3(command, args, isJson) {
|
|
2768
|
+
const root = findProjectRoot();
|
|
2769
|
+
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
2770
|
+
const db = getDb(root);
|
|
2771
|
+
switch (command) {
|
|
2772
|
+
case "decisions":
|
|
2773
|
+
return queryDecisions(db, args, isJson);
|
|
2774
|
+
case "dead-ends":
|
|
2775
|
+
return queryDeadEnds(db, args, isJson);
|
|
2776
|
+
case "fragility":
|
|
2777
|
+
return queryFragility(root, isJson);
|
|
2778
|
+
case "history":
|
|
2779
|
+
return queryHistory(db, args, isJson);
|
|
2780
|
+
case "circuit-breakers":
|
|
2781
|
+
return queryCircuitBreakers(db, root, isJson);
|
|
2782
|
+
case "check-commit":
|
|
2783
|
+
return checkCommit(db);
|
|
2621
2784
|
}
|
|
2622
|
-
const frontmatter = frontmatterMatch[1];
|
|
2623
|
-
const body = frontmatterMatch[2].trim();
|
|
2624
|
-
const name = extractYamlField(frontmatter, "name") ?? role;
|
|
2625
|
-
const model = extractYamlField(frontmatter, "model") ?? "opus";
|
|
2626
|
-
const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
|
|
2627
|
-
const tools = toolsStr.replace(/[\[\]]/g, "").split(",").map((t) => t.trim()).filter(Boolean);
|
|
2628
|
-
return { name, model, tools, systemPrompt: body };
|
|
2629
|
-
}
|
|
2630
|
-
function extractYamlField(yaml, field) {
|
|
2631
|
-
const match = yaml.match(new RegExp(`^${field}:\\s*(.+)$`, "m"));
|
|
2632
|
-
return match ? match[1].trim() : null;
|
|
2633
2785
|
}
|
|
2634
|
-
|
|
2635
|
-
const
|
|
2636
|
-
const
|
|
2637
|
-
const
|
|
2638
|
-
const
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
${contextJson}
|
|
2643
|
-
\`\`\`
|
|
2644
|
-
|
|
2645
|
-
${taskPrompt}`;
|
|
2646
|
-
const turns = ROLE_MAX_TURNS[role] ?? 15;
|
|
2647
|
-
console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
|
|
2648
|
-
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2649
|
-
prompt,
|
|
2650
|
-
model: agentDef.model,
|
|
2651
|
-
tools: agentDef.tools,
|
|
2652
|
-
systemPrompt: agentDef.systemPrompt,
|
|
2653
|
-
cwd: root,
|
|
2654
|
-
maxTurns: turns,
|
|
2655
|
-
label: role
|
|
2656
|
-
});
|
|
2657
|
-
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
|
|
2658
|
-
const artifactPath = writeArtifact(role, context, markdown, root);
|
|
2659
|
-
if (artifactPath) {
|
|
2660
|
-
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
2786
|
+
function queryDecisions(db, args, isJson) {
|
|
2787
|
+
const level = getFlagValue(args, "--level");
|
|
2788
|
+
const expIdStr = getFlagValue(args, "--experiment");
|
|
2789
|
+
const experimentId = expIdStr !== void 0 ? Number(expIdStr) : void 0;
|
|
2790
|
+
const decisions = listAllDecisions(db, level, experimentId);
|
|
2791
|
+
if (isJson) {
|
|
2792
|
+
console.log(JSON.stringify(decisions, null, 2));
|
|
2793
|
+
return;
|
|
2661
2794
|
}
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
if (!valid) {
|
|
2666
|
-
console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
|
|
2667
|
-
}
|
|
2795
|
+
if (decisions.length === 0) {
|
|
2796
|
+
info("No decisions found.");
|
|
2797
|
+
return;
|
|
2668
2798
|
}
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
cwd: root,
|
|
2690
|
-
maxTurns: 5,
|
|
2691
|
-
label: "synthesiser"
|
|
2692
|
-
});
|
|
2693
|
-
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2694
|
-
return { output: markdown, structured: { guidance: markdown }, truncated };
|
|
2695
|
-
}
|
|
2696
|
-
async function spawnRecovery(role, partialOutput, context, projectRoot) {
|
|
2697
|
-
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2698
|
-
const expSlug = context.experiment?.slug ?? "unknown";
|
|
2699
|
-
console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
|
|
2700
|
-
const expDocPath = path6.join(
|
|
2701
|
-
root,
|
|
2702
|
-
"docs",
|
|
2703
|
-
"experiments",
|
|
2704
|
-
`${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
|
|
2705
|
-
);
|
|
2706
|
-
const templatePath = path6.join(root, "docs", "experiments", "_TEMPLATE.md");
|
|
2707
|
-
const template = fs6.existsSync(templatePath) ? fs6.readFileSync(templatePath, "utf-8") : "";
|
|
2708
|
-
const currentDoc = fs6.existsSync(expDocPath) ? fs6.readFileSync(expDocPath, "utf-8") : "";
|
|
2709
|
-
const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
|
|
2710
|
-
|
|
2711
|
-
Here is the partial agent output (reasoning + tool calls):
|
|
2712
|
-
<partial_output>
|
|
2713
|
-
${partialOutput.slice(-3e3)}
|
|
2714
|
-
</partial_output>
|
|
2715
|
-
|
|
2716
|
-
Here is the current experiment doc:
|
|
2717
|
-
<current_doc>
|
|
2718
|
-
${currentDoc}
|
|
2719
|
-
</current_doc>
|
|
2720
|
-
|
|
2721
|
-
Here is the template that the experiment doc should follow:
|
|
2722
|
-
<template>
|
|
2723
|
-
${template}
|
|
2724
|
-
</template>
|
|
2725
|
-
|
|
2726
|
-
Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
|
|
2727
|
-
- Keep any valid content from the current doc
|
|
2728
|
-
- Fill in what you can infer from the partial output
|
|
2729
|
-
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
|
|
2730
|
-
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
|
|
2731
|
-
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
|
|
2732
|
-
- Be concise. This is cleanup, not new work.`;
|
|
2733
|
-
const { text: _markdown } = await runQuery({
|
|
2734
|
-
prompt,
|
|
2735
|
-
model: "haiku",
|
|
2736
|
-
tools: ["Read", "Write"],
|
|
2737
|
-
systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
|
|
2738
|
-
cwd: root,
|
|
2739
|
-
maxTurns: 5,
|
|
2740
|
-
label: "recovery"
|
|
2741
|
-
});
|
|
2742
|
-
console.log(`[recovery] Cleanup complete for ${expSlug}.`);
|
|
2743
|
-
}
|
|
2744
|
-
async function runQuery(opts) {
|
|
2745
|
-
let truncated = false;
|
|
2746
|
-
const tag = opts.label ?? "majlis";
|
|
2747
|
-
const conversation = (0, import_claude_agent_sdk2.query)({
|
|
2748
|
-
prompt: opts.prompt,
|
|
2749
|
-
options: {
|
|
2750
|
-
model: opts.model,
|
|
2751
|
-
tools: opts.tools,
|
|
2752
|
-
systemPrompt: {
|
|
2753
|
-
type: "preset",
|
|
2754
|
-
preset: "claude_code",
|
|
2755
|
-
append: opts.systemPrompt
|
|
2756
|
-
},
|
|
2757
|
-
cwd: opts.cwd,
|
|
2758
|
-
permissionMode: "bypassPermissions",
|
|
2759
|
-
allowDangerouslySkipPermissions: true,
|
|
2760
|
-
maxTurns: opts.maxTurns ?? 15,
|
|
2761
|
-
persistSession: false,
|
|
2762
|
-
settingSources: ["project"]
|
|
2763
|
-
}
|
|
2764
|
-
});
|
|
2765
|
-
const textParts = [];
|
|
2766
|
-
let costUsd = 0;
|
|
2767
|
-
let turnCount = 0;
|
|
2768
|
-
for await (const message of conversation) {
|
|
2769
|
-
if (message.type === "assistant") {
|
|
2770
|
-
turnCount++;
|
|
2771
|
-
let hasText = false;
|
|
2772
|
-
for (const block of message.message.content) {
|
|
2773
|
-
if (block.type === "text") {
|
|
2774
|
-
textParts.push(block.text);
|
|
2775
|
-
hasText = true;
|
|
2776
|
-
} else if (block.type === "tool_use") {
|
|
2777
|
-
const toolName = block.name ?? "tool";
|
|
2778
|
-
const input = block.input ?? {};
|
|
2779
|
-
const detail = formatToolDetail(toolName, input);
|
|
2780
|
-
process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
|
|
2781
|
-
`);
|
|
2782
|
-
}
|
|
2783
|
-
}
|
|
2784
|
-
if (hasText) {
|
|
2785
|
-
const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
|
|
2786
|
-
if (preview) {
|
|
2787
|
-
process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
|
|
2788
|
-
`);
|
|
2789
|
-
}
|
|
2790
|
-
}
|
|
2791
|
-
} else if (message.type === "tool_progress") {
|
|
2792
|
-
const elapsed = Math.round(message.elapsed_time_seconds);
|
|
2793
|
-
if (elapsed > 0 && elapsed % 5 === 0) {
|
|
2794
|
-
process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
|
|
2795
|
-
`);
|
|
2796
|
-
}
|
|
2797
|
-
} else if (message.type === "result") {
|
|
2798
|
-
if (message.subtype === "success") {
|
|
2799
|
-
costUsd = message.total_cost_usd;
|
|
2800
|
-
} else if (message.subtype === "error_max_turns") {
|
|
2801
|
-
truncated = true;
|
|
2802
|
-
costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
|
|
2803
|
-
console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
|
|
2804
|
-
} else {
|
|
2805
|
-
const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
|
|
2806
|
-
throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
|
|
2807
|
-
}
|
|
2808
|
-
}
|
|
2799
|
+
header("Decisions");
|
|
2800
|
+
const rows = decisions.map((d) => [
|
|
2801
|
+
String(d.id),
|
|
2802
|
+
String(d.experiment_id),
|
|
2803
|
+
evidenceColor(d.evidence_level),
|
|
2804
|
+
d.description.slice(0, 60) + (d.description.length > 60 ? "..." : ""),
|
|
2805
|
+
d.status
|
|
2806
|
+
]);
|
|
2807
|
+
console.log(table(["ID", "Exp", "Level", "Description", "Status"], rows));
|
|
2808
|
+
}
|
|
2809
|
+
function queryDeadEnds(db, args, isJson) {
|
|
2810
|
+
const subType = getFlagValue(args, "--sub-type");
|
|
2811
|
+
const searchTerm = getFlagValue(args, "--search");
|
|
2812
|
+
let deadEnds;
|
|
2813
|
+
if (subType) {
|
|
2814
|
+
deadEnds = listDeadEndsBySubType(db, subType);
|
|
2815
|
+
} else if (searchTerm) {
|
|
2816
|
+
deadEnds = searchDeadEnds(db, searchTerm);
|
|
2817
|
+
} else {
|
|
2818
|
+
deadEnds = listAllDeadEnds(db);
|
|
2809
2819
|
}
|
|
2810
|
-
|
|
2820
|
+
if (isJson) {
|
|
2821
|
+
console.log(JSON.stringify(deadEnds, null, 2));
|
|
2822
|
+
return;
|
|
2823
|
+
}
|
|
2824
|
+
if (deadEnds.length === 0) {
|
|
2825
|
+
info("No dead-ends recorded.");
|
|
2826
|
+
return;
|
|
2827
|
+
}
|
|
2828
|
+
header("Dead-End Registry");
|
|
2829
|
+
const rows = deadEnds.map((d) => [
|
|
2830
|
+
String(d.id),
|
|
2831
|
+
d.sub_type ?? "\u2014",
|
|
2832
|
+
d.approach.slice(0, 40) + (d.approach.length > 40 ? "..." : ""),
|
|
2833
|
+
d.structural_constraint.slice(0, 40) + (d.structural_constraint.length > 40 ? "..." : "")
|
|
2834
|
+
]);
|
|
2835
|
+
console.log(table(["ID", "Sub-Type", "Approach", "Constraint"], rows));
|
|
2811
2836
|
}
|
|
2812
|
-
function
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
2817
|
-
|
|
2818
|
-
|
|
2819
|
-
|
|
2820
|
-
|
|
2821
|
-
|
|
2822
|
-
case "Grep":
|
|
2823
|
-
return input.pattern ? ` /${input.pattern}/` : "";
|
|
2824
|
-
case "Bash":
|
|
2825
|
-
return input.command ? ` $ ${input.command.slice(0, 80)}` : "";
|
|
2826
|
-
case "WebSearch":
|
|
2827
|
-
return input.query ? ` "${input.query}"` : "";
|
|
2828
|
-
default:
|
|
2829
|
-
return "";
|
|
2837
|
+
function queryFragility(root, isJson) {
|
|
2838
|
+
const fragPath = path6.join(root, "docs", "synthesis", "fragility.md");
|
|
2839
|
+
if (!fs6.existsSync(fragPath)) {
|
|
2840
|
+
info("No fragility map found.");
|
|
2841
|
+
return;
|
|
2842
|
+
}
|
|
2843
|
+
const content = fs6.readFileSync(fragPath, "utf-8");
|
|
2844
|
+
if (isJson) {
|
|
2845
|
+
console.log(JSON.stringify({ content }, null, 2));
|
|
2846
|
+
return;
|
|
2830
2847
|
}
|
|
2848
|
+
header("Fragility Map");
|
|
2849
|
+
console.log(content);
|
|
2831
2850
|
}
|
|
2832
|
-
function
|
|
2833
|
-
const
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
adversary: "docs/challenges",
|
|
2837
|
-
verifier: "docs/verification",
|
|
2838
|
-
reframer: "docs/reframes",
|
|
2839
|
-
compressor: "docs/synthesis",
|
|
2840
|
-
scout: "docs/rihla"
|
|
2841
|
-
};
|
|
2842
|
-
const dir = dirMap[role];
|
|
2843
|
-
if (!dir) return null;
|
|
2844
|
-
if (role === "builder" || role === "compressor") return null;
|
|
2845
|
-
const fullDir = path6.join(projectRoot, dir);
|
|
2846
|
-
if (!fs6.existsSync(fullDir)) {
|
|
2847
|
-
fs6.mkdirSync(fullDir, { recursive: true });
|
|
2851
|
+
function queryHistory(db, args, isJson) {
|
|
2852
|
+
const fixture = args.filter((a) => !a.startsWith("--"))[0];
|
|
2853
|
+
if (!fixture) {
|
|
2854
|
+
throw new Error("Usage: majlis history <fixture>");
|
|
2848
2855
|
}
|
|
2849
|
-
const
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2856
|
+
const history = getMetricHistoryByFixture(db, fixture);
|
|
2857
|
+
if (isJson) {
|
|
2858
|
+
console.log(JSON.stringify(history, null, 2));
|
|
2859
|
+
return;
|
|
2860
|
+
}
|
|
2861
|
+
if (history.length === 0) {
|
|
2862
|
+
info(`No metric history for fixture: ${fixture}`);
|
|
2863
|
+
return;
|
|
2864
|
+
}
|
|
2865
|
+
header(`Metric History \u2014 ${fixture}`);
|
|
2866
|
+
const rows = history.map((h) => [
|
|
2867
|
+
String(h.experiment_id),
|
|
2868
|
+
h.experiment_slug ?? "\u2014",
|
|
2869
|
+
h.phase,
|
|
2870
|
+
h.metric_name,
|
|
2871
|
+
String(h.metric_value),
|
|
2872
|
+
h.captured_at
|
|
2873
|
+
]);
|
|
2874
|
+
console.log(table(["Exp", "Slug", "Phase", "Metric", "Value", "Captured"], rows));
|
|
2855
2875
|
}
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2876
|
+
function queryCircuitBreakers(db, root, isJson) {
|
|
2877
|
+
const config = loadConfig(root);
|
|
2878
|
+
const states = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
|
|
2879
|
+
if (isJson) {
|
|
2880
|
+
console.log(JSON.stringify(states, null, 2));
|
|
2881
|
+
return;
|
|
2882
|
+
}
|
|
2883
|
+
if (states.length === 0) {
|
|
2884
|
+
info("No circuit breaker data.");
|
|
2885
|
+
return;
|
|
2886
|
+
}
|
|
2887
|
+
header("Circuit Breakers");
|
|
2888
|
+
const rows = states.map((s) => [
|
|
2889
|
+
s.sub_type,
|
|
2890
|
+
String(s.failure_count),
|
|
2891
|
+
String(config.cycle.circuit_breaker_threshold),
|
|
2892
|
+
s.tripped ? red("TRIPPED") : green("OK")
|
|
2893
|
+
]);
|
|
2894
|
+
console.log(table(["Sub-Type", "Failures", "Threshold", "Status"], rows));
|
|
2895
|
+
}
|
|
2896
|
+
function checkCommit(db) {
|
|
2897
|
+
let stdinData = "";
|
|
2898
|
+
try {
|
|
2899
|
+
stdinData = fs6.readFileSync(0, "utf-8");
|
|
2900
|
+
} catch {
|
|
2901
|
+
}
|
|
2902
|
+
if (stdinData) {
|
|
2903
|
+
try {
|
|
2904
|
+
const hookInput = JSON.parse(stdinData);
|
|
2905
|
+
const command = hookInput?.tool_input?.command ?? "";
|
|
2906
|
+
if (!command.includes("git commit")) {
|
|
2907
|
+
return;
|
|
2908
|
+
}
|
|
2909
|
+
} catch {
|
|
2910
|
+
}
|
|
2911
|
+
}
|
|
2912
|
+
const active = listActiveExperiments(db);
|
|
2913
|
+
const unverified = active.filter(
|
|
2914
|
+
(e) => !["merged", "dead_end", "verified", "resolved", "compressed"].includes(e.status)
|
|
2915
|
+
);
|
|
2916
|
+
if (unverified.length > 0) {
|
|
2917
|
+
console.error(`[majlis] ${unverified.length} unverified experiment(s):`);
|
|
2918
|
+
for (const e of unverified) {
|
|
2919
|
+
console.error(` - ${e.slug} (${e.status})`);
|
|
2920
|
+
}
|
|
2921
|
+
process.exit(1);
|
|
2922
|
+
}
|
|
2923
|
+
}
|
|
2924
|
+
var fs6, path6;
|
|
2925
|
+
var init_query = __esm({
|
|
2926
|
+
"src/commands/query.ts"() {
|
|
2859
2927
|
"use strict";
|
|
2860
2928
|
fs6 = __toESM(require("fs"));
|
|
2861
2929
|
path6 = __toESM(require("path"));
|
|
2862
|
-
import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
|
|
2863
|
-
init_parse();
|
|
2864
2930
|
init_connection();
|
|
2865
|
-
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
|
|
2869
|
-
|
|
2870
|
-
|
|
2871
|
-
|
|
2872
|
-
|
|
2873
|
-
|
|
2931
|
+
init_queries();
|
|
2932
|
+
init_config();
|
|
2933
|
+
init_format();
|
|
2934
|
+
}
|
|
2935
|
+
});
|
|
2936
|
+
|
|
2937
|
+
// src/state/types.ts
|
|
2938
|
+
var TRANSITIONS, GRADE_ORDER;
|
|
2939
|
+
var init_types2 = __esm({
|
|
2940
|
+
"src/state/types.ts"() {
|
|
2941
|
+
"use strict";
|
|
2942
|
+
TRANSITIONS = {
|
|
2943
|
+
["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
|
|
2944
|
+
["reframed" /* REFRAMED */]: ["gated" /* GATED */],
|
|
2945
|
+
["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
|
|
2946
|
+
// self-loop for rejected hypotheses
|
|
2947
|
+
["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
|
|
2948
|
+
// self-loop for retry after truncation
|
|
2949
|
+
["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
|
|
2950
|
+
["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
|
|
2951
|
+
["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
|
|
2952
|
+
["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
|
|
2953
|
+
["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
|
|
2954
|
+
["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
|
|
2955
|
+
["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
|
|
2956
|
+
// cycle-back skips gate
|
|
2957
|
+
["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
|
|
2958
|
+
// cycle-back skips gate
|
|
2959
|
+
["merged" /* MERGED */]: [],
|
|
2960
|
+
["dead_end" /* DEAD_END */]: []
|
|
2874
2961
|
};
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2962
|
+
GRADE_ORDER = ["rejected", "weak", "good", "sound"];
|
|
2963
|
+
}
|
|
2964
|
+
});
|
|
2965
|
+
|
|
2966
|
+
// src/state/machine.ts
|
|
2967
|
+
function transition(current, target) {
|
|
2968
|
+
const valid = TRANSITIONS[current];
|
|
2969
|
+
if (!valid.includes(target)) {
|
|
2970
|
+
throw new Error(
|
|
2971
|
+
`Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
|
|
2972
|
+
);
|
|
2973
|
+
}
|
|
2974
|
+
return target;
|
|
2975
|
+
}
|
|
2976
|
+
function validNext(current) {
|
|
2977
|
+
return TRANSITIONS[current];
|
|
2978
|
+
}
|
|
2979
|
+
function isTerminal(status2) {
|
|
2980
|
+
return TRANSITIONS[status2].length === 0;
|
|
2981
|
+
}
|
|
2982
|
+
function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
|
|
2983
|
+
if (valid.length === 0) {
|
|
2984
|
+
throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
|
|
2985
|
+
}
|
|
2986
|
+
const status2 = exp.status;
|
|
2987
|
+
if (status2 === "classified" /* CLASSIFIED */ || status2 === "reframed" /* REFRAMED */) {
|
|
2988
|
+
return valid.includes("gated" /* GATED */) ? "gated" /* GATED */ : valid[0];
|
|
2989
|
+
}
|
|
2990
|
+
if (status2 === "gated" /* GATED */) {
|
|
2991
|
+
return valid.includes("building" /* BUILDING */) ? "building" /* BUILDING */ : valid[0];
|
|
2992
|
+
}
|
|
2993
|
+
if (status2 === "built" /* BUILT */ && !hasDoubts2) {
|
|
2994
|
+
return valid.includes("doubted" /* DOUBTED */) ? "doubted" /* DOUBTED */ : valid[0];
|
|
2995
|
+
}
|
|
2996
|
+
if (status2 === "doubted" /* DOUBTED */ && !hasChallenges2) {
|
|
2997
|
+
return valid.includes("challenged" /* CHALLENGED */) ? "challenged" /* CHALLENGED */ : valid[0];
|
|
2998
|
+
}
|
|
2999
|
+
if (status2 === "doubted" /* DOUBTED */ || status2 === "challenged" /* CHALLENGED */) {
|
|
3000
|
+
if (valid.includes("verifying" /* VERIFYING */)) {
|
|
3001
|
+
return "verifying" /* VERIFYING */;
|
|
3002
|
+
}
|
|
3003
|
+
}
|
|
3004
|
+
if (status2 === "compressed" /* COMPRESSED */) {
|
|
3005
|
+
return valid.includes("merged" /* MERGED */) ? "merged" /* MERGED */ : valid[0];
|
|
3006
|
+
}
|
|
3007
|
+
return valid[0];
|
|
3008
|
+
}
|
|
3009
|
+
var init_machine = __esm({
|
|
3010
|
+
"src/state/machine.ts"() {
|
|
3011
|
+
"use strict";
|
|
3012
|
+
init_types2();
|
|
2878
3013
|
}
|
|
2879
3014
|
});
|
|
2880
3015
|
|
|
@@ -3016,7 +3151,7 @@ var init_resolve = __esm({
|
|
|
3016
3151
|
"use strict";
|
|
3017
3152
|
fs7 = __toESM(require("fs"));
|
|
3018
3153
|
path7 = __toESM(require("path"));
|
|
3019
|
-
|
|
3154
|
+
init_types2();
|
|
3020
3155
|
init_queries();
|
|
3021
3156
|
init_spawn();
|
|
3022
3157
|
import_node_child_process3 = require("child_process");
|
|
@@ -3059,7 +3194,6 @@ async function resolveCmd(args) {
|
|
|
3059
3194
|
const exp = resolveExperimentArg(db, args);
|
|
3060
3195
|
transition(exp.status, "resolved" /* RESOLVED */);
|
|
3061
3196
|
await resolve(db, exp, root);
|
|
3062
|
-
updateExperimentStatus(db, exp.id, "resolved");
|
|
3063
3197
|
}
|
|
3064
3198
|
async function doGate(db, exp, root) {
|
|
3065
3199
|
transition(exp.status, "gated" /* GATED */);
|
|
@@ -3507,7 +3641,7 @@ var init_cycle = __esm({
|
|
|
3507
3641
|
init_connection();
|
|
3508
3642
|
init_queries();
|
|
3509
3643
|
init_machine();
|
|
3510
|
-
|
|
3644
|
+
init_types2();
|
|
3511
3645
|
init_spawn();
|
|
3512
3646
|
init_resolve();
|
|
3513
3647
|
init_config();
|
|
@@ -3830,6 +3964,13 @@ async function executeStep(step, exp, root) {
|
|
|
3830
3964
|
updateExperimentStatus(getDb(root), exp.id, "reframed");
|
|
3831
3965
|
info(`Reframe acknowledged for ${exp.slug}. Proceeding to gate.`);
|
|
3832
3966
|
break;
|
|
3967
|
+
case "merged" /* MERGED */:
|
|
3968
|
+
updateExperimentStatus(getDb(root), exp.id, "merged");
|
|
3969
|
+
success(`Experiment ${exp.slug} merged.`);
|
|
3970
|
+
break;
|
|
3971
|
+
case "dead_end" /* DEAD_END */:
|
|
3972
|
+
info(`Experiment ${exp.slug} is dead-ended. No further action.`);
|
|
3973
|
+
break;
|
|
3833
3974
|
default:
|
|
3834
3975
|
warn(`Don't know how to execute step: ${step}`);
|
|
3835
3976
|
}
|
|
@@ -3840,7 +3981,7 @@ var init_next = __esm({
|
|
|
3840
3981
|
init_connection();
|
|
3841
3982
|
init_queries();
|
|
3842
3983
|
init_machine();
|
|
3843
|
-
|
|
3984
|
+
init_types2();
|
|
3844
3985
|
init_queries();
|
|
3845
3986
|
init_config();
|
|
3846
3987
|
init_cycle();
|
|
@@ -3900,7 +4041,7 @@ async function run(args) {
|
|
|
3900
4041
|
}
|
|
3901
4042
|
usedHypotheses.add(hypothesis);
|
|
3902
4043
|
info(`Next hypothesis: ${hypothesis}`);
|
|
3903
|
-
exp = createNewExperiment(db, root, hypothesis);
|
|
4044
|
+
exp = await createNewExperiment(db, root, hypothesis);
|
|
3904
4045
|
success(`Created experiment #${exp.id}: ${exp.slug}`);
|
|
3905
4046
|
}
|
|
3906
4047
|
if (isTerminal(exp.status)) {
|
|
@@ -4038,8 +4179,8 @@ ${result.output.slice(-2e3)}
|
|
|
4038
4179
|
warn("Could not extract hypothesis. Using goal as fallback.");
|
|
4039
4180
|
return goal;
|
|
4040
4181
|
}
|
|
4041
|
-
function createNewExperiment(db, root, hypothesis) {
|
|
4042
|
-
const slug =
|
|
4182
|
+
async function createNewExperiment(db, root, hypothesis) {
|
|
4183
|
+
const slug = await generateSlug(hypothesis, root);
|
|
4043
4184
|
let finalSlug = slug;
|
|
4044
4185
|
let attempt = 0;
|
|
4045
4186
|
while (getExperimentBySlug(db, finalSlug)) {
|
|
@@ -4074,9 +4215,6 @@ function createNewExperiment(db, root, hypothesis) {
|
|
|
4074
4215
|
}
|
|
4075
4216
|
return exp;
|
|
4076
4217
|
}
|
|
4077
|
-
function slugify2(text) {
|
|
4078
|
-
return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
|
|
4079
|
-
}
|
|
4080
4218
|
var fs11, path11, import_node_child_process5;
|
|
4081
4219
|
var init_run = __esm({
|
|
4082
4220
|
"src/commands/run.ts"() {
|