majlis 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +1346 -1174
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -29,6 +29,27 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
29
29
|
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
30
30
|
mod
|
|
31
31
|
));
|
|
32
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
33
|
+
|
|
34
|
+
// src/shutdown.ts
|
|
35
|
+
var shutdown_exports = {};
|
|
36
|
+
__export(shutdown_exports, {
|
|
37
|
+
isShutdownRequested: () => isShutdownRequested,
|
|
38
|
+
requestShutdown: () => requestShutdown
|
|
39
|
+
});
|
|
40
|
+
/**
 * Record that a shutdown has been requested by setting the module-level flag.
 */
function requestShutdown() {
  _requested = true;
}

/**
 * Report whether `requestShutdown()` has been called.
 *
 * @returns {boolean} `true` once a shutdown was requested, otherwise `false`.
 */
function isShutdownRequested() {
  // The flag is only assigned `false` by the module initializer; before that
  // it is `undefined`, which is reported as "not requested" rather than leaked.
  return _requested === true;
}
|
|
46
|
+
var _requested;
|
|
47
|
+
var init_shutdown = __esm({
|
|
48
|
+
"src/shutdown.ts"() {
|
|
49
|
+
"use strict";
|
|
50
|
+
_requested = false;
|
|
51
|
+
}
|
|
52
|
+
});
|
|
32
53
|
|
|
33
54
|
// src/db/migrations.ts
|
|
34
55
|
function runMigrations(db) {
|
|
@@ -554,6 +575,10 @@ Read as much code as you need to understand the problem. Reading is free \u2014
|
|
|
554
575
|
as many turns as necessary on Read, Grep, and Glob to build full context before
|
|
555
576
|
you touch anything.
|
|
556
577
|
|
|
578
|
+
Do NOT read raw data files (fixtures/, ground truth JSON/STL). The synthesis
|
|
579
|
+
has the relevant facts. Reading raw data wastes turns re-deriving what the
|
|
580
|
+
doubt/challenge/verify cycle already established.
|
|
581
|
+
|
|
557
582
|
## The Rule: ONE Change, Then Document
|
|
558
583
|
|
|
559
584
|
You make ONE code change per cycle. Not two, not "one more quick fix." ONE.
|
|
@@ -717,6 +742,9 @@ If the builder claims improvement but the framework metrics show regression, fla
|
|
|
717
742
|
- Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
|
|
718
743
|
- Do NOT run exhaustive diagnostics on every claim.
|
|
719
744
|
|
|
745
|
+
Framework-captured metrics are ground truth \u2014 if they show regression, that
|
|
746
|
+
alone justifies a "rejected" grade. Do not re-derive from raw fixture data.
|
|
747
|
+
|
|
720
748
|
Grade each component: sound / good / weak / rejected
|
|
721
749
|
Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
|
|
722
750
|
|
|
@@ -796,6 +824,13 @@ the database export.
|
|
|
796
824
|
The framework does NOT auto-save your output for these files.
|
|
797
825
|
7. Review classification: new sub-types? resolved sub-types?
|
|
798
826
|
|
|
827
|
+
You may ONLY write to these three files:
|
|
828
|
+
- docs/synthesis/current.md
|
|
829
|
+
- docs/synthesis/fragility.md
|
|
830
|
+
- docs/synthesis/dead-ends.md
|
|
831
|
+
|
|
832
|
+
Do NOT modify MEMORY.md, .claude/, classification/, experiments/, or any other paths.
|
|
833
|
+
|
|
799
834
|
You may NOT write code, make decisions, or run experiments.
|
|
800
835
|
|
|
801
836
|
## Structured Output Format
|
|
@@ -1618,6 +1653,73 @@ var init_queries = __esm({
|
|
|
1618
1653
|
}
|
|
1619
1654
|
});
|
|
1620
1655
|
|
|
1656
|
+
// src/config.ts
|
|
1657
|
+
/**
 * Load `<projectRoot>/.majlis/config.json` merged over the built-in defaults.
 *
 * The result is cached for the most recently requested `projectRoot`; a call
 * with the same root returns the cached object without touching the disk.
 * When the file does not exist, a copy of the defaults is returned.
 *
 * @param {string} projectRoot - Directory that contains the `.majlis` folder.
 * @returns {object} Config with `project`, `metrics`, `build` and `cycle`
 *   merged key-by-key over the defaults; other top-level keys are taken from
 *   the file when present.
 * @throws {SyntaxError} If the file exists but is not valid JSON.
 * @throws {TypeError} If the file parses to something other than an object.
 */
function loadConfig(projectRoot) {
  if (_cachedConfig && _cachedRoot === projectRoot) return _cachedConfig;
  const configPath = path3.join(projectRoot, ".majlis", "config.json");
  // Deep-copy the defaults (plain JSON data) so that callers mutating the
  // returned config can never alter the shared default object.
  const defaults = JSON.parse(JSON.stringify(DEFAULT_CONFIG2));
  let loaded = {};
  if (fs3.existsSync(configPath)) {
    const raw = fs3.readFileSync(configPath, "utf-8");
    try {
      loaded = JSON.parse(raw);
    } catch (err) {
      // Keep the SyntaxError type but say which file is broken.
      const detail = err instanceof Error ? err.message : String(err);
      throw new SyntaxError(`Invalid JSON in ${configPath}: ${detail}`, { cause: err });
    }
    if (loaded === null || typeof loaded !== "object" || Array.isArray(loaded)) {
      throw new TypeError(`Expected a JSON object in ${configPath}.`);
    }
  }
  _cachedConfig = {
    ...defaults,
    ...loaded,
    project: { ...defaults.project, ...loaded.project },
    metrics: { ...defaults.metrics, ...loaded.metrics },
    build: { ...defaults.build, ...loaded.build },
    cycle: { ...defaults.cycle, ...loaded.cycle }
  };
  _cachedRoot = projectRoot;
  return _cachedConfig;
}
|
|
1677
|
+
/**
 * Read a UTF-8 text file, treating any read failure as "no content".
 *
 * @param {string} filePath - File to read.
 * @returns {string} The file's text, or `""` when reading throws.
 */
function readFileOrEmpty(filePath) {
  let text = "";
  try {
    text = fs3.readFileSync(filePath, "utf-8");
  } catch {
    // Deliberate best effort: an unreadable file yields the empty string.
  }
  return text;
}
|
|
1684
|
+
/**
 * Look up the value supplied for a command-line flag.
 *
 * Two spellings are recognised, and the first occurrence in `args` wins:
 *   - `--flag value`  → the argument that follows the flag
 *   - `--flag=value`  → the text after the `=`
 *
 * @param {string[]} args - Raw argument list.
 * @param {string} flag - Flag name including its dashes, e.g. `"--experiment"`.
 * @returns {string | undefined} The value, or `undefined` when the flag is
 *   absent or is the last argument with nothing after it.
 */
function getFlagValue(args, flag) {
  const inlinePrefix = `${flag}=`;
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === flag) {
      // A trailing flag has no value to return.
      return i + 1 < args.length ? args[i + 1] : void 0;
    }
    if (typeof arg === "string" && arg.startsWith(inlinePrefix)) {
      return arg.slice(inlinePrefix.length);
    }
  }
  return void 0;
}
|
|
1689
|
+
/**
 * Cap `content` at `limit` UTF-16 code units, appending a `[TRUNCATED]`
 * marker line when anything was cut.
 *
 * @param {string} content - Text to cap.
 * @param {number} limit - Maximum number of code units to keep.
 * @returns {string} `content` unchanged when it fits, otherwise the kept
 *   prefix followed by `"\n[TRUNCATED]"`.
 */
function truncateContext(content, limit) {
  if (content.length <= limit) return content;
  let end = limit;
  if (end > 0) {
    // If the cut lands between the two halves of a surrogate pair, drop the
    // dangling high surrogate so the output stays well-formed text.
    const lastUnit = content.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return content.slice(0, end) + "\n[TRUNCATED]";
}
|
|
1693
|
+
var fs3, path3, DEFAULT_CONFIG2, _cachedConfig, _cachedRoot, CONTEXT_LIMITS;
|
|
1694
|
+
var init_config = __esm({
|
|
1695
|
+
"src/config.ts"() {
|
|
1696
|
+
"use strict";
|
|
1697
|
+
fs3 = __toESM(require("fs"));
|
|
1698
|
+
path3 = __toESM(require("path"));
|
|
1699
|
+
DEFAULT_CONFIG2 = {
|
|
1700
|
+
project: { name: "", description: "", objective: "" },
|
|
1701
|
+
metrics: { command: "", fixtures: [], tracked: {} },
|
|
1702
|
+
build: { pre_measure: null, post_measure: null },
|
|
1703
|
+
cycle: {
|
|
1704
|
+
compression_interval: 5,
|
|
1705
|
+
circuit_breaker_threshold: 3,
|
|
1706
|
+
require_doubt_before_verify: true,
|
|
1707
|
+
require_challenge_before_verify: false,
|
|
1708
|
+
auto_baseline_on_new_experiment: true
|
|
1709
|
+
},
|
|
1710
|
+
models: {}
|
|
1711
|
+
};
|
|
1712
|
+
_cachedConfig = null;
|
|
1713
|
+
_cachedRoot = null;
|
|
1714
|
+
CONTEXT_LIMITS = {
|
|
1715
|
+
synthesis: 3e4,
|
|
1716
|
+
fragility: 15e3,
|
|
1717
|
+
experimentDoc: 15e3,
|
|
1718
|
+
deadEnds: 15e3
|
|
1719
|
+
};
|
|
1720
|
+
}
|
|
1721
|
+
});
|
|
1722
|
+
|
|
1621
1723
|
// src/commands/status.ts
|
|
1622
1724
|
var status_exports = {};
|
|
1623
1725
|
__export(status_exports, {
|
|
@@ -1707,1122 +1809,1207 @@ function buildSummary(expCount, activeSession, sessionsSinceCompression, config)
|
|
|
1707
1809
|
}
|
|
1708
1810
|
return parts.join(". ");
|
|
1709
1811
|
}
|
|
1710
|
-
function loadConfig(projectRoot) {
|
|
1711
|
-
const configPath = path3.join(projectRoot, ".majlis", "config.json");
|
|
1712
|
-
if (!fs3.existsSync(configPath)) {
|
|
1713
|
-
throw new Error("Missing .majlis/config.json. Run `majlis init` first.");
|
|
1714
|
-
}
|
|
1715
|
-
return JSON.parse(fs3.readFileSync(configPath, "utf-8"));
|
|
1716
|
-
}
|
|
1717
|
-
var fs3, path3;
|
|
1718
1812
|
var init_status = __esm({
|
|
1719
1813
|
"src/commands/status.ts"() {
|
|
1720
1814
|
"use strict";
|
|
1721
|
-
fs3 = __toESM(require("fs"));
|
|
1722
|
-
path3 = __toESM(require("path"));
|
|
1723
1815
|
init_connection();
|
|
1724
1816
|
init_queries();
|
|
1817
|
+
init_config();
|
|
1725
1818
|
init_format();
|
|
1726
1819
|
}
|
|
1727
1820
|
});
|
|
1728
1821
|
|
|
1729
|
-
// src/
|
|
1730
|
-
function
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
delta: a.metric_value - b.metric_value,
|
|
1749
|
-
regression
|
|
1750
|
-
});
|
|
1751
|
-
}
|
|
1752
|
-
}
|
|
1753
|
-
}
|
|
1754
|
-
return comparisons;
|
|
1755
|
-
}
|
|
1756
|
-
function isRegression(before, after, direction) {
|
|
1757
|
-
switch (direction) {
|
|
1758
|
-
case "lower_is_better":
|
|
1759
|
-
return after > before;
|
|
1760
|
-
case "higher_is_better":
|
|
1761
|
-
return after < before;
|
|
1762
|
-
case "closer_to_gt":
|
|
1763
|
-
return false;
|
|
1822
|
+
// src/agents/types.ts
|
|
1823
|
+
/**
 * Return the JSON schema hint (as a string) describing the structured output
 * expected from a given agent role.
 *
 * @param {string} role - Agent role name.
 * @returns {string} The role-specific schema text, or the combined
 *   `EXTRACTION_SCHEMA` for any role without a dedicated entry.
 */
function getExtractionSchema(role) {
  const schemaByRole = new Map([
    ["builder", '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}'],
    ["critic", '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}'],
    ["adversary", '{"challenges": [{"description": "string", "reasoning": "string"}]}'],
    ["verifier", '{"grades": [{"component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string"}], "doubt_resolutions": [{"doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive"}]}'],
    ["gatekeeper", '{"gate_decision": "approve|reject|flag", "reason": "string", "stale_references": ["string"], "overlapping_dead_ends": [0]}'],
    ["reframer", '{"reframe": {"decomposition": "string", "divergences": ["string"], "recommendation": "string"}}'],
    ["scout", '{"findings": [{"approach": "string", "source": "string", "relevance": "string", "contradicts_current": true}]}'],
    ["compressor", '{"compression_report": {"synthesis_delta": "string", "new_dead_ends": ["string"], "fragility_changes": ["string"]}}']
  ]);
  // A Map (not a plain object) so that names like "constructor" still fall
  // through to the generic schema, exactly as the switch default did.
  const specific = schemaByRole.get(role);
  return specific !== void 0 ? specific : EXTRACTION_SCHEMA;
}
|
|
1782
|
-
var
|
|
1783
|
-
|
|
1845
|
+
var EXTRACTION_SCHEMA, ROLE_REQUIRED_FIELDS;
|
|
1846
|
+
var init_types = __esm({
|
|
1847
|
+
"src/agents/types.ts"() {
|
|
1784
1848
|
"use strict";
|
|
1785
|
-
|
|
1849
|
+
EXTRACTION_SCHEMA = `{
|
|
1850
|
+
"decisions": [{ "description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string" }],
|
|
1851
|
+
"grades": [{ "component": "string", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "string" }],
|
|
1852
|
+
"doubts": [{ "claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical" }],
|
|
1853
|
+
"guidance": "string (actionable builder guidance)",
|
|
1854
|
+
"doubt_resolutions": [{ "doubt_id": 0, "resolution": "confirmed|dismissed|inconclusive" }]
|
|
1855
|
+
}`;
|
|
1856
|
+
ROLE_REQUIRED_FIELDS = {
|
|
1857
|
+
builder: ["decisions"],
|
|
1858
|
+
critic: ["doubts"],
|
|
1859
|
+
adversary: ["challenges"],
|
|
1860
|
+
verifier: ["grades"],
|
|
1861
|
+
gatekeeper: ["gate_decision"],
|
|
1862
|
+
reframer: ["reframe"],
|
|
1863
|
+
scout: ["findings"],
|
|
1864
|
+
compressor: ["compression_report"]
|
|
1865
|
+
};
|
|
1786
1866
|
}
|
|
1787
1867
|
});
|
|
1788
1868
|
|
|
1789
|
-
// src/
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
});
|
|
1796
|
-
async function baseline(args) {
|
|
1797
|
-
await captureMetrics("before", args);
|
|
1798
|
-
}
|
|
1799
|
-
async function measure(args) {
|
|
1800
|
-
await captureMetrics("after", args);
|
|
1801
|
-
}
|
|
1802
|
-
async function captureMetrics(phase, args) {
|
|
1803
|
-
const root = findProjectRoot();
|
|
1804
|
-
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
1805
|
-
const db = getDb(root);
|
|
1806
|
-
const config = loadConfig2(root);
|
|
1807
|
-
const expIdIdx = args.indexOf("--experiment");
|
|
1808
|
-
let exp;
|
|
1809
|
-
if (expIdIdx >= 0) {
|
|
1810
|
-
exp = getExperimentById(db, Number(args[expIdIdx + 1]));
|
|
1869
|
+
// src/agents/parse.ts
|
|
1870
|
+
/**
 * Extract an agent's structured output from its Markdown report, trying
 * progressively weaker strategies:
 *
 *   1. the explicit `<!-- majlis-json ... -->` block,
 *   2. regex patterns over the prose (`extractViaPatterns`),
 *   3. a model-assisted extraction (`extractViaHaiku`).
 *
 * A tier's result is only accepted when it is a JSON object; scalars and
 * arrays are treated as a failed extraction and the next tier is tried.
 *
 * @param {string} role - Agent role, used for logging and passed to the fallbacks.
 * @param {string} markdown - The agent's full Markdown output.
 * @returns {Promise<object | null>} The extracted data, or `null` when every
 *   tier failed (an error is logged in that case).
 */
async function extractStructuredData(role, markdown) {
  // Structured output is always a keyed record; `42`, `"text"` or `[...]`
  // parse as valid JSON but carry none of the expected fields.
  const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

  const tier1 = extractMajlisJsonBlock(markdown);
  if (tier1) {
    const parsed = tryParseJson(tier1);
    if (isRecord(parsed)) return parsed;
    console.warn(`[majlis] Malformed JSON in <!-- majlis-json --> block for ${role}. Falling back.`);
  } else {
    console.warn(`[majlis] No <!-- majlis-json --> block found in ${role} output. Falling back.`);
  }

  const tier2 = extractViaPatterns(role, markdown);
  if (tier2 && hasData(tier2)) {
    console.warn(`[majlis] Used regex fallback for ${role}. Review extracted data.`);
    return tier2;
  }

  console.warn(`[majlis] Regex fallback insufficient for ${role}. Using Haiku extraction.`);
  const tier3 = await extractViaHaiku(role, markdown);
  if (isRecord(tier3)) return tier3;

  console.error(
    `[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
  );
  return null;
}
|
|
1892
|
+
/**
 * Pull the raw text of the first `<!-- majlis-json ... -->` comment out of a
 * Markdown document.
 *
 * The multi-line form (marker, newline, payload) is preferred. If no such
 * block exists, a single-line `<!-- majlis-json {...} -->` comment whose
 * payload is brace-delimited is accepted as well.
 *
 * @param {string} markdown - Document to search.
 * @returns {string | null} The trimmed payload text, or `null` when the input
 *   is not a string or contains no block.
 */
function extractMajlisJsonBlock(markdown) {
  if (typeof markdown !== "string") return null;
  const multiLine = markdown.match(/<!--\s*majlis-json\s*\n([\s\S]*?)-->/);
  if (multiLine) return multiLine[1].trim();
  // Requiring `{ ... }` keeps prose that merely mentions the marker
  // (e.g. "<!-- majlis-json -->") from being mistaken for a payload.
  const singleLine = markdown.match(/<!--\s*majlis-json\s+(\{[\s\S]*?\})\s*-->/);
  return singleLine ? singleLine[1].trim() : null;
}
|
|
1897
|
+
/**
 * Parse JSON without throwing.
 *
 * The text is parsed as-is first. If that fails and the whole text is a
 * single Markdown code fence (``` or ```json), the fence is removed and the
 * inner text is parsed instead.
 *
 * @param {string} jsonStr - Candidate JSON text.
 * @returns {*} The parsed value, or `null` when the text cannot be parsed.
 */
function tryParseJson(jsonStr) {
  try {
    return JSON.parse(jsonStr);
  } catch {
    // Not plain JSON; fall through to the fenced-block attempt below.
  }
  if (typeof jsonStr !== "string") return null;
  const fenced = jsonStr.match(/^\s*```(?:json)?\s*\n([\s\S]*?)```\s*$/i);
  if (!fenced) return null;
  try {
    return JSON.parse(fenced[1]);
  } catch {
    return null;
  }
}
|
|
1904
|
+
/**
 * Regex-based fallback that scrapes decisions, grades and doubts out of an
 * agent's Markdown when no machine-readable JSON block is available.
 *
 * Only the keys for which at least one item was found are set on the result.
 * Text fields that the patterns cannot recover are filled with a fixed
 * "Extracted via regex" placeholder so a reviewer can spot them.
 *
 * @param {string} role - Agent role (accepted for signature parity; not consulted).
 * @param {string} markdown - Document to scan.
 * @returns {{decisions?: object[], grades?: object[], doubts?: object[]}}
 */
function extractViaPatterns(role, markdown) {
  const result = {};

  // --- Decisions -----------------------------------------------------------
  const decisions = [];
  // Bullet of the form "- Decision: <text>" followed by an "Evidence:"/"Level:" tag.
  const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
  for (const match of markdown.matchAll(evidenceMarkers)) {
    decisions.push({
      description: match[1].trim(),
      evidence_level: match[2].toLowerCase().trim(),
      justification: "Extracted via regex \u2014 review"
    });
  }
  // Inline tags such as "[proof] <text>"; skip text already captured above.
  const knownDescriptions = new Set(decisions.map((d) => d.description));
  const inlineTagPattern = /\[(proof|test|strong_consensus|consensus|analogy|judgment)\]\s*(.+?)(?:\n|$)/gi;
  for (const match of markdown.matchAll(inlineTagPattern)) {
    const desc = match[2].trim();
    if (knownDescriptions.has(desc)) continue;
    knownDescriptions.add(desc);
    decisions.push({
      description: desc,
      evidence_level: match[1].toLowerCase(),
      justification: "Extracted via regex \u2014 review"
    });
  }
  if (decisions.length > 0) result.decisions = decisions;

  // --- Grades --------------------------------------------------------------
  const grades = [];
  // Labelled form: "Component: <name>, grade: <grade>".
  const gradePattern = /(?:^|\n)\s*[-*]?\s*\*?\*?(?:Grade|GRADE|Component)\*?\*?.*?(?:component|Component)?\s*[:=]\s*(.+?)(?:\n|,).*?(?:grade|Grade)\s*[:=]\s*(sound|good|weak|rejected)/gim;
  for (const match of markdown.matchAll(gradePattern)) {
    grades.push({
      component: match[1].trim(),
      grade: match[2].toLowerCase().trim()
    });
  }
  // Bare bullet form: "- <name>: <grade>"; skip components already captured.
  const knownComponents = new Set(grades.map((g) => g.component));
  const simpleGradePattern = /(?:^|\n)\s*[-*]\s*\*?\*?(.+?)\*?\*?\s*[:—–-]\s*\*?\*?(sound|good|weak|rejected)\*?\*?/gim;
  for (const match of markdown.matchAll(simpleGradePattern)) {
    const comp = match[1].trim();
    if (knownComponents.has(comp)) continue;
    knownComponents.add(comp);
    grades.push({
      component: comp,
      grade: match[2].toLowerCase().trim()
    });
  }
  if (grades.length > 0) result.grades = grades;

  // --- Doubts --------------------------------------------------------------
  const doubts = [];
  const doubtPattern = /(?:Doubt|DOUBT|Claim doubted|CLAIM)\s*(?:\d+)?[:.]?\s*(.+?)(?:\n|$)[\s\S]*?(?:Severity|SEVERITY)\s*[:=]\s*(minor|moderate|critical)/gim;
  for (const match of markdown.matchAll(doubtPattern)) {
    doubts.push({
      claim_doubted: match[1].trim(),
      // Don't fabricate — mark as unknown for review
      evidence_level_of_claim: "unknown",
      evidence_for_doubt: "Extracted via regex \u2014 review original document",
      severity: match[2].toLowerCase().trim()
    });
  }
  if (doubts.length > 0) result.doubts = doubts;

  return result;
}
|
|
1895
|
-
function
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1962
|
+
/**
 * Last-resort extraction: ask a small model to convert the agent's Markdown
 * into JSON matching the role's schema.
 *
 * Only the first 8000 characters of the document are sent. Any failure
 * (including an SDK error) is logged as a warning and reported as `null`.
 *
 * @param {string} role - Agent role; selects the schema and labels log output.
 * @param {string} markdown - The agent's Markdown output.
 * @returns {Promise<*>} Parsed JSON from the model's reply, or `null`.
 */
async function extractViaHaiku(role, markdown) {
  const maxChars = 8e3;
  try {
    let truncated = markdown;
    if (markdown.length > maxChars) {
      truncated = markdown.slice(0, maxChars) + "\n[truncated]";
    }
    const schema = getExtractionSchema(role);
    const prompt = `Extract structured data from this ${role} document as JSON. Follow this schema exactly: ${schema}

Document:
${truncated}`;
    const options = {
      model: "haiku",
      tools: [],
      systemPrompt: "You are a JSON extraction assistant. Output only valid JSON matching the requested schema. No markdown, no explanation, just JSON.",
      permissionMode: "bypassPermissions",
      allowDangerouslySkipPermissions: true,
      maxTurns: 1,
      persistSession: false
    };
    const conversation = (0, import_claude_agent_sdk.query)({ prompt, options });
    // Concatenate every text block of every assistant message, in order.
    let resultText = "";
    for await (const message of conversation) {
      if (message.type !== "assistant") continue;
      for (const block of message.message.content) {
        if (block.type !== "text") continue;
        resultText += block.text;
      }
    }
    return tryParseJson(resultText.trim());
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[majlis] Haiku extraction failed for ${role}: ${reason}`);
    return null;
  }
}
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1998
|
+
/**
 * Decide whether an extraction result carries any usable content.
 *
 * @param {object | null | undefined} output - Candidate structured output.
 * @returns {boolean} `true` when any list field (`decisions`, `grades`,
 *   `doubts`, `challenges`, `findings`) is non-empty or any of `guidance`,
 *   `reframe`, `compression_report`, `gate_decision` is truthy; `false`
 *   otherwise, including for `null`/`undefined`/non-object input.
 */
function hasData(output) {
  if (output == null || typeof output !== "object") return false;
  const listFields = ["decisions", "grades", "doubts", "challenges", "findings"];
  if (listFields.some((key) => Boolean(output[key]) && output[key].length > 0)) {
    return true;
  }
  return Boolean(output.guidance || output.reframe || output.compression_report || output.gate_decision);
}
|
|
2001
|
+
/**
 * Check that an agent's structured output contains the fields its role must
 * produce, as listed in `ROLE_REQUIRED_FIELDS`.
 *
 * A field counts as missing when it is `undefined`, `null`, or an empty
 * array. Roles without an entry have no requirements and always validate.
 *
 * @param {string} role - Agent role name.
 * @param {object | null | undefined} output - Structured output to check.
 * @returns {{valid: boolean, missing: string[]}} Validation verdict and the
 *   names of the missing fields.
 */
function validateForRole(role, output) {
  // Own-property lookup only: inherited keys such as "constructor" must not
  // be mistaken for a role entry.
  const hasEntry = Object.prototype.hasOwnProperty.call(ROLE_REQUIRED_FIELDS, role);
  const required = hasEntry ? ROLE_REQUIRED_FIELDS[role] : void 0;
  if (!required) return { valid: true, missing: [] };
  const missing = required.filter((field) => {
    // A null/undefined output is treated as having none of the fields.
    const value = output == null ? void 0 : output[field];
    if (value === void 0 || value === null) return true;
    return Array.isArray(value) && value.length === 0;
  });
  return { valid: missing.length === 0, missing };
}
|
|
2012
|
+
var import_claude_agent_sdk;
|
|
2013
|
+
var init_parse = __esm({
|
|
2014
|
+
"src/agents/parse.ts"() {
|
|
1909
2015
|
"use strict";
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
import_node_child_process = require("child_process");
|
|
1913
|
-
init_connection();
|
|
1914
|
-
init_queries();
|
|
1915
|
-
init_metrics();
|
|
1916
|
-
init_format();
|
|
2016
|
+
init_types();
|
|
2017
|
+
import_claude_agent_sdk = require("@anthropic-ai/claude-agent-sdk");
|
|
1917
2018
|
}
|
|
1918
2019
|
});
|
|
1919
2020
|
|
|
1920
|
-
// src/
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
});
|
|
1926
|
-
async function newExperiment(args) {
|
|
1927
|
-
const root = findProjectRoot();
|
|
1928
|
-
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
1929
|
-
const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
|
|
1930
|
-
if (!hypothesis) {
|
|
1931
|
-
throw new Error('Usage: majlis new "hypothesis"');
|
|
1932
|
-
}
|
|
1933
|
-
const db = getDb(root);
|
|
1934
|
-
const config = loadConfig3(root);
|
|
1935
|
-
const slug = slugify(hypothesis);
|
|
1936
|
-
if (getExperimentBySlug(db, slug)) {
|
|
1937
|
-
throw new Error(`Experiment with slug "${slug}" already exists.`);
|
|
1938
|
-
}
|
|
1939
|
-
const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
|
|
1940
|
-
const num = allExps.count + 1;
|
|
1941
|
-
const paddedNum = String(num).padStart(3, "0");
|
|
1942
|
-
const branch = `exp/${paddedNum}-${slug}`;
|
|
1943
|
-
try {
|
|
1944
|
-
(0, import_node_child_process2.execSync)(`git checkout -b ${branch}`, {
|
|
1945
|
-
cwd: root,
|
|
1946
|
-
encoding: "utf-8",
|
|
1947
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
1948
|
-
});
|
|
1949
|
-
info(`Created branch: ${branch}`);
|
|
1950
|
-
} catch (err) {
|
|
1951
|
-
warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
|
|
2021
|
+
// src/agents/spawn.ts
|
|
2022
|
+
/**
 * Load an agent definition from `<root>/.majlis/agents/<role>.md`.
 *
 * The file must start with a `---` delimited frontmatter block; everything
 * after it becomes the system prompt. Files with Windows (CRLF) line endings
 * or a leading byte-order mark are accepted.
 *
 * Frontmatter fields read: `name` (defaults to `role`), `model` (defaults to
 * `"opus"`), and `tools` (a comma-separated list, optionally in brackets and
 * with each entry optionally quoted; defaults to no tools).
 *
 * @param {string} role - Agent role; also the file's base name.
 * @param {string} [projectRoot] - Project directory; falls back to
 *   `findProjectRoot()` and then the current working directory.
 * @returns {{name: string, model: string, tools: string[], systemPrompt: string}}
 * @throws {Error} If the file is missing or has no frontmatter block.
 */
function loadAgentDefinition(role, projectRoot) {
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
  const filePath = path4.join(root, ".majlis", "agents", `${role}.md`);
  if (!fs4.existsSync(filePath)) {
    throw new Error(`Agent definition not found: ${filePath}`);
  }
  // Normalise BOM and CRLF up front so the LF-based frontmatter pattern
  // below also matches files authored on Windows.
  const content = fs4.readFileSync(filePath, "utf-8").replace(/^\uFEFF/, "").replace(/\r\n/g, "\n");
  const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
  if (!frontmatterMatch) {
    throw new Error(`Invalid agent definition (missing YAML frontmatter): ${filePath}`);
  }
  const frontmatter = frontmatterMatch[1];
  const body = frontmatterMatch[2].trim();
  const name = extractYamlField(frontmatter, "name") ?? role;
  const model = extractYamlField(frontmatter, "model") ?? "opus";
  const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
  const tools = toolsStr
    .replace(/[\[\]]/g, "")
    .split(",")
    // Entries may be written quoted ("Read" / 'Read'); keep only the bare name.
    .map((t) => t.trim().replace(/^(["'])(.*)\1$/, "$2"))
    .filter(Boolean);
  return { name, model, tools, systemPrompt: body };
}
|
|
2041
|
+
/**
 * Compose the mid-run reminder text injected into an agent's context.
 *
 * The first line is a banner with an estimated turn count (half the number
 * of tool uses, rounded); the remaining lines are role-specific, with a
 * generic one-line reminder for roles that have no dedicated text.
 *
 * @param {string} role - Agent role.
 * @param {number} toolUseCount - Tool uses observed so far.
 * @param {number} maxTurns - Turn budget shown in the banner.
 * @returns {string} Newline-joined reminder message.
 */
function buildCheckpointMessage(role, toolUseCount, maxTurns) {
  const turnsUsed = Math.round(toolUseCount / 2);
  const banner = `[MAJLIS CHECKPOINT \u2014 ~${turnsUsed} of ${maxTurns} turns used]`;
  const remindersByRole = new Map([
    ["builder", [
      "Reminder: ONE code change per cycle.",
      "- Have you run the benchmark? YES \u2192 document results + output JSON + STOP.",
      "- If NO \u2192 run it now, then wrap up.",
      "Do NOT start a second change or investigate unrelated failures."
    ]],
    ["verifier", [
      "AT MOST 3 diagnostic scripts total.",
      "- If \u22653 scripts run \u2192 produce grades + output JSON now.",
      "- Trust framework metrics. Do not re-derive from raw data."
    ]],
    ["critic", [
      "Focus on the SINGLE weakest assumption.",
      "- Have you identified the core doubt? YES \u2192 write it up + output JSON.",
      "- Do not enumerate every possible concern \u2014 pick the most dangerous one."
    ]],
    ["adversary", [
      "Design ONE targeted challenge, not a test suite.",
      "- Have you defined the challenge? YES \u2192 write it up + output JSON.",
      "- Focus on what would DISPROVE the hypothesis, not general testing."
    ]],
    ["compressor", [
      "You may ONLY write to docs/synthesis/.",
      "- Have you updated current.md, fragility.md, dead-ends.md?",
      "- If yes \u2192 output compression report JSON.",
      "- Do NOT write to MEMORY.md or files outside docs/synthesis/."
    ]]
  ]);
  const genericReminder = ["Check: is your core task done? If yes, wrap up and output JSON."];
  const reminderLines = remindersByRole.get(role) ?? genericReminder;
  return [banner, ...reminderLines].join("\n");
}
|
|
2077
|
+
/**
 * Build the pre-tool-use guard hooks for a role.
 *
 * Only the compressor is guarded: its `Write` and `Edit` tool calls are
 * blocked unless the target `file_path` lies under a `docs/synthesis/`
 * directory. The path is normalised before the check so `..` segments
 * cannot be used to step back out of that directory.
 *
 * @param {string} role - Agent role.
 * @returns {Array<{matcher: string, hooks: Function[]}> | undefined} Hook
 *   matchers for the compressor, `undefined` for every other role.
 */
function buildPreToolUseGuards(role) {
  if (role !== "compressor") return void 0;
  const guardHook = async (input) => {
    const toolInput = input.tool_input ?? {};
    const filePath = toolInput.file_path ?? "";
    // Calls without a file path are not file writes this guard knows about.
    if (!filePath) return {};
    // Collapse "." / ".." segments and unify separators before matching.
    const normalized = path4.normalize(String(filePath)).split(path4.sep).join("/");
    const insideSynthesis = normalized.includes("/docs/synthesis/") || normalized.startsWith("docs/synthesis/");
    if (!insideSynthesis) {
      return {
        decision: "block",
        reason: `Compressor may only write to docs/synthesis/. Blocked: ${filePath}`
      };
    }
    return {};
  };
  return [
    { matcher: "Write", hooks: [guardHook] },
    { matcher: "Edit", hooks: [guardHook] }
  ];
}
|
|
1976
|
-
|
|
1977
|
-
const
|
|
1978
|
-
|
|
1979
|
-
const
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
2097
|
+
/**
 * Assemble the hook configuration for an agent run.
 *
 * - Roles with a `CHECKPOINT_INTERVAL` entry get a `PostToolUse` hook that
 *   counts tool uses and, on every multiple of the interval, injects the
 *   role's checkpoint message as additional context.
 * - Roles for which `buildPreToolUseGuards` returns guards get them as
 *   `PreToolUse`.
 *
 * @param {string} role - Agent role.
 * @param {number} maxTurns - Turn budget forwarded to the checkpoint message.
 * @returns {object | undefined} Hook map, or `undefined` when the role has
 *   neither checkpoints nor guards.
 */
function buildAgentHooks(role, maxTurns) {
  const hooks = {};

  const every = CHECKPOINT_INTERVAL[role];
  if (every) {
    // Counter lives in this closure, so each call gets its own tally.
    let seen = 0;
    const onPostToolUse = async () => {
      seen += 1;
      if (seen % every !== 0) return {};
      return {
        hookSpecificOutput: {
          hookEventName: "PostToolUse",
          additionalContext: buildCheckpointMessage(role, seen, maxTurns)
        }
      };
    };
    hooks.PostToolUse = [{ hooks: [onPostToolUse] }];
  }

  const preToolGuards = buildPreToolUseGuards(role);
  if (preToolGuards) {
    hooks.PreToolUse = preToolGuards;
  }

  return Object.keys(hooks).length > 0 ? hooks : void 0;
}
|
|
2019
|
-
function
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
function loadConfig3(projectRoot) {
|
|
2023
|
-
const configPath = path5.join(projectRoot, ".majlis", "config.json");
|
|
2024
|
-
if (!fs5.existsSync(configPath)) {
|
|
2025
|
-
return { cycle: { auto_baseline_on_new_experiment: false } };
|
|
2026
|
-
}
|
|
2027
|
-
return JSON.parse(fs5.readFileSync(configPath, "utf-8"));
|
|
2126
|
+
/**
 * Read a single top-level `field: value` line from a YAML frontmatter string.
 *
 * This is a line-oriented lookup, not a YAML parser: the first line that
 * starts with `<field>:` wins, and one pair of matching surrounding quotes
 * around the value is removed.
 *
 * @param {string} yaml - Frontmatter text.
 * @param {string} field - Exact field name to look for.
 * @returns {string | null} The trimmed value, or `null` when no such line exists.
 */
function extractYamlField(yaml, field) {
  // The name is matched literally; escape characters that are regex syntax.
  const literalField = field.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const match = yaml.match(new RegExp(`^${literalField}:\\s*(.+)$`, "m"));
  if (!match) return null;
  const value = match[1].trim();
  const quoted = value.match(/^(["'])(.*)\1$/);
  return quoted ? quoted[2] : value;
}
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
import_node_child_process2 = require("child_process");
|
|
2036
|
-
init_connection();
|
|
2037
|
-
init_queries();
|
|
2038
|
-
init_format();
|
|
2039
|
-
}
|
|
2040
|
-
});
|
|
2130
|
+
/**
 * Runs the agent defined for `role` against the given context and collects
 * its output.
 *
 * Steps: load the agent definition, build a prompt that embeds the context as
 * a fenced JSON block followed by the task prompt, run the query, write the
 * markdown artifact (when `writeArtifact` produces one), then extract and
 * validate the structured data.
 *
 * @param {string} role          Agent role; also selects the turn budget.
 * @param {object} context       Serialised into the prompt; `taskPrompt`
 *                               overrides the default task sentence.
 * @param {string} [projectRoot] Working directory; falls back to
 *                               `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<{output: string, structured: *, truncated: boolean}>}
 */
async function spawnAgent(role, context, projectRoot) {
  const definition = loadAgentDefinition(role, projectRoot);
  const workingDir = projectRoot ?? findProjectRoot() ?? process.cwd();
  const task = context.taskPrompt ?? `Perform your role as ${definition.name}.`;
  const serialisedContext = JSON.stringify(context);
  const fence = "```";
  const prompt = `Here is your context:\n\n${fence}json\n${serialisedContext}\n${fence}\n\n${task}`;

  // Roles without an explicit budget get 15 turns.
  const maxTurns = ROLE_MAX_TURNS[role] ?? 15;
  console.log(`[${role}] Spawning (model: ${definition.model}, maxTurns: ${maxTurns})...`);

  const reply = await runQuery({
    prompt,
    model: definition.model,
    tools: definition.tools,
    systemPrompt: definition.systemPrompt,
    cwd: workingDir,
    maxTurns,
    label: role,
    role
  });
  const markdown = reply.text;
  const truncated = reply.truncated;
  const truncationNote = truncated ? ", TRUNCATED" : "";
  console.log(`[${role}] Complete (cost: $${reply.costUsd.toFixed(4)}${truncationNote})`);

  const artifactPath = writeArtifact(role, context, markdown, workingDir);
  if (artifactPath) {
    console.log(`[${role}] Artifact written to ${artifactPath}`);
  }

  const structured = await extractStructuredData(role, markdown);
  if (structured) {
    // Missing fields are reported but never abort the run.
    const check = validateForRole(role, structured);
    if (!check.valid) {
      console.warn(`[${role}] Output missing expected fields: ${check.missing.join(", ")}`);
    }
  }
  return { output: markdown, structured, truncated };
}
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
init_format();
|
|
2096
|
-
}
|
|
2097
|
-
});
|
|
2168
|
+
/**
 * Runs the built-in synthesis agent over the given context.
 *
 * Unlike role agents, this one uses a fixed model ("sonnet"), a read-only tool
 * set and a 5-turn budget. The whole markdown reply is returned as
 * `structured.guidance`; no JSON block is parsed in this function.
 *
 * @param {object} context       Serialised into the prompt; `taskPrompt`
 *                               overrides the default task sentence.
 * @param {string} [projectRoot] Working directory; falls back to
 *                               `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<{output: string, structured: {guidance: string}, truncated: boolean}>}
 */
async function spawnSynthesiser(context, projectRoot) {
  const workingDir = projectRoot ?? findProjectRoot() ?? process.cwd();
  const serialisedContext = JSON.stringify(context);
  const task = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
  const fence = "```";
  const prompt = `Here is your context:\n\n${fence}json\n${serialisedContext}\n${fence}\n\n${task}`;
  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';

  console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
  const reply = await runQuery({
    prompt,
    model: "sonnet",
    tools: ["Read", "Glob", "Grep"],
    systemPrompt,
    cwd: workingDir,
    maxTurns: 5,
    label: "synthesiser",
    role: "synthesiser"
  });
  console.log(`[synthesiser] Complete (cost: $${reply.costUsd.toFixed(4)})`);

  return {
    output: reply.text,
    structured: { guidance: reply.text },
    truncated: reply.truncated
  };
}
|
|
2123
|
-
function
|
|
2124
|
-
const
|
|
2125
|
-
const
|
|
2126
|
-
|
|
2127
|
-
const
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
|
|
2136
|
-
}
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
|
|
2194
|
+
/**
 * Asks a small "recovery" agent to rewrite an experiment doc after another
 * agent hit its turn limit.
 *
 * The prompt contains the last 3000 characters of the truncated agent's
 * output, the current experiment doc and the doc template (each empty when
 * the file does not exist). The recovery agent is expected to write the
 * cleaned doc itself via its Write tool; this function returns nothing.
 *
 * @param {string} role            Role of the agent that was truncated.
 * @param {string} partialOutput   Output collected before truncation; a
 *                                 missing value is treated as empty.
 * @param {object} context         `context.experiment` supplies `id`/`slug`.
 * @param {string} [projectRoot]   Working directory; falls back to
 *                                 `findProjectRoot()` and then `process.cwd()`.
 * @returns {Promise<void>}
 */
async function spawnRecovery(role, partialOutput, context, projectRoot) {
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
  const expSlug = context.experiment?.slug ?? "unknown";
  console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);

  const experimentsDir = path4.join(root, "docs", "experiments");
  const docName = `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`;
  const expDocPath = path4.join(experimentsDir, docName);
  const templatePath = path4.join(experimentsDir, "_TEMPLATE.md");
  const template = fs4.existsSync(templatePath) ? fs4.readFileSync(templatePath, "utf-8") : "";
  const currentDoc = fs4.existsSync(expDocPath) ? fs4.readFileSync(expDocPath, "utf-8") : "";

  // Keep only the tail of the output, and tolerate a missing value so that
  // cleanup itself cannot crash on `.slice`.
  const outputTail = String(partialOutput ?? "").slice(-3e3);

  const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".

Here is the partial agent output (reasoning + tool calls):
<partial_output>
${outputTail}
</partial_output>

Here is the current experiment doc:
<current_doc>
${currentDoc}
</current_doc>

Here is the template that the experiment doc should follow:
<template>
${template}
</template>

Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
- Keep any valid content from the current doc
- Fill in what you can infer from the partial output
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
- Be concise. This is cleanup, not new work.`;

  // The reply text is not used: the agent's useful output is the file it writes.
  await runQuery({
    prompt,
    model: "haiku",
    tools: ["Read", "Write"],
    systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
    cwd: root,
    maxTurns: 5,
    label: "recovery",
    role: "recovery"
  });
  console.log(`[recovery] Cleanup complete for ${expSlug}.`);
}
|
|
2147
|
-
function
|
|
2148
|
-
|
|
2149
|
-
const
|
|
2150
|
-
const
|
|
2151
|
-
const
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2243
|
+
/**
 * Runs one agent conversation through the Claude agent SDK and collects the
 * assistant's text.
 *
 * While streaming, progress is echoed to stderr: each tool call, a short
 * preview of the latest text block, and a note for long-running tools.
 *
 * @param {object} opts
 * @param {string} opts.prompt         User prompt.
 * @param {string} opts.model          Model name passed to the SDK.
 * @param {string[]} opts.tools        Tool names passed to the SDK.
 * @param {string} opts.systemPrompt   Appended to the "claude_code" preset.
 * @param {string} opts.cwd            Working directory for the agent.
 * @param {number} [opts.maxTurns=15]  Turn budget.
 * @param {string} [opts.label="majlis"] Prefix for progress lines.
 * @param {string} [opts.role]         When set, hooks are built for this role.
 * @returns {Promise<{text: string, costUsd: number, truncated: boolean}>}
 *   `text` joins all assistant text blocks with blank lines; `truncated` is
 *   true when the SDK reported `error_max_turns`.
 * @throws {Error} When the SDK reports any other non-success result.
 */
async function runQuery(opts) {
  const PREVIEW_LIMIT = 120;
  let truncated = false;
  const tag = opts.label ?? "majlis";
  const maxTurns = opts.maxTurns ?? 15;
  const hooks = opts.role ? buildAgentHooks(opts.role, maxTurns) : void 0;
  const conversation = (0, import_claude_agent_sdk2.query)({
    prompt: opts.prompt,
    options: {
      model: opts.model,
      tools: opts.tools,
      systemPrompt: {
        type: "preset",
        preset: "claude_code",
        append: opts.systemPrompt
      },
      cwd: opts.cwd,
      permissionMode: "bypassPermissions",
      allowDangerouslySkipPermissions: true,
      maxTurns,
      persistSession: false,
      settingSources: ["project"],
      hooks
    }
  });
  const textParts = [];
  let costUsd = 0;
  let turnCount = 0;
  for await (const message of conversation) {
    if (message.type === "assistant") {
      turnCount++;
      let hasText = false;
      for (const block of message.message.content) {
        if (block.type === "text") {
          textParts.push(block.text);
          hasText = true;
        } else if (block.type === "tool_use") {
          const toolName = block.name ?? "tool";
          const input = block.input ?? {};
          const detail = formatToolDetail(toolName, input);
          process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}\n`);
        }
      }
      if (hasText) {
        const latest = textParts[textParts.length - 1];
        const preview = latest.slice(0, PREVIEW_LIMIT).replace(/\n/g, " ").trim();
        if (preview) {
          // Decide on the ellipsis from the full text, not from the preview:
          // trimming can shorten a cut preview, and a text of exactly
          // PREVIEW_LIMIT characters was not cut at all.
          const ellipsis = latest.length > PREVIEW_LIMIT ? "..." : "";
          process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${ellipsis}${RESET2}\n`);
        }
      }
    } else if (message.type === "tool_progress") {
      const elapsed = Math.round(message.elapsed_time_seconds);
      // Only report on multiples of five seconds to keep the log quiet.
      if (elapsed > 0 && elapsed % 5 === 0) {
        process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}\n`);
      }
    } else if (message.type === "result") {
      if (message.subtype === "success") {
        costUsd = message.total_cost_usd;
      } else if (message.subtype === "error_max_turns") {
        // Running out of turns is not fatal: return what was collected.
        truncated = true;
        costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
        console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
      } else {
        const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
        throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
      }
    }
  }
  return { text: textParts.join("\n\n"), costUsd, truncated };
}
|
|
2177
|
-
function
|
|
2178
|
-
const
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2313
|
+
/**
 * Produces a short branch slug for an experiment hypothesis.
 *
 * A small model is asked for a 2-4 word slug. Its reply is normalised to
 * lowercase `[a-z0-9-]` with single hyphens between words. If the query fails
 * or the normalised reply is shorter than three characters, a slug derived
 * mechanically from the hypothesis text is returned instead.
 *
 * @param {string} hypothesis  Experiment hypothesis.
 * @param {string} projectRoot Working directory for the query.
 * @returns {Promise<string>} The slug (model-derived: at most 40 characters;
 *   fallback: at most 30 characters).
 */
async function generateSlug(hypothesis, projectRoot) {
  const fallback = hypothesis.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 30).replace(/-$/, "");
  try {
    const { text } = await runQuery({
      prompt: `Generate a short, descriptive git branch slug (2-4 words, lowercase, hyphen-separated) for this experiment hypothesis:

"${hypothesis.slice(0, 500)}"

Output ONLY the slug, nothing else. Examples: uv-containment-filter, skip-degenerate-faces, fix-edge-sewing-order`,
      model: "haiku",
      tools: [],
      systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
      cwd: projectRoot,
      maxTurns: 1,
      label: "slug",
      role: "slug"
    });
    const slug = text
      .trim()
      .toLowerCase()
      // Word separators become hyphens instead of being deleted, so
      // "uv containment filter" does not collapse into one word.
      .replace(/[\s_]+/g, "-")
      .replace(/[^a-z0-9-]+/g, "")
      .replace(/-{2,}/g, "-")
      .replace(/^-+|-+$/g, "")
      .slice(0, 40)
      // Cutting at 40 characters can expose a hyphen at the end again.
      .replace(/-+$/, "");
    return slug.length >= 3 ? slug : fallback;
  } catch {
    // Slug generation is best effort; any query failure falls back to the
    // mechanically derived slug.
    return fallback;
  }
}
|
|
2191
|
-
function
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
|
|
2199
|
-
|
|
2200
|
-
|
|
2201
|
-
|
|
2202
|
-
|
|
2203
|
-
|
|
2336
|
+
/**
 * Builds the short suffix shown after a tool name in progress output.
 *
 * Each known tool contributes one field of its input (path, pattern, command
 * or query). Unknown tools, and known tools whose field is empty, yield "".
 *
 * @param {string} toolName Tool name as reported in the tool-use block.
 * @param {object} input    Tool input.
 * @returns {string} Suffix beginning with a space, or the empty string.
 */
function formatToolDetail(toolName, input) {
  const plainPath = (args) => (args.file_path ? ` ${args.file_path}` : "");
  const describe = new Map([
    ["Read", plainPath],
    ["Write", (args) => (args.file_path ? ` \u2192 ${args.file_path}` : "")],
    ["Edit", plainPath],
    ["Glob", (args) => (args.pattern ? ` ${args.pattern}` : "")],
    ["Grep", (args) => (args.pattern ? ` /${args.pattern}/` : "")],
    // Shell commands can be long; only the first 80 characters are shown.
    ["Bash", (args) => (args.command ? ` $ ${args.command.slice(0, 80)}` : "")],
    ["WebSearch", (args) => (args.query ? ` "${args.query}"` : "")]
  ]).get(toolName);
  if (!describe) {
    return "";
  }
  return describe(input);
}
|
|
2216
|
-
function
|
|
2217
|
-
const
|
|
2218
|
-
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2356
|
+
/**
 * Saves an agent's markdown output under the docs directory for its role.
 *
 * Only critic, adversary, verifier, reframer and scout outputs are written
 * here; every other role (including builder and compressor) yields null. The
 * file is named `<id>-<role>-<slug>.md`, with the experiment id zero-padded to
 * three digits (id defaults to 1, slug to "general").
 *
 * @param {string} role        Agent role.
 * @param {object} context     `context.experiment` supplies `id` and `slug`.
 * @param {string} markdown    File content.
 * @param {string} projectRoot Project root the docs directory lives under.
 * @returns {string|null} Path of the written file, or null if nothing was written.
 */
function writeArtifact(role, context, markdown, projectRoot) {
  const artifactDirs = {
    critic: "docs/doubts",
    adversary: "docs/challenges",
    verifier: "docs/verification",
    reframer: "docs/reframes",
    scout: "docs/rihla"
  };
  const relativeDir = artifactDirs[role];
  if (!relativeDir) {
    return null;
  }

  const outputDir = path4.join(projectRoot, relativeDir);
  if (!fs4.existsSync(outputDir)) {
    fs4.mkdirSync(outputDir, { recursive: true });
  }

  const experiment = context.experiment;
  const slugPart = experiment?.slug ?? "general";
  const idPart = String(experiment?.id ?? 1).padStart(3, "0");
  const target = path4.join(outputDir, `${idPart}-${role}-${slugPart}.md`);
  fs4.writeFileSync(target, markdown);
  return target;
}
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2380
|
+
var fs4, path4, import_claude_agent_sdk2, ROLE_MAX_TURNS, CHECKPOINT_INTERVAL, DIM2, RESET2, CYAN2;
|
|
2381
|
+
var init_spawn = __esm({
|
|
2382
|
+
"src/agents/spawn.ts"() {
|
|
2383
|
+
"use strict";
|
|
2384
|
+
fs4 = __toESM(require("fs"));
|
|
2385
|
+
path4 = __toESM(require("path"));
|
|
2386
|
+
import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
|
|
2387
|
+
init_parse();
|
|
2388
|
+
init_connection();
|
|
2389
|
+
ROLE_MAX_TURNS = {
|
|
2390
|
+
builder: 50,
|
|
2391
|
+
critic: 30,
|
|
2392
|
+
adversary: 30,
|
|
2393
|
+
verifier: 50,
|
|
2394
|
+
compressor: 30,
|
|
2395
|
+
reframer: 20,
|
|
2396
|
+
scout: 20,
|
|
2397
|
+
gatekeeper: 10
|
|
2398
|
+
};
|
|
2399
|
+
CHECKPOINT_INTERVAL = {
|
|
2400
|
+
builder: 15,
|
|
2401
|
+
verifier: 12,
|
|
2402
|
+
critic: 15,
|
|
2403
|
+
adversary: 15,
|
|
2404
|
+
compressor: 15
|
|
2405
|
+
};
|
|
2406
|
+
DIM2 = "\x1B[2m";
|
|
2407
|
+
RESET2 = "\x1B[0m";
|
|
2408
|
+
CYAN2 = "\x1B[36m";
|
|
2241
2409
|
}
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2410
|
+
});
|
|
2411
|
+
|
|
2412
|
+
// src/metrics.ts
|
|
2413
|
+
/**
 * Pairs the "before" and "after" metrics of an experiment and flags
 * regressions.
 *
 * A comparison is produced for every (fixture, tracked metric) pair that has
 * a value in both phases. When a phase holds several rows for the same pair,
 * the first row returned by the query is used.
 *
 * @param {object} db            Database handle passed through to the query helper.
 * @param {number} experimentId  Experiment whose metrics are compared.
 * @param {object} config        `config.metrics.tracked` maps metric names to
 *                               settings; `direction` defaults to "lower_is_better".
 * @returns {Array<{fixture: *, metric: string, before: number, after: number,
 *   delta: number, regression: boolean}>}
 */
function compareMetrics(db, experimentId, config) {
  const before = getMetricsByExperimentAndPhase(db, experimentId, "before");
  const after = getMetricsByExperimentAndPhase(db, experimentId, "after");
  const tracked = config.metrics.tracked;
  const trackedMetrics = Object.keys(tracked);

  // Index rows once by fixture and metric name so each lookup below is O(1)
  // instead of a scan over all rows. The first row per key is kept.
  const indexFirst = (rows) => {
    const byFixture = new Map();
    for (const row of rows) {
      let byMetric = byFixture.get(row.fixture);
      if (!byMetric) {
        byMetric = new Map();
        byFixture.set(row.fixture, byMetric);
      }
      if (!byMetric.has(row.metric_name)) {
        byMetric.set(row.metric_name, row);
      }
    }
    return byFixture;
  };
  const beforeIndex = indexFirst(before);
  const afterIndex = indexFirst(after);

  const fixtures = new Set([...before, ...after].map((m) => m.fixture));
  const comparisons = [];
  for (const fixture of fixtures) {
    for (const metric of trackedMetrics) {
      const b = beforeIndex.get(fixture)?.get(metric);
      const a = afterIndex.get(fixture)?.get(metric);
      if (b && a) {
        const direction = tracked[metric]?.direction ?? "lower_is_better";
        comparisons.push({
          fixture,
          metric,
          before: b.metric_value,
          after: a.metric_value,
          delta: a.metric_value - b.metric_value,
          regression: isRegression(b.metric_value, a.metric_value, direction)
        });
      }
    }
  }
  return comparisons;
}
|
|
2439
|
+
/**
 * Tells whether a metric got worse between two measurements.
 *
 * @param {number} before    Value measured first.
 * @param {number} after     Value measured second.
 * @param {string} direction "lower_is_better" or "higher_is_better". Any other
 *                           value, including "closer_to_gt", is never reported
 *                           as a regression by this function.
 * @returns {boolean} True when the value moved in the wrong direction.
 */
function isRegression(before, after, direction) {
  if (direction === "lower_is_better") {
    return after > before;
  }
  if (direction === "higher_is_better") {
    return after < before;
  }
  return false;
}
|
|
2264
|
-
function
|
|
2265
|
-
const
|
|
2266
|
-
|
|
2267
|
-
|
|
2451
|
+
/**
 * Flattens the JSON printed by the metrics command into metric rows.
 *
 * Expected shape: `{ "fixtures": { "<fixture>": { "<metric>": <number> } } }`.
 * Non-numeric metric values are skipped, and so are fixture entries that are
 * not objects.
 *
 * @param {string} jsonStr Raw JSON text.
 * @returns {Array<{fixture: string, metric_name: string, metric_value: number}>}
 *   One row per numeric metric; empty when there is no `fixtures` object.
 * @throws {SyntaxError} When `jsonStr` is not valid JSON.
 */
function parseMetricsOutput(jsonStr) {
  const data = JSON.parse(jsonStr);
  // `null` is valid JSON, so guard the property access as well.
  const fixtures = data?.fixtures;
  if (!fixtures || typeof fixtures !== "object") {
    return [];
  }
  const results = [];
  for (const [fixture, metrics] of Object.entries(fixtures)) {
    // A fixture reported as `null` would make Object.entries throw.
    if (metrics === null || typeof metrics !== "object") {
      continue;
    }
    for (const [metricName, metricValue] of Object.entries(metrics)) {
      if (typeof metricValue === "number") {
        results.push({ fixture, metric_name: metricName, metric_value: metricValue });
      }
    }
  }
  return results;
}
|
|
2271
|
-
var
|
|
2272
|
-
|
|
2273
|
-
"src/commands/query.ts"() {
|
|
2465
|
+
var init_metrics = __esm({
|
|
2466
|
+
"src/metrics.ts"() {
|
|
2274
2467
|
"use strict";
|
|
2275
|
-
fs6 = __toESM(require("fs"));
|
|
2276
|
-
path6 = __toESM(require("path"));
|
|
2277
|
-
init_connection();
|
|
2278
2468
|
init_queries();
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2291
|
-
["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
|
|
2292
|
-
// self-loop for rejected hypotheses
|
|
2293
|
-
["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
|
|
2294
|
-
// self-loop for retry after truncation
|
|
2295
|
-
["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
|
|
2296
|
-
["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
|
|
2297
|
-
["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
|
|
2298
|
-
["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
|
|
2299
|
-
["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
|
|
2300
|
-
["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
|
|
2301
|
-
["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
|
|
2302
|
-
// cycle-back skips gate
|
|
2303
|
-
["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
|
|
2304
|
-
// cycle-back skips gate
|
|
2305
|
-
["merged" /* MERGED */]: [],
|
|
2306
|
-
["dead_end" /* DEAD_END */]: []
|
|
2307
|
-
};
|
|
2308
|
-
GRADE_ORDER = ["rejected", "weak", "good", "sound"];
|
|
2309
|
-
}
|
|
2310
|
-
});
|
|
2311
|
-
|
|
2312
|
-
// src/state/machine.ts
|
|
2313
|
-
function transition(current, target) {
|
|
2314
|
-
const valid = TRANSITIONS[current];
|
|
2315
|
-
if (!valid.includes(target)) {
|
|
2316
|
-
throw new Error(
|
|
2317
|
-
`Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
|
|
2318
|
-
);
|
|
2319
|
-
}
|
|
2320
|
-
return target;
|
|
2321
|
-
}
|
|
2322
|
-
function validNext(current) {
|
|
2323
|
-
return TRANSITIONS[current];
|
|
2469
|
+
}
|
|
2470
|
+
});
|
|
2471
|
+
|
|
2472
|
+
// src/commands/measure.ts
|
|
2473
|
+
var measure_exports = {};
|
|
2474
|
+
__export(measure_exports, {
|
|
2475
|
+
baseline: () => baseline,
|
|
2476
|
+
compare: () => compare,
|
|
2477
|
+
measure: () => measure
|
|
2478
|
+
});
|
|
2479
|
+
/**
 * CLI entry point that records metrics for the "before" phase.
 *
 * @param {string[]} args Command-line arguments, forwarded unchanged.
 * @returns {Promise<void>}
 */
async function baseline(args) {
  const phase = "before";
  await captureMetrics(phase, args);
}
|
|
2325
|
-
function
|
|
2326
|
-
|
|
2482
|
+
/**
 * CLI entry point that records metrics for the "after" phase.
 *
 * @param {string[]} args Command-line arguments, forwarded unchanged.
 * @returns {Promise<void>}
 */
async function measure(args) {
  const phase = "after";
  await captureMetrics(phase, args);
}
|
|
2328
|
-
function
|
|
2329
|
-
|
|
2330
|
-
|
|
2485
|
+
/**
 * Runs the configured metrics command and stores its results for one phase.
 *
 * The target experiment is the one named by `--experiment <id>`, or the
 * latest experiment when the flag is absent. The optional pre-measure and
 * post-measure commands are best effort: their failure only produces a
 * warning. The post-measure command runs only after metrics were stored.
 *
 * @param {string} phase   Phase label stored with each metric ("before"/"after").
 * @param {string[]} args  Command-line arguments.
 * @returns {Promise<void>}
 * @throws {Error} Outside a Majlis project, for an invalid or unknown
 *   `--experiment` id, when no experiment exists, when no metrics command is
 *   configured, or when the metrics command fails.
 */
async function captureMetrics(phase, args) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  const config = loadConfig(root);
  const expIdStr = getFlagValue(args, "--experiment");
  let exp;
  if (expIdStr !== void 0) {
    // Report a bad or unknown id as such, instead of the misleading
    // "No active experiment" message used for the no-flag case.
    const expId = Number(expIdStr);
    if (!Number.isInteger(expId)) {
      throw new Error(`Invalid --experiment value "${expIdStr}": expected a numeric experiment id.`);
    }
    exp = getExperimentById(db, expId);
    if (!exp) throw new Error(`Experiment #${expId} not found.`);
  } else {
    exp = getLatestExperiment(db);
  }
  if (!exp) throw new Error('No active experiment. Run `majlis new "hypothesis"` first.');
  if (config.build.pre_measure) {
    info(`Running pre-measure: ${config.build.pre_measure}`);
    try {
      (0, import_node_child_process.execSync)(config.build.pre_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
    } catch {
      warn("Pre-measure command failed \u2014 continuing anyway.");
    }
  }
  if (!config.metrics.command) {
    throw new Error("No metrics.command configured in .majlis/config.json");
  }
  info(`Running metrics: ${config.metrics.command}`);
  let metricsOutput;
  try {
    // Output is captured (not inherited) because stdout carries the JSON.
    metricsOutput = (0, import_node_child_process.execSync)(config.metrics.command, {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
  } catch (err) {
    throw new Error(`Metrics command failed: ${err instanceof Error ? err.message : String(err)}`);
  }
  const parsed = parseMetricsOutput(metricsOutput);
  if (parsed.length === 0) {
    warn("Metrics command returned no data.");
    return;
  }
  for (const m of parsed) {
    insertMetric(db, exp.id, phase, m.fixture, m.metric_name, m.metric_value);
  }
  success(`Captured ${parsed.length} metric(s) for ${exp.slug} (phase: ${phase})`);
  if (config.build.post_measure) {
    try {
      (0, import_node_child_process.execSync)(config.build.post_measure, { cwd: root, encoding: "utf-8", stdio: "inherit" });
    } catch {
      warn("Post-measure command failed.");
    }
  }
}
|
|
2352
|
-
|
|
2353
|
-
|
|
2354
|
-
|
|
2355
|
-
|
|
2537
|
+
/**
 * Prints the before/after metric comparison for an experiment.
 *
 * The target experiment is the one named by `--experiment <id>`, or the
 * latest experiment when the flag is absent. With `isJson` the comparisons
 * are printed as JSON; otherwise a table and a regression summary are shown.
 * When there is nothing to compare, only a warning is emitted.
 *
 * @param {string[]} args   Command-line arguments.
 * @param {boolean} isJson  Emit JSON instead of a table.
 * @returns {Promise<void>}
 * @throws {Error} Outside a Majlis project, for an invalid or unknown
 *   `--experiment` id, or when no experiment exists.
 */
async function compare(args, isJson) {
  const root = findProjectRoot();
  if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
  const db = getDb(root);
  const config = loadConfig(root);
  const expIdStr = getFlagValue(args, "--experiment");
  let exp;
  if (expIdStr !== void 0) {
    // Report a bad or unknown id as such, instead of the misleading
    // "No active experiment" message used for the no-flag case.
    const expId = Number(expIdStr);
    if (!Number.isInteger(expId)) {
      throw new Error(`Invalid --experiment value "${expIdStr}": expected a numeric experiment id.`);
    }
    exp = getExperimentById(db, expId);
    if (!exp) throw new Error(`Experiment #${expId} not found.`);
  } else {
    exp = getLatestExperiment(db);
  }
  if (!exp) throw new Error("No active experiment.");
  const comparisons = compareMetrics(db, exp.id, config);
  if (comparisons.length === 0) {
    warn(`No before/after metrics to compare for ${exp.slug}. Run baseline and measure first.`);
    return;
  }
  if (isJson) {
    console.log(JSON.stringify({ experiment: exp.slug, comparisons }, null, 2));
    return;
  }
  header(`Metric Comparison \u2014 ${exp.slug}`);
  const regressions = comparisons.filter((c) => c.regression);
  const rows = comparisons.map((c) => [
    c.fixture,
    c.metric,
    String(c.before),
    String(c.after),
    formatDelta(c.delta),
    c.regression ? red("REGRESSION") : green("OK")
  ]);
  console.log(table(["Fixture", "Metric", "Before", "After", "Delta", "Status"], rows));
  // Blank line between the table and the summary.
  console.log();
  if (regressions.length > 0) {
    warn(`${regressions.length} regression(s) detected!`);
  } else {
    success("No regressions detected.");
  }
}
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2578
|
+
/**
 * Formats a metric delta with four decimals and an explicit sign.
 *
 * @param {number} delta Difference `after - before`.
 * @returns {string} The fixed-point text, prefixed with "+" when `delta` is
 *   greater than zero.
 */
function formatDelta(delta) {
  const fixed = delta.toFixed(4);
  if (delta > 0) {
    return `+${fixed}`;
  }
  return `${fixed}`;
}
|
|
2582
|
+
var import_node_child_process;
|
|
2583
|
+
var init_measure = __esm({
|
|
2584
|
+
"src/commands/measure.ts"() {
|
|
2385
2585
|
"use strict";
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
}`;
|
|
2393
|
-
ROLE_REQUIRED_FIELDS = {
|
|
2394
|
-
builder: ["decisions"],
|
|
2395
|
-
critic: ["doubts"],
|
|
2396
|
-
adversary: ["challenges"],
|
|
2397
|
-
verifier: ["grades"],
|
|
2398
|
-
gatekeeper: ["gate_decision"],
|
|
2399
|
-
reframer: ["reframe"],
|
|
2400
|
-
scout: ["findings"],
|
|
2401
|
-
compressor: ["compression_report"]
|
|
2402
|
-
};
|
|
2586
|
+
import_node_child_process = require("child_process");
|
|
2587
|
+
init_connection();
|
|
2588
|
+
init_queries();
|
|
2589
|
+
init_metrics();
|
|
2590
|
+
init_config();
|
|
2591
|
+
init_format();
|
|
2403
2592
|
}
|
|
2404
2593
|
});
|
|
2405
2594
|
|
|
2406
|
-
// src/
|
|
2407
|
-
|
|
2408
|
-
|
|
2409
|
-
|
|
2410
|
-
|
|
2411
|
-
|
|
2412
|
-
|
|
2413
|
-
|
|
2414
|
-
|
|
2595
|
+
// src/commands/experiment.ts
|
|
2596
|
+
var experiment_exports = {};
|
|
2597
|
+
__export(experiment_exports, {
|
|
2598
|
+
newExperiment: () => newExperiment,
|
|
2599
|
+
revert: () => revert
|
|
2600
|
+
});
|
|
2601
|
+
async function newExperiment(args) {
|
|
2602
|
+
const root = findProjectRoot();
|
|
2603
|
+
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
2604
|
+
const hypothesis = args.filter((a) => !a.startsWith("--")).join(" ");
|
|
2605
|
+
if (!hypothesis) {
|
|
2606
|
+
throw new Error('Usage: majlis new "hypothesis"');
|
|
2415
2607
|
}
|
|
2416
|
-
const
|
|
2417
|
-
|
|
2418
|
-
|
|
2419
|
-
|
|
2608
|
+
const db = getDb(root);
|
|
2609
|
+
const config = loadConfig(root);
|
|
2610
|
+
const slug = getFlagValue(args, "--slug") ?? await generateSlug(hypothesis, root);
|
|
2611
|
+
if (getExperimentBySlug(db, slug)) {
|
|
2612
|
+
throw new Error(`Experiment with slug "${slug}" already exists.`);
|
|
2420
2613
|
}
|
|
2421
|
-
|
|
2422
|
-
const
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
`[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
|
|
2426
|
-
);
|
|
2427
|
-
return null;
|
|
2428
|
-
}
|
|
2429
|
-
function extractMajlisJsonBlock(markdown) {
|
|
2430
|
-
const match = markdown.match(/<!--\s*majlis-json\s*\n([\s\S]*?)-->/);
|
|
2431
|
-
if (!match) return null;
|
|
2432
|
-
return match[1].trim();
|
|
2433
|
-
}
|
|
2434
|
-
function tryParseJson(jsonStr) {
|
|
2614
|
+
const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
|
|
2615
|
+
const num = allExps.count + 1;
|
|
2616
|
+
const paddedNum = String(num).padStart(3, "0");
|
|
2617
|
+
const branch = `exp/${paddedNum}-${slug}`;
|
|
2435
2618
|
try {
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
|
|
2439
|
-
|
|
2440
|
-
}
|
|
2441
|
-
function extractViaPatterns(role, markdown) {
|
|
2442
|
-
const result = {};
|
|
2443
|
-
const decisionPattern = /\[(?:decision|Decision)\].*?(?:description|Description):\s*(.+?)(?:\n|$).*?(?:evidence.?level|Evidence.?Level|level):\s*(proof|test|strong_consensus|consensus|analogy|judgment).*?(?:justification|Justification):\s*(.+?)(?:\n|$)/gis;
|
|
2444
|
-
const decisions = [];
|
|
2445
|
-
const evidenceMarkers = /(?:^|\n)\s*[-*]\s*\*?\*?(?:Decision|DECISION)\*?\*?:\s*(.+?)(?:\n|$).*?(?:Evidence|EVIDENCE|Level):\s*(proof|test|strong_consensus|consensus|analogy|judgment)/gim;
|
|
2446
|
-
let match;
|
|
2447
|
-
while ((match = evidenceMarkers.exec(markdown)) !== null) {
|
|
2448
|
-
decisions.push({
|
|
2449
|
-
description: match[1].trim(),
|
|
2450
|
-
evidence_level: match[2].toLowerCase().trim(),
|
|
2451
|
-
justification: "Extracted via regex \u2014 review"
|
|
2619
|
+
(0, import_node_child_process2.execSync)(`git checkout -b ${branch}`, {
|
|
2620
|
+
cwd: root,
|
|
2621
|
+
encoding: "utf-8",
|
|
2622
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
2452
2623
|
});
|
|
2624
|
+
info(`Created branch: ${branch}`);
|
|
2625
|
+
} catch (err) {
|
|
2626
|
+
warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
|
|
2453
2627
|
}
|
|
2454
|
-
const
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2628
|
+
const subType = getFlagValue(args, "--sub-type") ?? null;
|
|
2629
|
+
const exp = createExperiment(db, slug, branch, hypothesis, subType, null);
|
|
2630
|
+
success(`Created experiment #${exp.id}: ${exp.slug}`);
|
|
2631
|
+
const docsDir = path5.join(root, "docs", "experiments");
|
|
2632
|
+
const templatePath = path5.join(docsDir, "_TEMPLATE.md");
|
|
2633
|
+
if (fs5.existsSync(templatePath)) {
|
|
2634
|
+
const template = fs5.readFileSync(templatePath, "utf-8");
|
|
2635
|
+
const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, subType ?? "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
|
|
2636
|
+
const logPath = path5.join(docsDir, `${paddedNum}-${slug}.md`);
|
|
2637
|
+
fs5.writeFileSync(logPath, logContent);
|
|
2638
|
+
info(`Created experiment log: docs/experiments/${paddedNum}-${slug}.md`);
|
|
2639
|
+
}
|
|
2640
|
+
if (config.cycle.auto_baseline_on_new_experiment && config.metrics.command) {
|
|
2641
|
+
info("Auto-baselining... (run `majlis baseline` to do this manually)");
|
|
2642
|
+
try {
|
|
2643
|
+
const { baseline: baseline2 } = await Promise.resolve().then(() => (init_measure(), measure_exports));
|
|
2644
|
+
await baseline2(["--experiment", String(exp.id)]);
|
|
2645
|
+
} catch (err) {
|
|
2646
|
+
warn("Auto-baseline failed \u2014 run `majlis baseline` manually.");
|
|
2463
2647
|
}
|
|
2464
2648
|
}
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
const
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2649
|
+
}
|
|
2650
|
+
async function revert(args) {
|
|
2651
|
+
const root = findProjectRoot();
|
|
2652
|
+
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
2653
|
+
const db = getDb(root);
|
|
2654
|
+
let exp;
|
|
2655
|
+
const slugArg = args.filter((a) => !a.startsWith("--"))[0];
|
|
2656
|
+
if (slugArg) {
|
|
2657
|
+
exp = getExperimentBySlug(db, slugArg);
|
|
2658
|
+
if (!exp) throw new Error(`Experiment not found: ${slugArg}`);
|
|
2659
|
+
} else {
|
|
2660
|
+
exp = getLatestExperiment(db);
|
|
2661
|
+
if (!exp) throw new Error("No active experiments to revert.");
|
|
2473
2662
|
}
|
|
2474
|
-
const
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2663
|
+
const reason = getFlagValue(args, "--reason") ?? "Manually reverted";
|
|
2664
|
+
const category = args.includes("--structural") ? "structural" : "procedural";
|
|
2665
|
+
insertDeadEnd(
|
|
2666
|
+
db,
|
|
2667
|
+
exp.id,
|
|
2668
|
+
exp.hypothesis ?? exp.slug,
|
|
2669
|
+
reason,
|
|
2670
|
+
`Reverted: ${reason}`,
|
|
2671
|
+
exp.sub_type,
|
|
2672
|
+
category
|
|
2673
|
+
);
|
|
2674
|
+
updateExperimentStatus(db, exp.id, "dead_end");
|
|
2675
|
+
try {
|
|
2676
|
+
const currentBranch = (0, import_node_child_process2.execSync)("git rev-parse --abbrev-ref HEAD", {
|
|
2677
|
+
cwd: root,
|
|
2678
|
+
encoding: "utf-8"
|
|
2679
|
+
}).trim();
|
|
2680
|
+
if (currentBranch === exp.branch) {
|
|
2681
|
+
(0, import_node_child_process2.execSync)("git checkout main 2>/dev/null || git checkout master", {
|
|
2682
|
+
cwd: root,
|
|
2683
|
+
encoding: "utf-8",
|
|
2684
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
2481
2685
|
});
|
|
2482
2686
|
}
|
|
2687
|
+
} catch {
|
|
2688
|
+
warn("Could not switch git branches \u2014 do this manually.");
|
|
2483
2689
|
}
|
|
2484
|
-
|
|
2485
|
-
const doubts = [];
|
|
2486
|
-
const doubtPattern = /(?:Doubt|DOUBT|Claim doubted|CLAIM)\s*(?:\d+)?[:.]?\s*(.+?)(?:\n|$)[\s\S]*?(?:Severity|SEVERITY)\s*[:=]\s*(minor|moderate|critical)/gim;
|
|
2487
|
-
while ((match = doubtPattern.exec(markdown)) !== null) {
|
|
2488
|
-
doubts.push({
|
|
2489
|
-
claim_doubted: match[1].trim(),
|
|
2490
|
-
evidence_level_of_claim: "unknown",
|
|
2491
|
-
// Don't fabricate — mark as unknown for review
|
|
2492
|
-
evidence_for_doubt: "Extracted via regex \u2014 review original document",
|
|
2493
|
-
severity: match[2].toLowerCase().trim()
|
|
2494
|
-
});
|
|
2495
|
-
}
|
|
2496
|
-
if (doubts.length > 0) result.doubts = doubts;
|
|
2497
|
-
return result;
|
|
2690
|
+
info(`Experiment ${exp.slug} reverted to dead-end. Reason: ${reason}`);
|
|
2498
2691
|
}
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2503
|
-
|
|
2692
|
+
// Module-scope bindings used by the experiment commands. They are declared
// here without a value and only assigned inside the initializer body below.
var fs5, path5, import_node_child_process2;
// Initializer for "src/commands/experiment.ts", wrapped by the bundler's
// `__esm` helper (defined outside this view; presumably runs the body once on
// first call — confirm against the helper).
var init_experiment = __esm({
  "src/commands/experiment.ts"() {
    "use strict";
    // Node built-ins: fs/path go through the `__toESM` interop wrapper,
    // child_process is used as the raw CommonJS export.
    fs5 = __toESM(require("fs"));
    path5 = __toESM(require("path"));
    import_node_child_process2 = require("child_process");
    // Initialize the bundled modules this file depends on.
    init_connection();
    init_queries();
    init_config();
    init_spawn();
    init_format();
  }
});
|
|
2504
2706
|
|
|
2505
|
-
|
|
2506
|
-
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
2515
|
-
|
|
2516
|
-
|
|
2517
|
-
|
|
2518
|
-
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
for (const block of message.message.content) {
|
|
2523
|
-
if (block.type === "text") {
|
|
2524
|
-
resultText += block.text;
|
|
2525
|
-
}
|
|
2526
|
-
}
|
|
2527
|
-
}
|
|
2707
|
+
// src/commands/session.ts
|
|
2708
|
+
// Export namespace object for src/commands/session.ts. `__export` is given a
// getter thunk, so `session` is looked up when the property is read rather
// than when this statement runs.
var session_exports = {};
__export(session_exports, {
  session: () => session
});
|
|
2712
|
+
/**
 * Implements `majlis session start "intent"` and `majlis session end`.
 *
 * @param {string[]} args - CLI arguments; args[0] must be "start" or "end".
 *   For "start", the remaining non-flag arguments are joined into the intent.
 *   For "end", the optional flags --accomplished, --unfinished and --fragility
 *   are read and stored with the session.
 * @returns {Promise<void>}
 * @throws {Error} On an unknown subcommand, when run outside a Majlis project,
 *   when "start" has no intent, or when "end" finds no active session.
 */
async function session(args) {
  const action = args[0];
  const isStart = action === "start";
  if (!isStart && action !== "end") {
    throw new Error('Usage: majlis session start "intent" | majlis session end');
  }

  const root = findProjectRoot();
  if (!root) {
    throw new Error("Not in a Majlis project. Run `majlis init` first.");
  }
  const db = getDb(root);

  if (!isStart) {
    // "end": close the active session and echo whatever notes were supplied.
    const current = getActiveSession(db);
    if (!current) {
      throw new Error("No active session to end.");
    }
    const [accomplished, unfinished, fragility] = [
      "--accomplished",
      "--unfinished",
      "--fragility"
    ].map((flag) => getFlagValue(args, flag) ?? null);
    endSession(db, current.id, accomplished, unfinished, fragility);
    success(`Session ended: "${current.intent}"`);
    if (accomplished) info(`Accomplished: ${accomplished}`);
    if (unfinished) info(`Unfinished: ${unfinished}`);
    if (fragility) warn(`New fragility: ${fragility}`);
    return;
  }

  // "start": everything after the subcommand that is not a --flag is the intent.
  const words = [];
  for (const token of args.slice(1)) {
    if (!token.startsWith("--")) words.push(token);
  }
  const intent = words.join(" ");
  if (!intent) {
    throw new Error('Usage: majlis session start "intent"');
  }

  // Only one session may be active; warn and bail out instead of throwing.
  const running = getActiveSession(db);
  if (running) {
    warn(`Session already active: "${running.intent}" (started ${running.started_at})`);
    warn("End it first with `majlis session end`.");
    return;
  }

  // Link the new session to the latest experiment when there is one.
  const latestExp = getLatestExperiment(db);
  const created = startSession(db, intent, latestExp?.id ?? null);
  success(`Session started: "${intent}" (id: ${created.id})`);
  if (latestExp) {
    info(`Linked to experiment: ${latestExp.slug} (${latestExp.status})`);
  }
}
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
}
|
|
2538
|
-
function validateForRole(role, output) {
|
|
2539
|
-
const required = ROLE_REQUIRED_FIELDS[role];
|
|
2540
|
-
if (!required) return { valid: true, missing: [] };
|
|
2541
|
-
const missing = required.filter((field) => {
|
|
2542
|
-
const value = output[field];
|
|
2543
|
-
if (value === void 0 || value === null) return true;
|
|
2544
|
-
if (Array.isArray(value) && value.length === 0) return true;
|
|
2545
|
-
return false;
|
|
2546
|
-
});
|
|
2547
|
-
return { valid: missing.length === 0, missing };
|
|
2548
|
-
}
|
|
2549
|
-
var import_claude_agent_sdk;
|
|
2550
|
-
var init_parse = __esm({
|
|
2551
|
-
"src/agents/parse.ts"() {
|
|
2752
|
+
// Initializer for "src/commands/session.ts", wrapped by the bundler's `__esm`
// helper (defined outside this view). Its body only initializes the bundled
// modules the session command depends on.
var init_session = __esm({
  "src/commands/session.ts"() {
    "use strict";
    init_connection();
    init_queries();
    init_config();
    init_format();
  }
});
|
|
2557
2761
|
|
|
2558
|
-
// src/
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2762
|
+
// src/commands/query.ts
|
|
2763
|
+
// Export namespace object for src/commands/query.ts. The public name `query`
// maps to the local function `query3` (the bundler's renamed binding).
var query_exports = {};
__export(query_exports, {
  query: () => query3
});
|
|
2767
|
+
/**
 * Dispatches a read-only query subcommand to its handler.
 *
 * @param {string} command - One of "decisions", "dead-ends", "fragility",
 *   "history", "circuit-breakers" or "check-commit".
 * @param {string[]} args - Remaining CLI arguments, forwarded to handlers that
 *   accept them.
 * @param {boolean} isJson - Forwarded to handlers that support JSON output.
 * @returns {Promise<*>} Whatever the selected handler returns; undefined when
 *   the command is not recognised.
 * @throws {Error} When run outside a Majlis project.
 */
async function query3(command, args, isJson) {
  const root = findProjectRoot();
  if (!root) {
    throw new Error("Not in a Majlis project. Run `majlis init` first.");
  }
  const db = getDb(root);

  // A Map (not a plain object) so inherited keys like "constructor" never match.
  const handlers = new Map([
    ["decisions", () => queryDecisions(db, args, isJson)],
    ["dead-ends", () => queryDeadEnds(db, args, isJson)],
    ["fragility", () => queryFragility(root, isJson)],
    ["history", () => queryHistory(db, args, isJson)],
    ["circuit-breakers", () => queryCircuitBreakers(db, root, isJson)],
    ["check-commit", () => checkCommit(db)]
  ]);

  const run = handlers.get(command);
  return run ? run() : void 0;
}
|
|
2786
|
+
/**
 * Lists recorded decisions, optionally filtered by evidence level and/or
 * experiment id, as JSON or as a table.
 *
 * @param {object} db - Database handle passed through to listAllDecisions.
 * @param {string[]} args - CLI arguments; reads --level and --experiment.
 * @param {boolean} isJson - When truthy, print the raw rows as JSON.
 * @returns {void}
 * @throws {Error} When --experiment is given but is not an integer.
 */
function queryDecisions(db, args, isJson) {
  const level = getFlagValue(args, "--level");
  const expIdStr = getFlagValue(args, "--experiment");

  let experimentId;
  if (expIdStr !== void 0) {
    experimentId = Number(expIdStr);
    // Number("abc") is NaN and Number("") is 0. Reject both here so a typo is
    // reported instead of silently producing an empty result.
    if (String(expIdStr).trim() === "" || !Number.isInteger(experimentId)) {
      throw new Error(`Invalid --experiment value "${expIdStr}": expected an integer experiment id.`);
    }
  }

  const decisions = listAllDecisions(db, level, experimentId);
  if (isJson) {
    console.log(JSON.stringify(decisions, null, 2));
    return;
  }
  if (decisions.length === 0) {
    info("No decisions found.");
    return;
  }

  header("Decisions");
  const rows = decisions.map((d) => [
    String(d.id),
    String(d.experiment_id),
    evidenceColor(d.evidence_level),
    // Keep the table narrow: descriptions are cut to 60 characters.
    d.description.slice(0, 60) + (d.description.length > 60 ? "..." : ""),
    d.status
  ]);
  console.log(table(["ID", "Exp", "Level", "Description", "Status"], rows));
}
|
|
2809
|
+
/**
 * Prints the dead-end registry as JSON or as a table.
 *
 * @param {object} db - Database handle passed through to the list/search helpers.
 * @param {string[]} args - CLI arguments; reads --sub-type and --search.
 *   When both are given, --sub-type wins and --search is not applied.
 * @param {boolean} isJson - When truthy, print the raw rows as JSON.
 * @returns {void}
 */
function queryDeadEnds(db, args, isJson) {
  const subType = getFlagValue(args, "--sub-type");
  const searchTerm = getFlagValue(args, "--search");

  const deadEnds = subType
    ? listDeadEndsBySubType(db, subType)
    : searchTerm
      ? searchDeadEnds(db, searchTerm)
      : listAllDeadEnds(db);

  if (isJson) {
    console.log(JSON.stringify(deadEnds, null, 2));
    return;
  }
  if (deadEnds.length === 0) {
    info("No dead-ends recorded.");
    return;
  }

  header("Dead-End Registry");
  // Cut long cells to 40 characters and mark the cut with an ellipsis.
  const clip = (text) => (text.length > 40 ? `${text.slice(0, 40)}...` : text.slice(0, 40));
  const rows = [];
  for (const entry of deadEnds) {
    rows.push([
      String(entry.id),
      entry.sub_type ?? "\u2014",
      clip(entry.approach),
      clip(entry.structural_constraint)
    ]);
  }
  console.log(table(["ID", "Sub-Type", "Approach", "Constraint"], rows));
}
|
|
2837
|
+
function queryFragility(root, isJson) {
|
|
2838
|
+
const fragPath = path6.join(root, "docs", "synthesis", "fragility.md");
|
|
2839
|
+
if (!fs6.existsSync(fragPath)) {
|
|
2840
|
+
info("No fragility map found.");
|
|
2841
|
+
return;
|
|
2564
2842
|
}
|
|
2565
|
-
const content =
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2843
|
+
const content = fs6.readFileSync(fragPath, "utf-8");
|
|
2844
|
+
if (isJson) {
|
|
2845
|
+
console.log(JSON.stringify({ content }, null, 2));
|
|
2846
|
+
return;
|
|
2569
2847
|
}
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
const name = extractYamlField(frontmatter, "name") ?? role;
|
|
2573
|
-
const model = extractYamlField(frontmatter, "model") ?? "opus";
|
|
2574
|
-
const toolsStr = extractYamlField(frontmatter, "tools") ?? "[]";
|
|
2575
|
-
const tools = toolsStr.replace(/[\[\]]/g, "").split(",").map((t) => t.trim()).filter(Boolean);
|
|
2576
|
-
return { name, model, tools, systemPrompt: body };
|
|
2577
|
-
}
|
|
2578
|
-
function extractYamlField(yaml, field) {
|
|
2579
|
-
const match = yaml.match(new RegExp(`^${field}:\\s*(.+)$`, "m"));
|
|
2580
|
-
return match ? match[1].trim() : null;
|
|
2848
|
+
header("Fragility Map");
|
|
2849
|
+
console.log(content);
|
|
2581
2850
|
}
|
|
2582
|
-
|
|
2583
|
-
const
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
const contextJson = JSON.stringify(context, null, 2);
|
|
2587
|
-
const prompt = `Here is your context:
|
|
2588
|
-
|
|
2589
|
-
\`\`\`json
|
|
2590
|
-
${contextJson}
|
|
2591
|
-
\`\`\`
|
|
2592
|
-
|
|
2593
|
-
${taskPrompt}`;
|
|
2594
|
-
const turns = ROLE_MAX_TURNS[role] ?? 15;
|
|
2595
|
-
console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
|
|
2596
|
-
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2597
|
-
prompt,
|
|
2598
|
-
model: agentDef.model,
|
|
2599
|
-
tools: agentDef.tools,
|
|
2600
|
-
systemPrompt: agentDef.systemPrompt,
|
|
2601
|
-
cwd: root,
|
|
2602
|
-
maxTurns: turns,
|
|
2603
|
-
label: role
|
|
2604
|
-
});
|
|
2605
|
-
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
|
|
2606
|
-
const artifactPath = writeArtifact(role, context, markdown, root);
|
|
2607
|
-
if (artifactPath) {
|
|
2608
|
-
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
2851
|
+
function queryHistory(db, args, isJson) {
|
|
2852
|
+
const fixture = args.filter((a) => !a.startsWith("--"))[0];
|
|
2853
|
+
if (!fixture) {
|
|
2854
|
+
throw new Error("Usage: majlis history <fixture>");
|
|
2609
2855
|
}
|
|
2610
|
-
const
|
|
2611
|
-
if (
|
|
2612
|
-
|
|
2613
|
-
|
|
2614
|
-
console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
|
|
2615
|
-
}
|
|
2856
|
+
const history = getMetricHistoryByFixture(db, fixture);
|
|
2857
|
+
if (isJson) {
|
|
2858
|
+
console.log(JSON.stringify(history, null, 2));
|
|
2859
|
+
return;
|
|
2616
2860
|
}
|
|
2617
|
-
|
|
2618
|
-
}
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2622
|
-
const
|
|
2623
|
-
|
|
2624
|
-
|
|
2625
|
-
|
|
2626
|
-
|
|
2627
|
-
|
|
2628
|
-
|
|
2629
|
-
|
|
2630
|
-
|
|
2631
|
-
console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
|
|
2632
|
-
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2633
|
-
prompt,
|
|
2634
|
-
model: "opus",
|
|
2635
|
-
tools: ["Read", "Glob", "Grep"],
|
|
2636
|
-
systemPrompt,
|
|
2637
|
-
cwd: root,
|
|
2638
|
-
maxTurns: 5,
|
|
2639
|
-
label: "synthesiser"
|
|
2640
|
-
});
|
|
2641
|
-
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2642
|
-
return { output: markdown, structured: { guidance: markdown }, truncated };
|
|
2861
|
+
if (history.length === 0) {
|
|
2862
|
+
info(`No metric history for fixture: ${fixture}`);
|
|
2863
|
+
return;
|
|
2864
|
+
}
|
|
2865
|
+
header(`Metric History \u2014 ${fixture}`);
|
|
2866
|
+
const rows = history.map((h) => [
|
|
2867
|
+
String(h.experiment_id),
|
|
2868
|
+
h.experiment_slug ?? "\u2014",
|
|
2869
|
+
h.phase,
|
|
2870
|
+
h.metric_name,
|
|
2871
|
+
String(h.metric_value),
|
|
2872
|
+
h.captured_at
|
|
2873
|
+
]);
|
|
2874
|
+
console.log(table(["Exp", "Slug", "Phase", "Metric", "Value", "Captured"], rows));
|
|
2643
2875
|
}
|
|
2644
|
-
|
|
2645
|
-
const
|
|
2646
|
-
const
|
|
2647
|
-
|
|
2648
|
-
|
|
2649
|
-
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2653
|
-
|
|
2654
|
-
|
|
2655
|
-
|
|
2656
|
-
const
|
|
2657
|
-
|
|
2658
|
-
|
|
2659
|
-
|
|
2660
|
-
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2664
|
-
Here is the current experiment doc:
|
|
2665
|
-
<current_doc>
|
|
2666
|
-
${currentDoc}
|
|
2667
|
-
</current_doc>
|
|
2668
|
-
|
|
2669
|
-
Here is the template that the experiment doc should follow:
|
|
2670
|
-
<template>
|
|
2671
|
-
${template}
|
|
2672
|
-
</template>
|
|
2673
|
-
|
|
2674
|
-
Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
|
|
2675
|
-
- Keep any valid content from the current doc
|
|
2676
|
-
- Fill in what you can infer from the partial output
|
|
2677
|
-
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
|
|
2678
|
-
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
|
|
2679
|
-
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
|
|
2680
|
-
- Be concise. This is cleanup, not new work.`;
|
|
2681
|
-
const { text: _markdown } = await runQuery({
|
|
2682
|
-
prompt,
|
|
2683
|
-
model: "haiku",
|
|
2684
|
-
tools: ["Read", "Write"],
|
|
2685
|
-
systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
|
|
2686
|
-
cwd: root,
|
|
2687
|
-
maxTurns: 5,
|
|
2688
|
-
label: "recovery"
|
|
2689
|
-
});
|
|
2690
|
-
console.log(`[recovery] Cleanup complete for ${expSlug}.`);
|
|
2876
|
+
/**
 * Prints the circuit-breaker state per sub-type as JSON or as a table.
 *
 * @param {object} db - Database handle passed to getAllCircuitBreakerStates.
 * @param {string} root - Project root, used to load the project config.
 * @param {boolean} isJson - When truthy, print the raw states as JSON.
 * @returns {void}
 */
function queryCircuitBreakers(db, root, isJson) {
  const config = loadConfig(root);
  const threshold = config.cycle.circuit_breaker_threshold;
  const states = getAllCircuitBreakerStates(db, threshold);

  if (isJson) {
    console.log(JSON.stringify(states, null, 2));
    return;
  }
  if (states.length === 0) {
    info("No circuit breaker data.");
    return;
  }

  header("Circuit Breakers");
  const rows = states.map(({ sub_type, failure_count, tripped }) => {
    const statusCell = tripped ? red("TRIPPED") : green("OK");
    return [sub_type, String(failure_count), String(threshold), statusCell];
  });
  console.log(table(["Sub-Type", "Failures", "Threshold", "Status"], rows));
}
|
|
2692
|
-
|
|
2693
|
-
let
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2704
|
-
},
|
|
2705
|
-
cwd: opts.cwd,
|
|
2706
|
-
permissionMode: "bypassPermissions",
|
|
2707
|
-
allowDangerouslySkipPermissions: true,
|
|
2708
|
-
maxTurns: opts.maxTurns ?? 15,
|
|
2709
|
-
persistSession: false,
|
|
2710
|
-
settingSources: ["project"]
|
|
2711
|
-
}
|
|
2712
|
-
});
|
|
2713
|
-
const textParts = [];
|
|
2714
|
-
let costUsd = 0;
|
|
2715
|
-
let turnCount = 0;
|
|
2716
|
-
for await (const message of conversation) {
|
|
2717
|
-
if (message.type === "assistant") {
|
|
2718
|
-
turnCount++;
|
|
2719
|
-
let hasText = false;
|
|
2720
|
-
for (const block of message.message.content) {
|
|
2721
|
-
if (block.type === "text") {
|
|
2722
|
-
textParts.push(block.text);
|
|
2723
|
-
hasText = true;
|
|
2724
|
-
} else if (block.type === "tool_use") {
|
|
2725
|
-
const toolName = block.name ?? "tool";
|
|
2726
|
-
const input = block.input ?? {};
|
|
2727
|
-
const detail = formatToolDetail(toolName, input);
|
|
2728
|
-
process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
|
|
2729
|
-
`);
|
|
2730
|
-
}
|
|
2731
|
-
}
|
|
2732
|
-
if (hasText) {
|
|
2733
|
-
const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
|
|
2734
|
-
if (preview) {
|
|
2735
|
-
process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
|
|
2736
|
-
`);
|
|
2737
|
-
}
|
|
2738
|
-
}
|
|
2739
|
-
} else if (message.type === "tool_progress") {
|
|
2740
|
-
const elapsed = Math.round(message.elapsed_time_seconds);
|
|
2741
|
-
if (elapsed > 0 && elapsed % 5 === 0) {
|
|
2742
|
-
process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
|
|
2743
|
-
`);
|
|
2744
|
-
}
|
|
2745
|
-
} else if (message.type === "result") {
|
|
2746
|
-
if (message.subtype === "success") {
|
|
2747
|
-
costUsd = message.total_cost_usd;
|
|
2748
|
-
} else if (message.subtype === "error_max_turns") {
|
|
2749
|
-
truncated = true;
|
|
2750
|
-
costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
|
|
2751
|
-
console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
|
|
2752
|
-
} else {
|
|
2753
|
-
const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
|
|
2754
|
-
throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
|
|
2896
|
+
function checkCommit(db) {
|
|
2897
|
+
let stdinData = "";
|
|
2898
|
+
try {
|
|
2899
|
+
stdinData = fs6.readFileSync(0, "utf-8");
|
|
2900
|
+
} catch {
|
|
2901
|
+
}
|
|
2902
|
+
if (stdinData) {
|
|
2903
|
+
try {
|
|
2904
|
+
const hookInput = JSON.parse(stdinData);
|
|
2905
|
+
const command = hookInput?.tool_input?.command ?? "";
|
|
2906
|
+
if (!command.includes("git commit")) {
|
|
2907
|
+
return;
|
|
2755
2908
|
}
|
|
2909
|
+
} catch {
|
|
2756
2910
|
}
|
|
2757
2911
|
}
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
case "Glob":
|
|
2769
|
-
return input.pattern ? ` ${input.pattern}` : "";
|
|
2770
|
-
case "Grep":
|
|
2771
|
-
return input.pattern ? ` /${input.pattern}/` : "";
|
|
2772
|
-
case "Bash":
|
|
2773
|
-
return input.command ? ` $ ${input.command.slice(0, 80)}` : "";
|
|
2774
|
-
case "WebSearch":
|
|
2775
|
-
return input.query ? ` "${input.query}"` : "";
|
|
2776
|
-
default:
|
|
2777
|
-
return "";
|
|
2912
|
+
const active = listActiveExperiments(db);
|
|
2913
|
+
const unverified = active.filter(
|
|
2914
|
+
(e) => !["merged", "dead_end", "verified", "resolved", "compressed"].includes(e.status)
|
|
2915
|
+
);
|
|
2916
|
+
if (unverified.length > 0) {
|
|
2917
|
+
console.error(`[majlis] ${unverified.length} unverified experiment(s):`);
|
|
2918
|
+
for (const e of unverified) {
|
|
2919
|
+
console.error(` - ${e.slug} (${e.status})`);
|
|
2920
|
+
}
|
|
2921
|
+
process.exit(1);
|
|
2778
2922
|
}
|
|
2779
2923
|
}
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2924
|
+
// Module-scope bindings used by the query commands. They are declared here
// without a value and only assigned inside the initializer body below.
var fs6, path6;
// Initializer for "src/commands/query.ts", wrapped by the bundler's `__esm`
// helper (defined outside this view).
var init_query = __esm({
  "src/commands/query.ts"() {
    "use strict";
    // Node built-ins, loaded through the `__toESM` interop wrapper.
    fs6 = __toESM(require("fs"));
    path6 = __toESM(require("path"));
    // Initialize the bundled modules this file depends on.
    init_connection();
    init_queries();
    init_config();
    init_format();
  }
});
|
|
2936
|
+
|
|
2937
|
+
// src/state/types.ts
|
|
2938
|
+
// Experiment state-machine data. Both bindings are declared without a value
// and only assigned inside the initializer body below.
var TRANSITIONS, GRADE_ORDER;
var init_types2 = __esm({
  "src/state/types.ts"() {
    "use strict";
    // Maps each experiment status to the list of statuses it may move to.
    // An empty list marks a status with no outgoing transitions
    // ("merged" and "dead_end"). No entry lists "dead_end" as a target.
    // The `//` notes inside the table describe the entry directly above them.
    TRANSITIONS = {
      ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "gated" /* GATED */],
      ["reframed" /* REFRAMED */]: ["gated" /* GATED */],
      ["gated" /* GATED */]: ["building" /* BUILDING */, "gated" /* GATED */],
      // self-loop for rejected hypotheses
      ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
      // self-loop for retry after truncation
      ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
      ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
      ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
      ["scouted" /* SCOUTED */]: ["verifying" /* VERIFYING */],
      ["verifying" /* VERIFYING */]: ["verified" /* VERIFIED */],
      ["verified" /* VERIFIED */]: ["resolved" /* RESOLVED */],
      ["resolved" /* RESOLVED */]: ["compressed" /* COMPRESSED */, "building" /* BUILDING */],
      // cycle-back skips gate
      ["compressed" /* COMPRESSED */]: ["merged" /* MERGED */, "building" /* BUILDING */],
      // cycle-back skips gate
      ["merged" /* MERGED */]: [],
      ["dead_end" /* DEAD_END */]: []
    };
    // Verification grades; presumably ordered from worst to best — confirm
    // against the code that compares indices in this array.
    GRADE_ORDER = ["rejected", "weak", "good", "sound"];
  }
});
|
|
2965
|
+
|
|
2966
|
+
// src/state/machine.ts
|
|
2967
|
+
function transition(current, target) {
|
|
2968
|
+
const valid = TRANSITIONS[current];
|
|
2969
|
+
if (!valid.includes(target)) {
|
|
2970
|
+
throw new Error(
|
|
2971
|
+
`Invalid transition: ${current} \u2192 ${target}. Valid: [${valid.join(", ")}]`
|
|
2972
|
+
);
|
|
2796
2973
|
}
|
|
2797
|
-
const expSlug = context.experiment?.slug ?? "general";
|
|
2798
|
-
const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
|
|
2799
|
-
const filename = `${nextNum}-${role}-${expSlug}.md`;
|
|
2800
|
-
const target = path7.join(fullDir, filename);
|
|
2801
|
-
fs7.writeFileSync(target, markdown);
|
|
2802
2974
|
return target;
|
|
2803
2975
|
}
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2976
|
+
/**
 * Returns the statuses reachable from `current` according to TRANSITIONS.
 *
 * The array is the one stored in the table, not a copy, so callers should not
 * mutate it. Yields undefined when `current` is not a key of TRANSITIONS.
 *
 * @param {string} current - Current experiment status.
 * @returns {string[]|undefined}
 */
function validNext(current) {
  return TRANSITIONS[current];
}
|
|
2979
|
+
/**
 * Reports whether a status has no outgoing transitions in TRANSITIONS.
 *
 * @param {string} status2 - Experiment status to check.
 * @returns {boolean} true when the transition list for the status is empty.
 * @throws {TypeError} When `status2` is not a key of TRANSITIONS, because
 *   `.length` is then read from undefined.
 */
function isTerminal(status2) {
  return TRANSITIONS[status2].length === 0;
}
|
|
2982
|
+
/**
 * Chooses the next status for an experiment from its allowed transitions.
 *
 * Each status has a preferred target; when that target is not in `valid`, the
 * first allowed transition is used instead.
 *
 * @param {{slug: string, status: string}} exp - Experiment being advanced.
 * @param {string[]} valid - Allowed next statuses for exp.status.
 * @param {boolean} hasDoubts2 - Whether doubts have already been recorded.
 * @param {boolean} hasChallenges2 - Whether challenges have already been recorded.
 * @returns {string} The chosen next status.
 * @throws {Error} When `valid` is empty, i.e. the experiment is terminal.
 */
function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
  if (valid.length === 0) {
    throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
  }
  const fallback = valid[0];
  const prefer = (wanted) => (valid.includes(wanted) ? wanted : fallback);

  switch (exp.status) {
    case "classified":
    case "reframed":
      return prefer("gated");
    case "gated":
      return prefer("building");
    case "built":
      // Doubt pass comes first unless doubts already exist.
      return hasDoubts2 ? fallback : prefer("doubted");
    case "doubted":
      // Run the challenge pass if it has not happened yet, otherwise verify.
      return hasChallenges2 ? prefer("verifying") : prefer("challenged");
    case "challenged":
      return prefer("verifying");
    case "compressed":
      return prefer("merged");
    default:
      return fallback;
  }
}
|
|
3009
|
+
var init_machine = __esm({
|
|
3010
|
+
"src/state/machine.ts"() {
|
|
2807
3011
|
"use strict";
|
|
2808
|
-
|
|
2809
|
-
path7 = __toESM(require("path"));
|
|
2810
|
-
import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
|
|
2811
|
-
init_parse();
|
|
2812
|
-
init_connection();
|
|
2813
|
-
ROLE_MAX_TURNS = {
|
|
2814
|
-
builder: 50,
|
|
2815
|
-
critic: 30,
|
|
2816
|
-
adversary: 30,
|
|
2817
|
-
verifier: 50,
|
|
2818
|
-
compressor: 30,
|
|
2819
|
-
reframer: 20,
|
|
2820
|
-
scout: 20,
|
|
2821
|
-
gatekeeper: 10
|
|
2822
|
-
};
|
|
2823
|
-
DIM2 = "\x1B[2m";
|
|
2824
|
-
RESET2 = "\x1B[0m";
|
|
2825
|
-
CYAN2 = "\x1B[36m";
|
|
3012
|
+
init_types2();
|
|
2826
3013
|
}
|
|
2827
3014
|
});
|
|
2828
3015
|
|
|
@@ -2880,11 +3067,13 @@ async function resolve(db, exp, projectRoot) {
|
|
|
2880
3067
|
taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
|
|
2881
3068
|
}, projectRoot);
|
|
2882
3069
|
const guidanceText = guidance.structured?.guidance ?? guidance.output;
|
|
2883
|
-
|
|
2884
|
-
|
|
2885
|
-
|
|
2886
|
-
|
|
2887
|
-
|
|
3070
|
+
db.transaction(() => {
|
|
3071
|
+
storeBuilderGuidance(db, exp.id, guidanceText);
|
|
3072
|
+
updateExperimentStatus(db, exp.id, "building");
|
|
3073
|
+
if (exp.sub_type) {
|
|
3074
|
+
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
3075
|
+
}
|
|
3076
|
+
})();
|
|
2888
3077
|
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance generated for builder.`);
|
|
2889
3078
|
break;
|
|
2890
3079
|
}
|
|
@@ -2892,19 +3081,21 @@ async function resolve(db, exp, projectRoot) {
|
|
|
2892
3081
|
gitRevert(exp.branch, projectRoot);
|
|
2893
3082
|
const rejectedComponents = grades.filter((g) => g.grade === "rejected");
|
|
2894
3083
|
const whyFailed = rejectedComponents.map((r) => r.notes ?? "rejected").join("; ");
|
|
2895
|
-
|
|
2896
|
-
|
|
2897
|
-
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
|
|
2901
|
-
|
|
2902
|
-
|
|
2903
|
-
|
|
2904
|
-
|
|
2905
|
-
|
|
2906
|
-
|
|
2907
|
-
|
|
3084
|
+
db.transaction(() => {
|
|
3085
|
+
insertDeadEnd(
|
|
3086
|
+
db,
|
|
3087
|
+
exp.id,
|
|
3088
|
+
exp.hypothesis ?? exp.slug,
|
|
3089
|
+
whyFailed,
|
|
3090
|
+
`Approach rejected: ${whyFailed}`,
|
|
3091
|
+
exp.sub_type,
|
|
3092
|
+
"structural"
|
|
3093
|
+
);
|
|
3094
|
+
updateExperimentStatus(db, exp.id, "dead_end");
|
|
3095
|
+
if (exp.sub_type) {
|
|
3096
|
+
incrementSubTypeFailure(db, exp.sub_type, exp.id, "rejected");
|
|
3097
|
+
}
|
|
3098
|
+
})();
|
|
2908
3099
|
info(`Experiment ${exp.slug} DEAD-ENDED (rejected). Constraint recorded.`);
|
|
2909
3100
|
break;
|
|
2910
3101
|
}
|
|
@@ -2943,24 +3134,24 @@ function gitRevert(branch, cwd) {
|
|
|
2943
3134
|
}
|
|
2944
3135
|
}
|
|
2945
3136
|
function appendToFragilityMap(projectRoot, expSlug, gaps) {
|
|
2946
|
-
const fragPath =
|
|
3137
|
+
const fragPath = path7.join(projectRoot, "docs", "synthesis", "fragility.md");
|
|
2947
3138
|
let content = "";
|
|
2948
|
-
if (
|
|
2949
|
-
content =
|
|
3139
|
+
if (fs7.existsSync(fragPath)) {
|
|
3140
|
+
content = fs7.readFileSync(fragPath, "utf-8");
|
|
2950
3141
|
}
|
|
2951
3142
|
const entry = `
|
|
2952
3143
|
## From experiment: ${expSlug}
|
|
2953
3144
|
${gaps}
|
|
2954
3145
|
`;
|
|
2955
|
-
|
|
3146
|
+
fs7.writeFileSync(fragPath, content + entry);
|
|
2956
3147
|
}
|
|
2957
|
-
var
|
|
3148
|
+
var fs7, path7, import_node_child_process3;
|
|
2958
3149
|
var init_resolve = __esm({
|
|
2959
3150
|
"src/resolve.ts"() {
|
|
2960
3151
|
"use strict";
|
|
2961
|
-
|
|
2962
|
-
|
|
2963
|
-
|
|
3152
|
+
fs7 = __toESM(require("fs"));
|
|
3153
|
+
path7 = __toESM(require("path"));
|
|
3154
|
+
init_types2();
|
|
2964
3155
|
init_queries();
|
|
2965
3156
|
init_spawn();
|
|
2966
3157
|
import_node_child_process3 = require("child_process");
|
|
@@ -3003,12 +3194,11 @@ async function resolveCmd(args) {
|
|
|
3003
3194
|
const exp = resolveExperimentArg(db, args);
|
|
3004
3195
|
transition(exp.status, "resolved" /* RESOLVED */);
|
|
3005
3196
|
await resolve(db, exp, root);
|
|
3006
|
-
updateExperimentStatus(db, exp.id, "resolved");
|
|
3007
3197
|
}
|
|
3008
3198
|
async function doGate(db, exp, root) {
|
|
3009
3199
|
transition(exp.status, "gated" /* GATED */);
|
|
3010
|
-
const synthesis = readFileOrEmpty(
|
|
3011
|
-
const fragility = readFileOrEmpty(
|
|
3200
|
+
const synthesis = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
|
|
3201
|
+
const fragility = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "fragility.md")), CONTEXT_LIMITS.fragility);
|
|
3012
3202
|
const structuralDeadEnds = exp.sub_type ? listStructuralDeadEndsBySubType(db, exp.sub_type) : listStructuralDeadEnds(db);
|
|
3013
3203
|
const result = await spawnAgent("gatekeeper", {
|
|
3014
3204
|
experiment: {
|
|
@@ -3052,13 +3242,12 @@ async function doBuild(db, exp, root) {
|
|
|
3052
3242
|
transition(exp.status, "building" /* BUILDING */);
|
|
3053
3243
|
const deadEnds = exp.sub_type ? listDeadEndsBySubType(db, exp.sub_type) : listAllDeadEnds(db);
|
|
3054
3244
|
const builderGuidance = getBuilderGuidance(db, exp.id);
|
|
3055
|
-
const
|
|
3056
|
-
const
|
|
3057
|
-
const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
|
|
3058
|
-
const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
|
|
3245
|
+
const fragility = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "fragility.md")), CONTEXT_LIMITS.fragility);
|
|
3246
|
+
const synthesis = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
|
|
3059
3247
|
const confirmedDoubts = getConfirmedDoubts(db, exp.id);
|
|
3060
|
-
const config =
|
|
3061
|
-
|
|
3248
|
+
const config = loadConfig(root);
|
|
3249
|
+
const existingBaseline = getMetricsByExperimentAndPhase(db, exp.id, "before");
|
|
3250
|
+
if (config.metrics?.command && existingBaseline.length === 0) {
|
|
3062
3251
|
try {
|
|
3063
3252
|
const output = (0, import_node_child_process4.execSync)(config.metrics.command, {
|
|
3064
3253
|
cwd: root,
|
|
@@ -3149,7 +3338,7 @@ async function doChallenge(db, exp, root) {
|
|
|
3149
3338
|
} catch {
|
|
3150
3339
|
}
|
|
3151
3340
|
if (gitDiff.length > 8e3) gitDiff = gitDiff.slice(0, 8e3) + "\n[DIFF TRUNCATED]";
|
|
3152
|
-
const synthesis = readFileOrEmpty(
|
|
3341
|
+
const synthesis = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
|
|
3153
3342
|
let taskPrompt = `Construct adversarial test cases for experiment ${exp.slug}: ${exp.hypothesis}`;
|
|
3154
3343
|
if (gitDiff) {
|
|
3155
3344
|
taskPrompt += `
|
|
@@ -3182,9 +3371,9 @@ ${gitDiff}
|
|
|
3182
3371
|
async function doDoubt(db, exp, root) {
|
|
3183
3372
|
transition(exp.status, "doubted" /* DOUBTED */);
|
|
3184
3373
|
const paddedNum = String(exp.id).padStart(3, "0");
|
|
3185
|
-
const expDocPath =
|
|
3186
|
-
const experimentDoc = readFileOrEmpty(expDocPath);
|
|
3187
|
-
const synthesis = readFileOrEmpty(
|
|
3374
|
+
const expDocPath = path8.join(root, "docs", "experiments", `${paddedNum}-${exp.slug}.md`);
|
|
3375
|
+
const experimentDoc = truncateContext(readFileOrEmpty(expDocPath), CONTEXT_LIMITS.experimentDoc);
|
|
3376
|
+
const synthesis = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
|
|
3188
3377
|
const deadEnds = exp.sub_type ? listDeadEndsBySubType(db, exp.sub_type) : listAllDeadEnds(db);
|
|
3189
3378
|
let taskPrompt = `Doubt the work in experiment ${exp.slug}: ${exp.hypothesis}. Produce a doubt document with evidence for each doubt.`;
|
|
3190
3379
|
if (experimentDoc) {
|
|
@@ -3223,8 +3412,8 @@ ${experimentDoc}
|
|
|
3223
3412
|
}
|
|
3224
3413
|
async function doScout(db, exp, root) {
|
|
3225
3414
|
transition(exp.status, "scouted" /* SCOUTED */);
|
|
3226
|
-
const synthesis = readFileOrEmpty(
|
|
3227
|
-
const fragility = readFileOrEmpty(
|
|
3415
|
+
const synthesis = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
|
|
3416
|
+
const fragility = truncateContext(readFileOrEmpty(path8.join(root, "docs", "synthesis", "fragility.md")), CONTEXT_LIMITS.fragility);
|
|
3228
3417
|
const deadEnds = exp.sub_type ? listDeadEndsBySubType(db, exp.sub_type) : listAllDeadEnds(db);
|
|
3229
3418
|
const deadEndsSummary = deadEnds.map(
|
|
3230
3419
|
(d) => `- [${d.category ?? "structural"}] ${d.approach}: ${d.why_failed}`
|
|
@@ -3271,12 +3460,12 @@ ${fragility}`;
|
|
|
3271
3460
|
async function doVerify(db, exp, root) {
|
|
3272
3461
|
transition(exp.status, "verifying" /* VERIFYING */);
|
|
3273
3462
|
const doubts = getDoubtsByExperiment(db, exp.id);
|
|
3274
|
-
const challengeDir =
|
|
3463
|
+
const challengeDir = path8.join(root, "docs", "challenges");
|
|
3275
3464
|
let challenges = "";
|
|
3276
|
-
if (
|
|
3277
|
-
const files =
|
|
3465
|
+
if (fs8.existsSync(challengeDir)) {
|
|
3466
|
+
const files = fs8.readdirSync(challengeDir).filter((f) => f.includes(exp.slug) && f.endsWith(".md"));
|
|
3278
3467
|
for (const f of files) {
|
|
3279
|
-
challenges +=
|
|
3468
|
+
challenges += fs8.readFileSync(path8.join(challengeDir, f), "utf-8") + "\n\n";
|
|
3280
3469
|
}
|
|
3281
3470
|
}
|
|
3282
3471
|
const beforeMetrics = getMetricsByExperimentAndPhase(db, exp.id, "before");
|
|
@@ -3344,14 +3533,14 @@ async function doVerify(db, exp, root) {
|
|
|
3344
3533
|
success(`Verification complete for ${exp.slug}. Run \`majlis resolve\` next.`);
|
|
3345
3534
|
}
|
|
3346
3535
|
async function doCompress(db, root) {
|
|
3347
|
-
const synthesisPath =
|
|
3348
|
-
const sizeBefore =
|
|
3536
|
+
const synthesisPath = path8.join(root, "docs", "synthesis", "current.md");
|
|
3537
|
+
const sizeBefore = fs8.existsSync(synthesisPath) ? fs8.statSync(synthesisPath).size : 0;
|
|
3349
3538
|
const sessionCount = getSessionsSinceCompression(db);
|
|
3350
3539
|
const dbExport = exportForCompressor(db);
|
|
3351
3540
|
const result = await spawnAgent("compressor", {
|
|
3352
3541
|
taskPrompt: "## Structured Data (CANONICAL \u2014 from SQLite database)\nThe database export below is the source of truth. docs/ files are agent artifacts that may contain stale or incorrect information. Cross-reference everything against this data.\n\n" + dbExport + "\n\n## Your Task\nRead ALL experiments, decisions, doubts, challenges, verification reports, reframes, and recent diffs. Cross-reference for contradictions, redundancies, and patterns. REWRITE docs/synthesis/current.md \u2014 shorter and denser. Update docs/synthesis/fragility.md with current weak areas. Update docs/synthesis/dead-ends.md with structural constraints from rejected experiments."
|
|
3353
3542
|
}, root);
|
|
3354
|
-
const sizeAfter =
|
|
3543
|
+
const sizeAfter = fs8.existsSync(synthesisPath) ? fs8.statSync(synthesisPath).size : 0;
|
|
3355
3544
|
recordCompression(db, sessionCount, sizeBefore, sizeAfter);
|
|
3356
3545
|
success(`Compression complete. Synthesis: ${sizeBefore}B \u2192 ${sizeAfter}B`);
|
|
3357
3546
|
}
|
|
@@ -3442,42 +3631,20 @@ function ingestStructuredOutput(db, experimentId, structured) {
|
|
|
3442
3631
|
info(`Ingested ${structured.findings.length} finding(s)`);
|
|
3443
3632
|
}
|
|
3444
3633
|
}
|
|
3445
|
-
|
|
3446
|
-
try {
|
|
3447
|
-
return fs9.readFileSync(filePath, "utf-8");
|
|
3448
|
-
} catch {
|
|
3449
|
-
return "";
|
|
3450
|
-
}
|
|
3451
|
-
}
|
|
3452
|
-
function loadConfig5(projectRoot) {
|
|
3453
|
-
const configPath = path9.join(projectRoot, ".majlis", "config.json");
|
|
3454
|
-
if (!fs9.existsSync(configPath)) {
|
|
3455
|
-
return {
|
|
3456
|
-
project: { name: "", description: "", objective: "" },
|
|
3457
|
-
cycle: {
|
|
3458
|
-
compression_interval: 5,
|
|
3459
|
-
circuit_breaker_threshold: 3,
|
|
3460
|
-
require_doubt_before_verify: true,
|
|
3461
|
-
require_challenge_before_verify: false,
|
|
3462
|
-
auto_baseline_on_new_experiment: true
|
|
3463
|
-
}
|
|
3464
|
-
};
|
|
3465
|
-
}
|
|
3466
|
-
return JSON.parse(fs9.readFileSync(configPath, "utf-8"));
|
|
3467
|
-
}
|
|
3468
|
-
var fs9, path9, import_node_child_process4;
|
|
3634
|
+
var fs8, path8, import_node_child_process4;
|
|
3469
3635
|
var init_cycle = __esm({
|
|
3470
3636
|
"src/commands/cycle.ts"() {
|
|
3471
3637
|
"use strict";
|
|
3472
|
-
|
|
3473
|
-
|
|
3638
|
+
fs8 = __toESM(require("fs"));
|
|
3639
|
+
path8 = __toESM(require("path"));
|
|
3474
3640
|
import_node_child_process4 = require("child_process");
|
|
3475
3641
|
init_connection();
|
|
3476
3642
|
init_queries();
|
|
3477
3643
|
init_machine();
|
|
3478
|
-
|
|
3644
|
+
init_types2();
|
|
3479
3645
|
init_spawn();
|
|
3480
3646
|
init_resolve();
|
|
3647
|
+
init_config();
|
|
3481
3648
|
init_metrics();
|
|
3482
3649
|
init_format();
|
|
3483
3650
|
}
|
|
@@ -3496,10 +3663,10 @@ async function classify(args) {
|
|
|
3496
3663
|
if (!domain) {
|
|
3497
3664
|
throw new Error('Usage: majlis classify "domain description"');
|
|
3498
3665
|
}
|
|
3499
|
-
const synthesisPath =
|
|
3500
|
-
const synthesis =
|
|
3501
|
-
const deadEndsPath =
|
|
3502
|
-
const deadEnds =
|
|
3666
|
+
const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
|
|
3667
|
+
const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
|
|
3668
|
+
const deadEndsPath = path9.join(root, "docs", "synthesis", "dead-ends.md");
|
|
3669
|
+
const deadEnds = fs9.existsSync(deadEndsPath) ? fs9.readFileSync(deadEndsPath, "utf-8") : "";
|
|
3503
3670
|
info(`Classifying problem domain: ${domain}`);
|
|
3504
3671
|
const result = await spawnAgent("builder", {
|
|
3505
3672
|
synthesis,
|
|
@@ -3517,22 +3684,22 @@ Write the classification to docs/classification/ following the template.`
|
|
|
3517
3684
|
async function reframe(args) {
|
|
3518
3685
|
const root = findProjectRoot();
|
|
3519
3686
|
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
3520
|
-
const classificationDir =
|
|
3687
|
+
const classificationDir = path9.join(root, "docs", "classification");
|
|
3521
3688
|
let classificationContent = "";
|
|
3522
|
-
if (
|
|
3523
|
-
const files =
|
|
3689
|
+
if (fs9.existsSync(classificationDir)) {
|
|
3690
|
+
const files = fs9.readdirSync(classificationDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
|
|
3524
3691
|
for (const f of files) {
|
|
3525
|
-
classificationContent +=
|
|
3692
|
+
classificationContent += fs9.readFileSync(path9.join(classificationDir, f), "utf-8") + "\n\n";
|
|
3526
3693
|
}
|
|
3527
3694
|
}
|
|
3528
|
-
const synthesisPath =
|
|
3529
|
-
const synthesis =
|
|
3530
|
-
const deadEndsPath =
|
|
3531
|
-
const deadEnds =
|
|
3532
|
-
const configPath =
|
|
3695
|
+
const synthesisPath = path9.join(root, "docs", "synthesis", "current.md");
|
|
3696
|
+
const synthesis = fs9.existsSync(synthesisPath) ? fs9.readFileSync(synthesisPath, "utf-8") : "";
|
|
3697
|
+
const deadEndsPath = path9.join(root, "docs", "synthesis", "dead-ends.md");
|
|
3698
|
+
const deadEnds = fs9.existsSync(deadEndsPath) ? fs9.readFileSync(deadEndsPath, "utf-8") : "";
|
|
3699
|
+
const configPath = path9.join(root, ".majlis", "config.json");
|
|
3533
3700
|
let problemStatement = "";
|
|
3534
|
-
if (
|
|
3535
|
-
const config = JSON.parse(
|
|
3701
|
+
if (fs9.existsSync(configPath)) {
|
|
3702
|
+
const config = JSON.parse(fs9.readFileSync(configPath, "utf-8"));
|
|
3536
3703
|
problemStatement = `${config.project?.description ?? ""}
|
|
3537
3704
|
Objective: ${config.project?.objective ?? ""}`;
|
|
3538
3705
|
}
|
|
@@ -3556,12 +3723,12 @@ Write to docs/reframes/.`
|
|
|
3556
3723
|
}, root);
|
|
3557
3724
|
success("Reframe complete. Check docs/reframes/ for the output.");
|
|
3558
3725
|
}
|
|
3559
|
-
var
|
|
3726
|
+
var fs9, path9;
|
|
3560
3727
|
var init_classify = __esm({
|
|
3561
3728
|
"src/commands/classify.ts"() {
|
|
3562
3729
|
"use strict";
|
|
3563
|
-
|
|
3564
|
-
|
|
3730
|
+
fs9 = __toESM(require("fs"));
|
|
3731
|
+
path9 = __toESM(require("path"));
|
|
3565
3732
|
init_connection();
|
|
3566
3733
|
init_spawn();
|
|
3567
3734
|
init_format();
|
|
@@ -3578,20 +3745,19 @@ async function audit(args) {
|
|
|
3578
3745
|
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
3579
3746
|
const db = getDb(root);
|
|
3580
3747
|
const objective = args.filter((a) => !a.startsWith("--")).join(" ");
|
|
3581
|
-
const config =
|
|
3748
|
+
const config = loadConfig(root);
|
|
3582
3749
|
const experiments = listAllExperiments(db);
|
|
3583
3750
|
const deadEnds = listAllDeadEnds(db);
|
|
3584
3751
|
const circuitBreakers = getAllCircuitBreakerStates(db, config.cycle.circuit_breaker_threshold);
|
|
3585
|
-
const classificationDir =
|
|
3752
|
+
const classificationDir = path10.join(root, "docs", "classification");
|
|
3586
3753
|
let classification = "";
|
|
3587
|
-
if (
|
|
3588
|
-
const files =
|
|
3754
|
+
if (fs10.existsSync(classificationDir)) {
|
|
3755
|
+
const files = fs10.readdirSync(classificationDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
|
|
3589
3756
|
for (const f of files) {
|
|
3590
|
-
classification +=
|
|
3757
|
+
classification += fs10.readFileSync(path10.join(classificationDir, f), "utf-8") + "\n\n";
|
|
3591
3758
|
}
|
|
3592
3759
|
}
|
|
3593
|
-
const
|
|
3594
|
-
const synthesis = fs11.existsSync(synthesisPath) ? fs11.readFileSync(synthesisPath, "utf-8") : "";
|
|
3760
|
+
const synthesis = readFileOrEmpty(path10.join(root, "docs", "synthesis", "current.md"));
|
|
3595
3761
|
header("Maqasid Check \u2014 Purpose Audit");
|
|
3596
3762
|
const trippedBreakers = circuitBreakers.filter((cb) => cb.tripped);
|
|
3597
3763
|
if (trippedBreakers.length > 0) {
|
|
@@ -3635,22 +3801,16 @@ Output: either "classification confirmed \u2014 continue" or "re-classify from X
|
|
|
3635
3801
|
}, root);
|
|
3636
3802
|
success("Purpose audit complete. Review the output above.");
|
|
3637
3803
|
}
|
|
3638
|
-
|
|
3639
|
-
const configPath = path11.join(projectRoot, ".majlis", "config.json");
|
|
3640
|
-
if (!fs11.existsSync(configPath)) {
|
|
3641
|
-
return { project: { name: "", description: "", objective: "" }, cycle: { circuit_breaker_threshold: 3 } };
|
|
3642
|
-
}
|
|
3643
|
-
return JSON.parse(fs11.readFileSync(configPath, "utf-8"));
|
|
3644
|
-
}
|
|
3645
|
-
var fs11, path11;
|
|
3804
|
+
var fs10, path10;
|
|
3646
3805
|
var init_audit = __esm({
|
|
3647
3806
|
"src/commands/audit.ts"() {
|
|
3648
3807
|
"use strict";
|
|
3649
|
-
|
|
3650
|
-
|
|
3808
|
+
fs10 = __toESM(require("fs"));
|
|
3809
|
+
path10 = __toESM(require("path"));
|
|
3651
3810
|
init_connection();
|
|
3652
3811
|
init_queries();
|
|
3653
3812
|
init_spawn();
|
|
3813
|
+
init_config();
|
|
3654
3814
|
init_format();
|
|
3655
3815
|
}
|
|
3656
3816
|
});
|
|
@@ -3664,7 +3824,7 @@ async function next(args, isJson) {
|
|
|
3664
3824
|
const root = findProjectRoot();
|
|
3665
3825
|
if (!root) throw new Error("Not in a Majlis project. Run `majlis init` first.");
|
|
3666
3826
|
const db = getDb(root);
|
|
3667
|
-
const config =
|
|
3827
|
+
const config = loadConfig(root);
|
|
3668
3828
|
const slugArg = args.filter((a) => !a.startsWith("--"))[0];
|
|
3669
3829
|
let exp;
|
|
3670
3830
|
if (slugArg) {
|
|
@@ -3696,7 +3856,17 @@ async function runNextStep(db, exp, config, root, isJson) {
|
|
|
3696
3856
|
}
|
|
3697
3857
|
if (exp.sub_type && checkCircuitBreaker(db, exp.sub_type, config.cycle.circuit_breaker_threshold)) {
|
|
3698
3858
|
warn(`Circuit breaker: ${exp.sub_type} has ${config.cycle.circuit_breaker_threshold}+ failures.`);
|
|
3699
|
-
|
|
3859
|
+
insertDeadEnd(
|
|
3860
|
+
db,
|
|
3861
|
+
exp.id,
|
|
3862
|
+
exp.hypothesis ?? exp.slug,
|
|
3863
|
+
`Circuit breaker tripped for ${exp.sub_type}`,
|
|
3864
|
+
`Sub-type ${exp.sub_type} exceeded ${config.cycle.circuit_breaker_threshold} failures`,
|
|
3865
|
+
exp.sub_type,
|
|
3866
|
+
"procedural"
|
|
3867
|
+
);
|
|
3868
|
+
updateExperimentStatus(db, exp.id, "dead_end");
|
|
3869
|
+
warn("Experiment dead-ended. Triggering Maqasid Check (purpose audit).");
|
|
3700
3870
|
await audit([config.project?.objective ?? ""]);
|
|
3701
3871
|
return;
|
|
3702
3872
|
}
|
|
@@ -3736,6 +3906,16 @@ async function runAutoLoop(db, exp, config, root, isJson) {
|
|
|
3736
3906
|
}
|
|
3737
3907
|
if (exp.sub_type && checkCircuitBreaker(db, exp.sub_type, config.cycle.circuit_breaker_threshold)) {
|
|
3738
3908
|
warn(`Circuit breaker tripped for ${exp.sub_type}. Stopping auto mode.`);
|
|
3909
|
+
insertDeadEnd(
|
|
3910
|
+
db,
|
|
3911
|
+
exp.id,
|
|
3912
|
+
exp.hypothesis ?? exp.slug,
|
|
3913
|
+
`Circuit breaker tripped for ${exp.sub_type}`,
|
|
3914
|
+
`Sub-type ${exp.sub_type} exceeded ${config.cycle.circuit_breaker_threshold} failures`,
|
|
3915
|
+
exp.sub_type,
|
|
3916
|
+
"procedural"
|
|
3917
|
+
);
|
|
3918
|
+
updateExperimentStatus(db, exp.id, "dead_end");
|
|
3739
3919
|
await audit([config.project?.objective ?? ""]);
|
|
3740
3920
|
break;
|
|
3741
3921
|
}
|
|
@@ -3784,37 +3964,26 @@ async function executeStep(step, exp, root) {
|
|
|
3784
3964
|
updateExperimentStatus(getDb(root), exp.id, "reframed");
|
|
3785
3965
|
info(`Reframe acknowledged for ${exp.slug}. Proceeding to gate.`);
|
|
3786
3966
|
break;
|
|
3967
|
+
case "merged" /* MERGED */:
|
|
3968
|
+
updateExperimentStatus(getDb(root), exp.id, "merged");
|
|
3969
|
+
success(`Experiment ${exp.slug} merged.`);
|
|
3970
|
+
break;
|
|
3971
|
+
case "dead_end" /* DEAD_END */:
|
|
3972
|
+
info(`Experiment ${exp.slug} is dead-ended. No further action.`);
|
|
3973
|
+
break;
|
|
3787
3974
|
default:
|
|
3788
3975
|
warn(`Don't know how to execute step: ${step}`);
|
|
3789
3976
|
}
|
|
3790
3977
|
}
|
|
3791
|
-
function loadConfig7(projectRoot) {
|
|
3792
|
-
const configPath = path12.join(projectRoot, ".majlis", "config.json");
|
|
3793
|
-
if (!fs12.existsSync(configPath)) {
|
|
3794
|
-
return {
|
|
3795
|
-
project: { name: "", description: "", objective: "" },
|
|
3796
|
-
cycle: {
|
|
3797
|
-
compression_interval: 5,
|
|
3798
|
-
circuit_breaker_threshold: 3,
|
|
3799
|
-
require_doubt_before_verify: true,
|
|
3800
|
-
require_challenge_before_verify: false,
|
|
3801
|
-
auto_baseline_on_new_experiment: true
|
|
3802
|
-
}
|
|
3803
|
-
};
|
|
3804
|
-
}
|
|
3805
|
-
return JSON.parse(fs12.readFileSync(configPath, "utf-8"));
|
|
3806
|
-
}
|
|
3807
|
-
var fs12, path12;
|
|
3808
3978
|
var init_next = __esm({
|
|
3809
3979
|
"src/commands/next.ts"() {
|
|
3810
3980
|
"use strict";
|
|
3811
|
-
fs12 = __toESM(require("fs"));
|
|
3812
|
-
path12 = __toESM(require("path"));
|
|
3813
3981
|
init_connection();
|
|
3814
3982
|
init_queries();
|
|
3815
3983
|
init_machine();
|
|
3816
|
-
|
|
3984
|
+
init_types2();
|
|
3817
3985
|
init_queries();
|
|
3986
|
+
init_config();
|
|
3818
3987
|
init_cycle();
|
|
3819
3988
|
init_audit();
|
|
3820
3989
|
init_format();
|
|
@@ -3834,13 +4003,19 @@ async function run(args) {
|
|
|
3834
4003
|
throw new Error('Usage: majlis run "goal description"');
|
|
3835
4004
|
}
|
|
3836
4005
|
const db = getDb(root);
|
|
3837
|
-
const config =
|
|
4006
|
+
const config = loadConfig(root);
|
|
3838
4007
|
const MAX_EXPERIMENTS = 10;
|
|
3839
4008
|
const MAX_STEPS = 200;
|
|
3840
4009
|
let experimentCount = 0;
|
|
3841
4010
|
let stepCount = 0;
|
|
4011
|
+
let consecutiveFailures = 0;
|
|
4012
|
+
const usedHypotheses = /* @__PURE__ */ new Set();
|
|
3842
4013
|
header(`Autonomous Mode \u2014 ${goal}`);
|
|
3843
4014
|
while (stepCount < MAX_STEPS && experimentCount < MAX_EXPERIMENTS) {
|
|
4015
|
+
if (isShutdownRequested()) {
|
|
4016
|
+
warn("Shutdown requested. Stopping autonomous mode.");
|
|
4017
|
+
break;
|
|
4018
|
+
}
|
|
3844
4019
|
stepCount++;
|
|
3845
4020
|
let exp = getLatestExperiment(db);
|
|
3846
4021
|
if (!exp) {
|
|
@@ -3860,8 +4035,13 @@ async function run(args) {
|
|
|
3860
4035
|
success("Planner says the goal has been met. Stopping.");
|
|
3861
4036
|
break;
|
|
3862
4037
|
}
|
|
4038
|
+
if (usedHypotheses.has(hypothesis)) {
|
|
4039
|
+
warn(`Planner returned duplicate hypothesis: "${hypothesis.slice(0, 80)}". Stopping.`);
|
|
4040
|
+
break;
|
|
4041
|
+
}
|
|
4042
|
+
usedHypotheses.add(hypothesis);
|
|
3863
4043
|
info(`Next hypothesis: ${hypothesis}`);
|
|
3864
|
-
exp = createNewExperiment(db, root, hypothesis);
|
|
4044
|
+
exp = await createNewExperiment(db, root, hypothesis);
|
|
3865
4045
|
success(`Created experiment #${exp.id}: ${exp.slug}`);
|
|
3866
4046
|
}
|
|
3867
4047
|
if (isTerminal(exp.status)) {
|
|
@@ -3875,7 +4055,9 @@ async function run(args) {
|
|
|
3875
4055
|
info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
|
|
3876
4056
|
try {
|
|
3877
4057
|
await next([exp.slug], false);
|
|
4058
|
+
consecutiveFailures = 0;
|
|
3878
4059
|
} catch (err) {
|
|
4060
|
+
consecutiveFailures++;
|
|
3879
4061
|
const message = err instanceof Error ? err.message : String(err);
|
|
3880
4062
|
warn(`Step failed for ${exp.slug}: ${message}`);
|
|
3881
4063
|
try {
|
|
@@ -3889,7 +4071,13 @@ async function run(args) {
|
|
|
3889
4071
|
"procedural"
|
|
3890
4072
|
);
|
|
3891
4073
|
updateExperimentStatus(db, exp.id, "dead_end");
|
|
3892
|
-
} catch {
|
|
4074
|
+
} catch (innerErr) {
|
|
4075
|
+
const innerMsg = innerErr instanceof Error ? innerErr.message : String(innerErr);
|
|
4076
|
+
warn(`Could not record dead-end: ${innerMsg}`);
|
|
4077
|
+
}
|
|
4078
|
+
if (consecutiveFailures >= 3) {
|
|
4079
|
+
warn(`${consecutiveFailures} consecutive failures. Stopping autonomous mode.`);
|
|
4080
|
+
break;
|
|
3893
4081
|
}
|
|
3894
4082
|
}
|
|
3895
4083
|
}
|
|
@@ -3902,11 +4090,11 @@ async function run(args) {
|
|
|
3902
4090
|
info("Run `majlis status` to see final state.");
|
|
3903
4091
|
}
|
|
3904
4092
|
async function deriveNextHypothesis(goal, root, db) {
|
|
3905
|
-
const synthesis =
|
|
3906
|
-
const fragility =
|
|
3907
|
-
const deadEndsDoc =
|
|
4093
|
+
const synthesis = truncateContext(readFileOrEmpty(path11.join(root, "docs", "synthesis", "current.md")), CONTEXT_LIMITS.synthesis);
|
|
4094
|
+
const fragility = truncateContext(readFileOrEmpty(path11.join(root, "docs", "synthesis", "fragility.md")), CONTEXT_LIMITS.fragility);
|
|
4095
|
+
const deadEndsDoc = truncateContext(readFileOrEmpty(path11.join(root, "docs", "synthesis", "dead-ends.md")), CONTEXT_LIMITS.deadEnds);
|
|
3908
4096
|
const deadEnds = listAllDeadEnds(db);
|
|
3909
|
-
const config =
|
|
4097
|
+
const config = loadConfig(root);
|
|
3910
4098
|
let metricsOutput = "";
|
|
3911
4099
|
if (config.metrics?.command) {
|
|
3912
4100
|
try {
|
|
@@ -3991,8 +4179,8 @@ ${result.output.slice(-2e3)}
|
|
|
3991
4179
|
warn("Could not extract hypothesis. Using goal as fallback.");
|
|
3992
4180
|
return goal;
|
|
3993
4181
|
}
|
|
3994
|
-
function createNewExperiment(db, root, hypothesis) {
|
|
3995
|
-
const slug =
|
|
4182
|
+
async function createNewExperiment(db, root, hypothesis) {
|
|
4183
|
+
const slug = await generateSlug(hypothesis, root);
|
|
3996
4184
|
let finalSlug = slug;
|
|
3997
4185
|
let attempt = 0;
|
|
3998
4186
|
while (getExperimentBySlug(db, finalSlug)) {
|
|
@@ -4016,49 +4204,23 @@ function createNewExperiment(db, root, hypothesis) {
|
|
|
4016
4204
|
const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);
|
|
4017
4205
|
updateExperimentStatus(db, exp.id, "reframed");
|
|
4018
4206
|
exp.status = "reframed";
|
|
4019
|
-
const docsDir =
|
|
4020
|
-
const templatePath =
|
|
4021
|
-
if (
|
|
4022
|
-
const template =
|
|
4207
|
+
const docsDir = path11.join(root, "docs", "experiments");
|
|
4208
|
+
const templatePath = path11.join(docsDir, "_TEMPLATE.md");
|
|
4209
|
+
if (fs11.existsSync(templatePath)) {
|
|
4210
|
+
const template = fs11.readFileSync(templatePath, "utf-8");
|
|
4023
4211
|
const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
|
|
4024
|
-
const logPath =
|
|
4025
|
-
|
|
4212
|
+
const logPath = path11.join(docsDir, `${paddedNum}-${finalSlug}.md`);
|
|
4213
|
+
fs11.writeFileSync(logPath, logContent);
|
|
4026
4214
|
info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
|
|
4027
4215
|
}
|
|
4028
4216
|
return exp;
|
|
4029
4217
|
}
|
|
4030
|
-
|
|
4031
|
-
try {
|
|
4032
|
-
return fs13.readFileSync(filePath, "utf-8");
|
|
4033
|
-
} catch {
|
|
4034
|
-
return "";
|
|
4035
|
-
}
|
|
4036
|
-
}
|
|
4037
|
-
function slugify2(text) {
|
|
4038
|
-
return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
|
|
4039
|
-
}
|
|
4040
|
-
function loadConfig8(projectRoot) {
|
|
4041
|
-
const configPath = path13.join(projectRoot, ".majlis", "config.json");
|
|
4042
|
-
if (!fs13.existsSync(configPath)) {
|
|
4043
|
-
return {
|
|
4044
|
-
project: { name: "", description: "", objective: "" },
|
|
4045
|
-
cycle: {
|
|
4046
|
-
compression_interval: 5,
|
|
4047
|
-
circuit_breaker_threshold: 3,
|
|
4048
|
-
require_doubt_before_verify: true,
|
|
4049
|
-
require_challenge_before_verify: false,
|
|
4050
|
-
auto_baseline_on_new_experiment: true
|
|
4051
|
-
}
|
|
4052
|
-
};
|
|
4053
|
-
}
|
|
4054
|
-
return JSON.parse(fs13.readFileSync(configPath, "utf-8"));
|
|
4055
|
-
}
|
|
4056
|
-
var fs13, path13, import_node_child_process5;
|
|
4218
|
+
var fs11, path11, import_node_child_process5;
|
|
4057
4219
|
var init_run = __esm({
|
|
4058
4220
|
"src/commands/run.ts"() {
|
|
4059
4221
|
"use strict";
|
|
4060
|
-
|
|
4061
|
-
|
|
4222
|
+
fs11 = __toESM(require("fs"));
|
|
4223
|
+
path11 = __toESM(require("path"));
|
|
4062
4224
|
import_node_child_process5 = require("child_process");
|
|
4063
4225
|
init_connection();
|
|
4064
4226
|
init_queries();
|
|
@@ -4066,17 +4228,27 @@ var init_run = __esm({
|
|
|
4066
4228
|
init_next();
|
|
4067
4229
|
init_cycle();
|
|
4068
4230
|
init_spawn();
|
|
4231
|
+
init_config();
|
|
4232
|
+
init_shutdown();
|
|
4069
4233
|
init_format();
|
|
4070
4234
|
}
|
|
4071
4235
|
});
|
|
4072
4236
|
|
|
4073
4237
|
// src/cli.ts
|
|
4074
|
-
var
|
|
4075
|
-
var
|
|
4238
|
+
var fs12 = __toESM(require("fs"));
|
|
4239
|
+
var path12 = __toESM(require("path"));
|
|
4076
4240
|
var VERSION = JSON.parse(
|
|
4077
|
-
|
|
4241
|
+
fs12.readFileSync(path12.join(__dirname, "..", "package.json"), "utf-8")
|
|
4078
4242
|
).version;
|
|
4079
4243
|
async function main() {
|
|
4244
|
+
let sigintCount = 0;
|
|
4245
|
+
process.on("SIGINT", () => {
|
|
4246
|
+
sigintCount++;
|
|
4247
|
+
if (sigintCount >= 2) process.exit(130);
|
|
4248
|
+
const { requestShutdown: requestShutdown2 } = (init_shutdown(), __toCommonJS(shutdown_exports));
|
|
4249
|
+
requestShutdown2();
|
|
4250
|
+
console.error("\n\x1B[33m[majlis] Interrupt received. Finishing current step...\x1B[0m");
|
|
4251
|
+
});
|
|
4080
4252
|
const args = process.argv.slice(2);
|
|
4081
4253
|
if (args.includes("--version") || args.includes("-v")) {
|
|
4082
4254
|
console.log(VERSION);
|