majlis 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +246 -17
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -449,7 +449,7 @@ var init_format = __esm({
|
|
|
449
449
|
function getExtractionSchema(role) {
|
|
450
450
|
switch (role) {
|
|
451
451
|
case "builder":
|
|
452
|
-
return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}]}';
|
|
452
|
+
return '{"decisions": [{"description": "string", "evidence_level": "proof|test|strong_consensus|consensus|analogy|judgment", "justification": "string"}], "abandon": {"reason": "string", "structural_constraint": "string"}}';
|
|
453
453
|
case "critic":
|
|
454
454
|
return '{"doubts": [{"claim_doubted": "string", "evidence_level_of_claim": "string", "evidence_for_doubt": "string", "severity": "minor|moderate|critical"}]}';
|
|
455
455
|
case "adversary":
|
|
@@ -506,7 +506,7 @@ async function extractStructuredData(role, markdown) {
|
|
|
506
506
|
const tier1 = extractMajlisJsonBlock(markdown);
|
|
507
507
|
if (tier1) {
|
|
508
508
|
const parsed = tryParseJson(tier1);
|
|
509
|
-
if (parsed) return parsed;
|
|
509
|
+
if (parsed) return { data: parsed, tier: 1 };
|
|
510
510
|
console.warn(`[majlis] Malformed JSON in <!-- majlis-json --> block for ${role}. Falling back.`);
|
|
511
511
|
} else {
|
|
512
512
|
console.warn(`[majlis] No <!-- majlis-json --> block found in ${role} output. Falling back.`);
|
|
@@ -514,15 +514,18 @@ async function extractStructuredData(role, markdown) {
|
|
|
514
514
|
const tier2 = extractViaPatterns(role, markdown);
|
|
515
515
|
if (tier2 && hasData(tier2)) {
|
|
516
516
|
console.warn(`[majlis] Used regex fallback for ${role}. Review extracted data.`);
|
|
517
|
-
return tier2;
|
|
517
|
+
return { data: tier2, tier: 2 };
|
|
518
518
|
}
|
|
519
519
|
console.warn(`[majlis] Regex fallback insufficient for ${role}. Using Haiku extraction.`);
|
|
520
520
|
const tier3 = await extractViaHaiku(role, markdown);
|
|
521
|
-
if (tier3) return tier3;
|
|
521
|
+
if (tier3) {
|
|
522
|
+
console.warn(`[majlis] Tier 3 (Haiku) extraction used for ${role}. Data provenance degraded.`);
|
|
523
|
+
return { data: tier3, tier: 3 };
|
|
524
|
+
}
|
|
522
525
|
console.error(
|
|
523
526
|
`[majlis] FAILED to extract structured data from ${role} output. State machine will continue but data is missing. Manual review required.`
|
|
524
527
|
);
|
|
525
|
-
return null;
|
|
528
|
+
return { data: null, tier: null };
|
|
526
529
|
}
|
|
527
530
|
function extractMajlisJsonBlock(markdown) {
|
|
528
531
|
const match = markdown.match(/<!--\s*majlis-json\s*\n?([\s\S]*?)-->/);
|
|
@@ -592,6 +595,23 @@ function extractViaPatterns(role, markdown) {
|
|
|
592
595
|
});
|
|
593
596
|
}
|
|
594
597
|
if (doubts.length > 0) result.doubts = doubts;
|
|
598
|
+
if (role === "builder") {
|
|
599
|
+
const abandonPattern = /\[ABANDON\]\s*(.+?)(?:\n|$)[\s\S]*?(?:structural.?constraint|Constraint|CONSTRAINT)\s*[:=]\s*(.+?)(?:\n|$)/im;
|
|
600
|
+
const abandonMatch = markdown.match(abandonPattern);
|
|
601
|
+
if (abandonMatch) {
|
|
602
|
+
result.abandon = {
|
|
603
|
+
reason: abandonMatch[1].trim(),
|
|
604
|
+
structural_constraint: abandonMatch[2].trim()
|
|
605
|
+
};
|
|
606
|
+
}
|
|
607
|
+
const invalidMatch = markdown.match(/(?:HYPOTHESIS\s+INVALID|HYPOTHESIS\s+IMPOSSIBLE)\s*[:.\-—]\s*(.+?)(?:\n|$)/im);
|
|
608
|
+
if (invalidMatch && !result.abandon) {
|
|
609
|
+
result.abandon = {
|
|
610
|
+
reason: invalidMatch[1].trim(),
|
|
611
|
+
structural_constraint: "Extracted via regex \u2014 review original document"
|
|
612
|
+
};
|
|
613
|
+
}
|
|
614
|
+
}
|
|
595
615
|
return result;
|
|
596
616
|
}
|
|
597
617
|
async function extractViaHaiku(role, markdown) {
|
|
@@ -631,7 +651,7 @@ ${truncated}`;
|
|
|
631
651
|
}
|
|
632
652
|
}
|
|
633
653
|
function hasData(output) {
|
|
634
|
-
return !!(output.decisions && output.decisions.length > 0 || output.grades && output.grades.length > 0 || output.doubts && output.doubts.length > 0 || output.challenges && output.challenges.length > 0 || output.findings && output.findings.length > 0 || output.guidance || output.reframe || output.compression_report || output.gate_decision || output.diagnosis);
|
|
654
|
+
return !!(output.decisions && output.decisions.length > 0 || output.grades && output.grades.length > 0 || output.doubts && output.doubts.length > 0 || output.challenges && output.challenges.length > 0 || output.findings && output.findings.length > 0 || output.guidance || output.reframe || output.compression_report || output.gate_decision || output.diagnosis || output.abandon);
|
|
635
655
|
}
|
|
636
656
|
function validateForRole(role, output) {
|
|
637
657
|
const required = ROLE_REQUIRED_FIELDS[role];
|
|
@@ -846,6 +866,8 @@ function buildPreToolUseGuards(role, cwd) {
|
|
|
846
866
|
const configFile = path2.resolve(cwd, ".majlis", "config.json");
|
|
847
867
|
const dbFile = path2.resolve(cwd, ".majlis", "majlis.db");
|
|
848
868
|
const settingsFile = path2.resolve(cwd, ".claude", "settings.json");
|
|
869
|
+
const claudeDir = path2.resolve(cwd, ".claude");
|
|
870
|
+
const agentsDir = path2.resolve(cwd, ".majlis", "agents");
|
|
849
871
|
const configGuard = async (input) => {
|
|
850
872
|
const toolInput = input.tool_input ?? {};
|
|
851
873
|
const filePath = toolInput.file_path ?? "";
|
|
@@ -854,6 +876,9 @@ function buildPreToolUseGuards(role, cwd) {
|
|
|
854
876
|
if (resolved === configFile || resolved === dbFile || resolved === settingsFile) {
|
|
855
877
|
return { decision: "block", reason: `Builder may not modify framework files: ${filePath}` };
|
|
856
878
|
}
|
|
879
|
+
if (isInsideDir(resolved, claudeDir) || isInsideDir(resolved, agentsDir)) {
|
|
880
|
+
return { decision: "block", reason: `Builder may not modify agent definitions or framework settings: ${filePath}` };
|
|
881
|
+
}
|
|
857
882
|
}
|
|
858
883
|
return {};
|
|
859
884
|
};
|
|
@@ -867,6 +892,8 @@ function buildPreToolUseGuards(role, cwd) {
|
|
|
867
892
|
const configFile = path2.resolve(cwd, ".majlis", "config.json");
|
|
868
893
|
const dbFile = path2.resolve(cwd, ".majlis", "majlis.db");
|
|
869
894
|
const settingsFile = path2.resolve(cwd, ".claude", "settings.json");
|
|
895
|
+
const claudeDir = path2.resolve(cwd, ".claude");
|
|
896
|
+
const agentsDir = path2.resolve(cwd, ".majlis", "agents");
|
|
870
897
|
const configGuard = async (input) => {
|
|
871
898
|
const toolInput = input.tool_input ?? {};
|
|
872
899
|
const filePath = toolInput.file_path ?? "";
|
|
@@ -875,6 +902,9 @@ function buildPreToolUseGuards(role, cwd) {
|
|
|
875
902
|
if (resolved === configFile || resolved === dbFile || resolved === settingsFile) {
|
|
876
903
|
return { decision: "block", reason: `Verifier may not modify framework files: ${filePath}` };
|
|
877
904
|
}
|
|
905
|
+
if (isInsideDir(resolved, claudeDir) || isInsideDir(resolved, agentsDir)) {
|
|
906
|
+
return { decision: "block", reason: `Verifier may not modify agent definitions or framework settings: ${filePath}` };
|
|
907
|
+
}
|
|
878
908
|
}
|
|
879
909
|
return {};
|
|
880
910
|
};
|
|
@@ -947,14 +977,17 @@ ${taskPrompt}`;
|
|
|
947
977
|
if (artifactPath) {
|
|
948
978
|
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
949
979
|
}
|
|
950
|
-
const structured = await extractStructuredData(role, markdown);
|
|
980
|
+
const { data: structured, tier: extractionTier } = await extractStructuredData(role, markdown);
|
|
951
981
|
if (structured) {
|
|
952
982
|
const { valid, missing } = validateForRole(role, structured);
|
|
953
983
|
if (!valid) {
|
|
954
984
|
console.warn(`[${role}] Output missing expected fields: ${missing.join(", ")}`);
|
|
955
985
|
}
|
|
956
986
|
}
|
|
957
|
-
|
|
987
|
+
if (extractionTier === 3) {
|
|
988
|
+
console.warn(`[${role}] WARNING: Structured output was reconstructed by Haiku (tier 3). Data provenance degraded.`);
|
|
989
|
+
}
|
|
990
|
+
return { output: markdown, structured, truncated, extractionTier };
|
|
958
991
|
}
|
|
959
992
|
async function spawnSynthesiser(context, projectRoot, opts) {
|
|
960
993
|
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
@@ -982,7 +1015,7 @@ ${taskPrompt}`;
|
|
|
982
1015
|
role: "synthesiser"
|
|
983
1016
|
});
|
|
984
1017
|
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
985
|
-
return { output: markdown, structured: { guidance: markdown }, truncated };
|
|
1018
|
+
return { output: markdown, structured: { guidance: markdown }, truncated, extractionTier: null };
|
|
986
1019
|
}
|
|
987
1020
|
async function spawnRecovery(role, partialOutput, context, projectRoot) {
|
|
988
1021
|
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
@@ -1285,7 +1318,8 @@ var init_config = __esm({
|
|
|
1285
1318
|
synthesis: 3e4,
|
|
1286
1319
|
fragility: 15e3,
|
|
1287
1320
|
experimentDoc: 15e3,
|
|
1288
|
-
deadEnds: 15e3
|
|
1321
|
+
deadEnds: 15e3,
|
|
1322
|
+
experimentLineage: 15e3
|
|
1289
1323
|
};
|
|
1290
1324
|
}
|
|
1291
1325
|
});
|
|
@@ -2495,7 +2529,7 @@ function updateSwarmMember(db, swarmRunId, slug, finalStatus, overallGrade, cost
|
|
|
2495
2529
|
WHERE swarm_run_id = ? AND experiment_slug = ?
|
|
2496
2530
|
`).run(finalStatus, overallGrade, costUsd, error, swarmRunId, slug);
|
|
2497
2531
|
}
|
|
2498
|
-
function exportForCompressor(db, maxLength = 3e4) {
|
|
2532
|
+
function exportForCompressor(db, maxLength = 5e4) {
|
|
2499
2533
|
const experiments = listAllExperiments(db);
|
|
2500
2534
|
const sections = ["# Structured Data Export (from SQLite)\n"];
|
|
2501
2535
|
sections.push("## Experiments");
|
|
@@ -2561,6 +2595,70 @@ function exportForCompressor(db, maxLength = 3e4) {
|
|
|
2561
2595
|
}
|
|
2562
2596
|
return full;
|
|
2563
2597
|
}
|
|
2598
|
+
function exportExperimentLineage(db, subType, maxLength = 15e3) {
|
|
2599
|
+
const experiments = subType ? db.prepare(`SELECT * FROM experiments WHERE sub_type = ? ORDER BY created_at`).all(subType) : listAllExperiments(db);
|
|
2600
|
+
if (experiments.length === 0) return "";
|
|
2601
|
+
const sections = ["## Experiment Lineage (from DB \u2014 canonical, not from synthesis)\n"];
|
|
2602
|
+
for (const exp of experiments) {
|
|
2603
|
+
sections.push(`### ${exp.slug} [${exp.status}]`);
|
|
2604
|
+
if (exp.hypothesis) sections.push(`Hypothesis: ${exp.hypothesis}`);
|
|
2605
|
+
const decisions = listDecisionsByExperiment(db, exp.id);
|
|
2606
|
+
if (decisions.length > 0) {
|
|
2607
|
+
sections.push("Decisions:");
|
|
2608
|
+
for (const d of decisions) {
|
|
2609
|
+
sections.push(` - [${d.evidence_level}/${d.status}] ${d.description}`);
|
|
2610
|
+
}
|
|
2611
|
+
}
|
|
2612
|
+
const beforeMetrics = getMetricsByExperimentAndPhase(db, exp.id, "before");
|
|
2613
|
+
const afterMetrics = getMetricsByExperimentAndPhase(db, exp.id, "after");
|
|
2614
|
+
if (beforeMetrics.length > 0 && afterMetrics.length > 0) {
|
|
2615
|
+
sections.push("Metrics:");
|
|
2616
|
+
for (const bm of beforeMetrics) {
|
|
2617
|
+
const am = afterMetrics.find((a) => a.fixture === bm.fixture && a.metric_name === bm.metric_name);
|
|
2618
|
+
if (am) {
|
|
2619
|
+
const delta = am.metric_value - bm.metric_value;
|
|
2620
|
+
const sign = delta >= 0 ? "+" : "";
|
|
2621
|
+
sections.push(` - ${bm.fixture}/${bm.metric_name}: ${bm.metric_value} \u2192 ${am.metric_value} (${sign}${delta.toFixed(4)})`);
|
|
2622
|
+
}
|
|
2623
|
+
}
|
|
2624
|
+
}
|
|
2625
|
+
const doubts = getDoubtsByExperiment(db, exp.id);
|
|
2626
|
+
const resolved = doubts.filter((d) => d.resolution);
|
|
2627
|
+
if (resolved.length > 0) {
|
|
2628
|
+
sections.push("Doubt resolutions:");
|
|
2629
|
+
for (const d of resolved) {
|
|
2630
|
+
sections.push(` - [${d.resolution}] ${d.claim_doubted}`);
|
|
2631
|
+
}
|
|
2632
|
+
}
|
|
2633
|
+
const verifications = getVerificationsByExperiment(db, exp.id);
|
|
2634
|
+
if (verifications.length > 0) {
|
|
2635
|
+
sections.push("Grades:");
|
|
2636
|
+
for (const v of verifications) {
|
|
2637
|
+
sections.push(` - ${v.component}: ${v.grade}${v.notes ? ` \u2014 ${v.notes}` : ""}`);
|
|
2638
|
+
}
|
|
2639
|
+
}
|
|
2640
|
+
sections.push("");
|
|
2641
|
+
const current = sections.join("\n");
|
|
2642
|
+
if (current.length > maxLength - 500) {
|
|
2643
|
+
sections.push(`[LINEAGE TRUNCATED \u2014 ${experiments.length - experiments.indexOf(exp) - 1} experiments omitted]`);
|
|
2644
|
+
break;
|
|
2645
|
+
}
|
|
2646
|
+
}
|
|
2647
|
+
const deadEnds = subType ? listDeadEndsBySubType(db, subType) : listAllDeadEnds(db);
|
|
2648
|
+
if (deadEnds.length > 0) {
|
|
2649
|
+
sections.push("### Dead Ends (structural constraints)");
|
|
2650
|
+
for (const de of deadEnds) {
|
|
2651
|
+
sections.push(`- [${de.category ?? "structural"}] ${de.approach}: ${de.structural_constraint}`);
|
|
2652
|
+
}
|
|
2653
|
+
}
|
|
2654
|
+
const full = sections.join("\n");
|
|
2655
|
+
if (full.length > maxLength) {
|
|
2656
|
+
return full.slice(0, maxLength) + `
|
|
2657
|
+
|
|
2658
|
+
[LINEAGE TRUNCATED at ${maxLength} chars]`;
|
|
2659
|
+
}
|
|
2660
|
+
return full;
|
|
2661
|
+
}
|
|
2564
2662
|
function exportForDiagnostician(db, maxLength = 6e4) {
|
|
2565
2663
|
const base = exportForCompressor(db, maxLength);
|
|
2566
2664
|
const sections = [base];
|
|
@@ -3850,9 +3948,18 @@ Output your gate_decision as "approve", "reject", or "flag" with reasoning.`
|
|
|
3850
3948
|
const decision = result.structured?.gate_decision ?? "approve";
|
|
3851
3949
|
const reason = result.structured?.reason ?? "";
|
|
3852
3950
|
if (decision === "reject") {
|
|
3853
|
-
|
|
3854
|
-
|
|
3855
|
-
|
|
3951
|
+
insertDeadEnd(
|
|
3952
|
+
db,
|
|
3953
|
+
exp.id,
|
|
3954
|
+
exp.hypothesis ?? exp.slug,
|
|
3955
|
+
reason,
|
|
3956
|
+
`Gate rejected: ${reason}`,
|
|
3957
|
+
exp.sub_type,
|
|
3958
|
+
"procedural"
|
|
3959
|
+
);
|
|
3960
|
+
adminTransitionAndPersist(db, exp.id, "gated", "dead_end" /* DEAD_END */, "revert");
|
|
3961
|
+
warn(`Gate REJECTED for ${exp.slug}: ${reason}. Dead-ended.`);
|
|
3962
|
+
return;
|
|
3856
3963
|
} else {
|
|
3857
3964
|
if (decision === "flag") {
|
|
3858
3965
|
warn(`Gate flagged concerns for ${exp.slug}: ${reason}`);
|
|
@@ -3909,6 +4016,10 @@ Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothes
|
|
|
3909
4016
|
}
|
|
3910
4017
|
taskPrompt += "\n\nNote: The framework captures metrics automatically. Do NOT claim specific numbers unless quoting framework output.";
|
|
3911
4018
|
const supplementaryContext = loadExperimentContext(exp, root);
|
|
4019
|
+
const lineage = exportExperimentLineage(db, exp.sub_type);
|
|
4020
|
+
if (lineage) {
|
|
4021
|
+
taskPrompt += "\n\n" + lineage;
|
|
4022
|
+
}
|
|
3912
4023
|
const result = await spawnAgent("builder", {
|
|
3913
4024
|
experiment: {
|
|
3914
4025
|
id: exp.id,
|
|
@@ -3927,9 +4038,24 @@ Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothes
|
|
|
3927
4038
|
synthesis,
|
|
3928
4039
|
confirmedDoubts,
|
|
3929
4040
|
supplementaryContext: supplementaryContext || void 0,
|
|
4041
|
+
experimentLineage: lineage || void 0,
|
|
3930
4042
|
taskPrompt
|
|
3931
4043
|
}, root);
|
|
3932
4044
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
4045
|
+
if (result.structured?.abandon) {
|
|
4046
|
+
insertDeadEnd(
|
|
4047
|
+
db,
|
|
4048
|
+
exp.id,
|
|
4049
|
+
exp.hypothesis ?? exp.slug,
|
|
4050
|
+
result.structured.abandon.reason,
|
|
4051
|
+
result.structured.abandon.structural_constraint,
|
|
4052
|
+
exp.sub_type,
|
|
4053
|
+
"structural"
|
|
4054
|
+
);
|
|
4055
|
+
adminTransitionAndPersist(db, exp.id, "building", "dead_end" /* DEAD_END */, "revert");
|
|
4056
|
+
info(`Builder abandoned ${exp.slug}: ${result.structured.abandon.reason}`);
|
|
4057
|
+
return;
|
|
4058
|
+
}
|
|
3933
4059
|
if (result.truncated && !result.structured) {
|
|
3934
4060
|
warn(`Builder was truncated (hit max turns) without producing structured output.`);
|
|
3935
4061
|
await spawnRecovery("builder", result.output, {
|
|
@@ -3937,6 +4063,25 @@ Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothes
|
|
|
3937
4063
|
}, root);
|
|
3938
4064
|
warn(`Experiment stays at 'building'. Run \`majlis build\` to retry or \`majlis revert\` to abandon.`);
|
|
3939
4065
|
} else {
|
|
4066
|
+
if (config.build?.pre_measure) {
|
|
4067
|
+
try {
|
|
4068
|
+
const [cmd, ...cmdArgs] = config.build.pre_measure.split(/\s+/);
|
|
4069
|
+
(0, import_node_child_process7.execFileSync)(cmd, cmdArgs, {
|
|
4070
|
+
cwd: root,
|
|
4071
|
+
encoding: "utf-8",
|
|
4072
|
+
timeout: 3e4,
|
|
4073
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
4074
|
+
});
|
|
4075
|
+
} catch (err) {
|
|
4076
|
+
const errMsg = err instanceof Error ? err.message : String(err);
|
|
4077
|
+
const guidance = `Build verification failed after builder completion. Code may be syntactically broken or incomplete.
|
|
4078
|
+
Error: ${errMsg.slice(0, 500)}`;
|
|
4079
|
+
storeBuilderGuidance(db, exp.id, guidance);
|
|
4080
|
+
warn(`Build verification failed for ${exp.slug}. Staying at 'building'.`);
|
|
4081
|
+
warn(`Guidance stored for retry. Run \`majlis build\` to retry.`);
|
|
4082
|
+
return;
|
|
4083
|
+
}
|
|
4084
|
+
}
|
|
3940
4085
|
if (config.metrics?.command) {
|
|
3941
4086
|
try {
|
|
3942
4087
|
const output = (0, import_node_child_process7.execSync)(config.metrics.command, {
|
|
@@ -3955,6 +4100,15 @@ Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothes
|
|
|
3955
4100
|
}
|
|
3956
4101
|
}
|
|
3957
4102
|
gitCommitBuild(exp, root);
|
|
4103
|
+
if (result.extractionTier === 3) {
|
|
4104
|
+
warn(`Builder output extracted via Haiku (tier 3). Data provenance degraded.`);
|
|
4105
|
+
const existing = getBuilderGuidance(db, exp.id) ?? "";
|
|
4106
|
+
storeBuilderGuidance(
|
|
4107
|
+
db,
|
|
4108
|
+
exp.id,
|
|
4109
|
+
existing + "\n[PROVENANCE WARNING] Builder structured output was reconstructed by a secondary model (tier 3). Treat reported decisions with additional scrutiny."
|
|
4110
|
+
);
|
|
4111
|
+
}
|
|
3958
4112
|
updateExperimentStatus(db, exp.id, "built");
|
|
3959
4113
|
success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
|
|
3960
4114
|
}
|
|
@@ -4150,6 +4304,15 @@ async function doVerify(db, exp, root) {
|
|
|
4150
4304
|
}
|
|
4151
4305
|
updateExperimentStatus(db, exp.id, "verifying");
|
|
4152
4306
|
const verifierSupplementaryContext = loadExperimentContext(exp, root);
|
|
4307
|
+
const verifierLineage = exportExperimentLineage(db, exp.sub_type);
|
|
4308
|
+
let verifierTaskPrompt = `Verify experiment ${exp.slug}: ${exp.hypothesis}. Check provenance and content. Test the ${doubts.length} doubt(s) and any adversarial challenges.` + metricsSection + doubtReference;
|
|
4309
|
+
if (verifierLineage) {
|
|
4310
|
+
verifierTaskPrompt += "\n\n" + verifierLineage;
|
|
4311
|
+
}
|
|
4312
|
+
const builderGuidanceForVerifier = getBuilderGuidance(db, exp.id);
|
|
4313
|
+
if (builderGuidanceForVerifier?.includes("[PROVENANCE WARNING]")) {
|
|
4314
|
+
verifierTaskPrompt += "\n\nNote: The builder's structured output was reconstructed by a secondary model (tier 3). Treat reported decisions with additional scrutiny.";
|
|
4315
|
+
}
|
|
4153
4316
|
const result = await spawnAgent("verifier", {
|
|
4154
4317
|
experiment: {
|
|
4155
4318
|
id: exp.id,
|
|
@@ -4163,7 +4326,8 @@ async function doVerify(db, exp, root) {
|
|
|
4163
4326
|
challenges,
|
|
4164
4327
|
metricComparisons: metricComparisons.length > 0 ? metricComparisons : void 0,
|
|
4165
4328
|
supplementaryContext: verifierSupplementaryContext || void 0,
|
|
4166
|
-
|
|
4329
|
+
experimentLineage: verifierLineage || void 0,
|
|
4330
|
+
taskPrompt: verifierTaskPrompt
|
|
4167
4331
|
}, root);
|
|
4168
4332
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
4169
4333
|
if (result.truncated && !result.structured) {
|
|
@@ -5432,6 +5596,7 @@ async function swarm(args) {
|
|
|
5432
5596
|
if (summary.bestExperiment && isMergeable(summary.bestExperiment.overallGrade)) {
|
|
5433
5597
|
const best = summary.bestExperiment;
|
|
5434
5598
|
info(`Best experiment: ${best.worktree.slug} (${best.overallGrade})`);
|
|
5599
|
+
let merged = false;
|
|
5435
5600
|
try {
|
|
5436
5601
|
(0, import_node_child_process10.execFileSync)(
|
|
5437
5602
|
"git",
|
|
@@ -5439,9 +5604,72 @@ async function swarm(args) {
|
|
|
5439
5604
|
{ cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }
|
|
5440
5605
|
);
|
|
5441
5606
|
success(`Merged ${best.worktree.slug} into main.`);
|
|
5607
|
+
merged = true;
|
|
5442
5608
|
} catch {
|
|
5443
|
-
warn(`Git merge of ${best.worktree.slug} failed.
|
|
5444
|
-
|
|
5609
|
+
warn(`Git merge of ${best.worktree.slug} failed (conflict). Attempting rebase...`);
|
|
5610
|
+
try {
|
|
5611
|
+
(0, import_node_child_process10.execFileSync)("git", ["merge", "--abort"], { cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
|
|
5612
|
+
} catch {
|
|
5613
|
+
}
|
|
5614
|
+
try {
|
|
5615
|
+
(0, import_node_child_process10.execFileSync)(
|
|
5616
|
+
"git",
|
|
5617
|
+
["rebase", "main", best.worktree.branch],
|
|
5618
|
+
{ cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }
|
|
5619
|
+
);
|
|
5620
|
+
info(`Rebase of ${best.worktree.slug} onto main succeeded. Re-verifying gates...`);
|
|
5621
|
+
const config = loadConfig(root);
|
|
5622
|
+
let gatesHold = true;
|
|
5623
|
+
if (config.metrics?.command && best.experiment) {
|
|
5624
|
+
try {
|
|
5625
|
+
const output = (0, import_node_child_process10.execSync)(config.metrics.command, {
|
|
5626
|
+
cwd: root,
|
|
5627
|
+
encoding: "utf-8",
|
|
5628
|
+
timeout: 6e4,
|
|
5629
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
5630
|
+
}).trim();
|
|
5631
|
+
const parsed = parseMetricsOutput(output);
|
|
5632
|
+
for (const m of parsed) {
|
|
5633
|
+
insertMetric(db, best.experiment.id, "after", m.fixture, m.metric_name, m.metric_value);
|
|
5634
|
+
}
|
|
5635
|
+
const comparisons = compareMetrics(db, best.experiment.id, config);
|
|
5636
|
+
const gateViolations = checkGateViolations(comparisons);
|
|
5637
|
+
if (gateViolations.length > 0) {
|
|
5638
|
+
gatesHold = false;
|
|
5639
|
+
warn(`Gate violations after rebase:`);
|
|
5640
|
+
for (const v of gateViolations) {
|
|
5641
|
+
warn(` - ${v.fixture}/${v.metric}: ${v.before} \u2192 ${v.after} (delta: ${v.delta})`);
|
|
5642
|
+
}
|
|
5643
|
+
}
|
|
5644
|
+
} catch {
|
|
5645
|
+
warn("Could not re-capture metrics after rebase. Proceeding cautiously.");
|
|
5646
|
+
}
|
|
5647
|
+
}
|
|
5648
|
+
if (gatesHold) {
|
|
5649
|
+
(0, import_node_child_process10.execFileSync)("git", ["checkout", "main"], { cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
|
|
5650
|
+
(0, import_node_child_process10.execFileSync)(
|
|
5651
|
+
"git",
|
|
5652
|
+
["merge", "--ff-only", best.worktree.branch],
|
|
5653
|
+
{ cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }
|
|
5654
|
+
);
|
|
5655
|
+
success(`Merged ${best.worktree.slug} into main (via rebase + ff).`);
|
|
5656
|
+
merged = true;
|
|
5657
|
+
} else {
|
|
5658
|
+
warn(`Gate violations after rebase. NOT merging ${best.worktree.slug}.`);
|
|
5659
|
+
info(`Manual resolution needed:`);
|
|
5660
|
+
info(` git checkout main && git merge ${best.worktree.branch} --no-ff`);
|
|
5661
|
+
}
|
|
5662
|
+
} catch {
|
|
5663
|
+
try {
|
|
5664
|
+
(0, import_node_child_process10.execFileSync)("git", ["rebase", "--abort"], { cwd: root, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
|
|
5665
|
+
} catch {
|
|
5666
|
+
}
|
|
5667
|
+
warn(`Rebase of ${best.worktree.slug} also failed. Manual merge required:`);
|
|
5668
|
+
info(` git merge ${best.worktree.branch} --no-ff`);
|
|
5669
|
+
}
|
|
5670
|
+
}
|
|
5671
|
+
if (!merged) {
|
|
5672
|
+
info(`${best.worktree.slug} was NOT merged automatically.`);
|
|
5445
5673
|
}
|
|
5446
5674
|
} else {
|
|
5447
5675
|
info("No experiment achieved sound/good grade. Nothing merged.");
|
|
@@ -5594,6 +5822,7 @@ var init_swarm = __esm({
|
|
|
5594
5822
|
init_types2();
|
|
5595
5823
|
init_spawn();
|
|
5596
5824
|
init_config();
|
|
5825
|
+
init_metrics();
|
|
5597
5826
|
init_worktree();
|
|
5598
5827
|
init_runner();
|
|
5599
5828
|
init_aggregate();
|