majlis 0.8.2 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +118 -14
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2447,7 +2447,7 @@ ${contextJson}
|
|
|
2447
2447
|
\`\`\`
|
|
2448
2448
|
|
|
2449
2449
|
${taskPrompt}`;
|
|
2450
|
-
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy.
|
|
2450
|
+
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy.\n\nIf any approach from this iteration is PROVABLY dead (not just needs refinement \u2014 structurally impossible or fundamentally wrong), flag it on its own line as:\n[DEAD-APPROACH] approach name: why it cannot work\n\nOnly use [DEAD-APPROACH] for approaches that are mathematically impossible, violate invariants, or have been empirically proven to fail. Do NOT flag approaches that merely need adjustment.\n\nCRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
|
|
2451
2451
|
console.log(`[synthesiser] Spawning (maxTurns: ${maxTurns})...`);
|
|
2452
2452
|
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2453
2453
|
prompt,
|
|
@@ -4517,6 +4517,41 @@ var init_metrics = __esm({
|
|
|
4517
4517
|
});
|
|
4518
4518
|
|
|
4519
4519
|
// src/resolve.ts
|
|
4520
|
+
/**
 * Prepend a new guidance entry to the builder guidance accumulated so far.
 *
 * The new entry gets the header `### Iteration N (latest)`, where N is one
 * more than the highest iteration number found in `existing` (1 when there
 * is none). Older entries follow, newest first, separated by `---`, with
 * their `(latest)` header marker removed.
 *
 * When the combined text is longer than the module-level GUIDANCE_MAX_CHARS,
 * whole iteration sections are dropped from the oldest end and replaced by a
 * single "[Earlier iterations truncated]" marker. The newest section is
 * always kept, so the result can still exceed the limit.
 *
 * @param {string | null | undefined} existing - Previously stored guidance, if any.
 * @param {string} newGuidance - Guidance text for the new iteration.
 * @returns {string} The combined guidance text.
 */
function accumulateGuidance(existing, newGuidance) {
  // Highest iteration number already present; 0 when nothing matches.
  const iterationNums = existing?.match(/### Iteration (\d+)/g)?.map((m) => parseInt(m.replace("### Iteration ", ""), 10)) ?? [];
  const maxExisting = iterationNums.length > 0 ? Math.max(...iterationNums) : 0;
  const newBlock = `### Iteration ${maxExisting + 1} (latest)\n${newGuidance}`;
  if (!existing) return newBlock;

  // Demote the previous "(latest)" marker on header lines only. Stripping the
  // phrase everywhere would silently rewrite guidance text that happens to
  // contain " (latest)".
  const cleaned = existing.replace(/^(### Iteration \d+) \(latest\)/gm, "$1");
  const accumulated = `${newBlock}\n\n---\n\n${cleaned}`;
  if (accumulated.length <= GUIDANCE_MAX_CHARS) return accumulated;

  // Over budget: keep whole sections, newest first, until the next one would
  // exceed the limit. The `result.length > 0` guard keeps the newest section
  // even when it is over the limit by itself.
  const sections = accumulated.split(/(?=^### Iteration \d+)/m);
  let result = "";
  for (const section of sections) {
    if (result.length + section.length > GUIDANCE_MAX_CHARS && result.length > 0) {
      result += "\n\n[Earlier iterations truncated]";
      break;
    }
    result += section;
  }
  return result;
}
|
|
4546
|
+
/**
 * Extract `[DEAD-APPROACH] name: reason` flags from synthesiser output.
 *
 * Each match yields one entry: the text up to the first colon becomes
 * `approach`, and the remainder of that line becomes `reason`, both trimmed.
 *
 * @param {string} output - Raw synthesiser output text.
 * @returns {{ approach: string, reason: string }[]} Flags in order of appearance;
 *   empty when none are found.
 */
function parseSynthesiserDeadApproaches(output) {
  const flagPattern = /\[DEAD-APPROACH\]\s*(.+?):\s*(.+)/g;
  // String() mirrors the coercion RegExp.prototype.exec applies to its argument.
  return Array.from(String(output).matchAll(flagPattern), ([, name, why]) => ({
    approach: name.trim(),
    reason: why.trim(),
  }));
}
|
|
4520
4555
|
function worstGrade(grades) {
|
|
4521
4556
|
if (grades.length === 0) {
|
|
4522
4557
|
throw new Error("Cannot determine grade from empty verification set \u2014 this indicates a data integrity issue");
|
|
@@ -4551,11 +4586,12 @@ async function resolve2(db, exp, projectRoot) {
|
|
|
4551
4586
|
warn(` ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
|
|
4552
4587
|
}
|
|
4553
4588
|
updateExperimentStatus(db, exp.id, "resolved");
|
|
4554
|
-
const
|
|
4589
|
+
const gateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
|
|
4555
4590
|
` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
|
|
4591
|
+
const accumulatedGate = accumulateGuidance(exp.builder_guidance, gateGuidance);
|
|
4556
4592
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4557
4593
|
db.transaction(() => {
|
|
4558
|
-
storeBuilderGuidance(db, exp.id,
|
|
4594
|
+
storeBuilderGuidance(db, exp.id, accumulatedGate);
|
|
4559
4595
|
updateExperimentStatus(db, exp.id, "building");
|
|
4560
4596
|
if (exp.sub_type) {
|
|
4561
4597
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
@@ -4598,16 +4634,47 @@ async function resolve2(db, exp, projectRoot) {
|
|
|
4598
4634
|
confirmedDoubts,
|
|
4599
4635
|
taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
|
|
4600
4636
|
}, projectRoot);
|
|
4601
|
-
const
|
|
4637
|
+
const rawGuidance = guidance.structured?.guidance ?? guidance.output;
|
|
4638
|
+
const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
|
|
4602
4639
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4603
4640
|
db.transaction(() => {
|
|
4604
|
-
storeBuilderGuidance(db, exp.id,
|
|
4641
|
+
storeBuilderGuidance(db, exp.id, accumulated);
|
|
4605
4642
|
updateExperimentStatus(db, exp.id, "building");
|
|
4606
4643
|
if (exp.sub_type) {
|
|
4607
4644
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
4608
4645
|
}
|
|
4646
|
+
const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
|
|
4647
|
+
for (const rc of rejectedInWeak) {
|
|
4648
|
+
insertDeadEnd(
|
|
4649
|
+
db,
|
|
4650
|
+
exp.id,
|
|
4651
|
+
`${rc.component} (iteration within ${exp.slug})`,
|
|
4652
|
+
rc.notes ?? "rejected by verifier",
|
|
4653
|
+
`Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
|
|
4654
|
+
exp.sub_type,
|
|
4655
|
+
"structural"
|
|
4656
|
+
);
|
|
4657
|
+
}
|
|
4658
|
+
if (rejectedInWeak.length > 0) {
|
|
4659
|
+
info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
|
|
4660
|
+
}
|
|
4661
|
+
const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
|
|
4662
|
+
for (const da of deadApproaches) {
|
|
4663
|
+
insertDeadEnd(
|
|
4664
|
+
db,
|
|
4665
|
+
exp.id,
|
|
4666
|
+
da.approach,
|
|
4667
|
+
da.reason,
|
|
4668
|
+
da.reason,
|
|
4669
|
+
exp.sub_type,
|
|
4670
|
+
"structural"
|
|
4671
|
+
);
|
|
4672
|
+
}
|
|
4673
|
+
if (deadApproaches.length > 0) {
|
|
4674
|
+
info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
|
|
4675
|
+
}
|
|
4609
4676
|
})();
|
|
4610
|
-
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance
|
|
4677
|
+
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated for builder.`);
|
|
4611
4678
|
break;
|
|
4612
4679
|
}
|
|
4613
4680
|
case "rejected": {
|
|
@@ -4660,11 +4727,12 @@ async function resolveDbOnly(db, exp, projectRoot) {
|
|
|
4660
4727
|
warn(` ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
|
|
4661
4728
|
}
|
|
4662
4729
|
updateExperimentStatus(db, exp.id, "resolved");
|
|
4663
|
-
const
|
|
4730
|
+
const swarmGateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
|
|
4664
4731
|
` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
|
|
4732
|
+
const accumulatedSwarmGate = accumulateGuidance(exp.builder_guidance, swarmGateGuidance);
|
|
4665
4733
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4666
4734
|
db.transaction(() => {
|
|
4667
|
-
storeBuilderGuidance(db, exp.id,
|
|
4735
|
+
storeBuilderGuidance(db, exp.id, accumulatedSwarmGate);
|
|
4668
4736
|
updateExperimentStatus(db, exp.id, "building");
|
|
4669
4737
|
if (exp.sub_type) {
|
|
4670
4738
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
@@ -4703,16 +4771,47 @@ async function resolveDbOnly(db, exp, projectRoot) {
|
|
|
4703
4771
|
confirmedDoubts,
|
|
4704
4772
|
taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
|
|
4705
4773
|
}, projectRoot);
|
|
4706
|
-
const
|
|
4774
|
+
const rawGuidance = guidance.structured?.guidance ?? guidance.output;
|
|
4775
|
+
const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
|
|
4707
4776
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4708
4777
|
db.transaction(() => {
|
|
4709
|
-
storeBuilderGuidance(db, exp.id,
|
|
4778
|
+
storeBuilderGuidance(db, exp.id, accumulated);
|
|
4710
4779
|
updateExperimentStatus(db, exp.id, "building");
|
|
4711
4780
|
if (exp.sub_type) {
|
|
4712
4781
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
4713
4782
|
}
|
|
4783
|
+
const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
|
|
4784
|
+
for (const rc of rejectedInWeak) {
|
|
4785
|
+
insertDeadEnd(
|
|
4786
|
+
db,
|
|
4787
|
+
exp.id,
|
|
4788
|
+
`${rc.component} (iteration within ${exp.slug})`,
|
|
4789
|
+
rc.notes ?? "rejected by verifier",
|
|
4790
|
+
`Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
|
|
4791
|
+
exp.sub_type,
|
|
4792
|
+
"structural"
|
|
4793
|
+
);
|
|
4794
|
+
}
|
|
4795
|
+
if (rejectedInWeak.length > 0) {
|
|
4796
|
+
info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
|
|
4797
|
+
}
|
|
4798
|
+
const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
|
|
4799
|
+
for (const da of deadApproaches) {
|
|
4800
|
+
insertDeadEnd(
|
|
4801
|
+
db,
|
|
4802
|
+
exp.id,
|
|
4803
|
+
da.approach,
|
|
4804
|
+
da.reason,
|
|
4805
|
+
da.reason,
|
|
4806
|
+
exp.sub_type,
|
|
4807
|
+
"structural"
|
|
4808
|
+
);
|
|
4809
|
+
}
|
|
4810
|
+
if (deadApproaches.length > 0) {
|
|
4811
|
+
info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
|
|
4812
|
+
}
|
|
4714
4813
|
})();
|
|
4715
|
-
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance
|
|
4814
|
+
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated.`);
|
|
4716
4815
|
break;
|
|
4717
4816
|
}
|
|
4718
4817
|
case "rejected": {
|
|
@@ -4805,7 +4904,7 @@ ${gaps}
|
|
|
4805
4904
|
`;
|
|
4806
4905
|
fs10.writeFileSync(fragPath, content + entry);
|
|
4807
4906
|
}
|
|
4808
|
-
var fs10, path10, import_node_child_process4;
|
|
4907
|
+
var fs10, path10, import_node_child_process4, GUIDANCE_MAX_CHARS;
|
|
4809
4908
|
var init_resolve = __esm({
|
|
4810
4909
|
"src/resolve.ts"() {
|
|
4811
4910
|
"use strict";
|
|
@@ -4820,6 +4919,7 @@ var init_resolve = __esm({
|
|
|
4820
4919
|
import_node_child_process4 = require("child_process");
|
|
4821
4920
|
init_git();
|
|
4822
4921
|
init_format();
|
|
4922
|
+
GUIDANCE_MAX_CHARS = 12e3;
|
|
4823
4923
|
}
|
|
4824
4924
|
});
|
|
4825
4925
|
|
|
@@ -5424,11 +5524,15 @@ function gitCommitBuild(exp, cwd) {
|
|
|
5424
5524
|
info("No code changes to commit.");
|
|
5425
5525
|
return;
|
|
5426
5526
|
}
|
|
5427
|
-
const
|
|
5527
|
+
const guidance = exp.builder_guidance ?? "";
|
|
5528
|
+
const iterationNums = guidance.match(/### Iteration (\d+)/g)?.map((m) => parseInt(m.replace("### Iteration ", ""), 10)) ?? [];
|
|
5529
|
+
const iterNum = iterationNums.length > 0 ? Math.max(...iterationNums) + 1 : 1;
|
|
5530
|
+
const iterTag = iterNum > 1 ? ` [iter ${iterNum}]` : "";
|
|
5531
|
+
const msg = `EXP-${String(exp.id).padStart(3, "0")}: ${exp.slug}${iterTag}
|
|
5428
5532
|
|
|
5429
5533
|
${exp.hypothesis ?? ""}`;
|
|
5430
5534
|
(0, import_node_child_process5.execFileSync)("git", ["commit", "-m", msg], { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
|
|
5431
|
-
info(`Committed builder changes on ${exp.branch}.`);
|
|
5535
|
+
info(`Committed builder changes on ${exp.branch}${iterTag}.`);
|
|
5432
5536
|
} catch {
|
|
5433
5537
|
warn("Could not auto-commit builder changes \u2014 commit manually before resolving.");
|
|
5434
5538
|
}
|