majlis 0.8.1 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +124 -24
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -531,7 +531,7 @@ Read source code at the specific locations relevant to your change. Do NOT
|
|
|
531
531
|
read the entire codebase or run diagnostic Python scripts. If the synthesis
|
|
532
532
|
says "lines 1921-22" then read those lines and their context. That's it.
|
|
533
533
|
|
|
534
|
-
Do NOT read raw data files (fixtures/, ground truth
|
|
534
|
+
Do NOT read raw data files (fixtures/, ground truth, test data). The synthesis
|
|
535
535
|
has the relevant facts. Reading raw data wastes turns re-deriving what the
|
|
536
536
|
doubt/challenge/verify cycle already established.
|
|
537
537
|
|
|
@@ -570,7 +570,7 @@ Do NOT iterate. Do NOT "try one more thing." The adversary, critic, and verifier
|
|
|
570
570
|
exist to diagnose what went wrong. The cycle comes back to you with their insights.
|
|
571
571
|
|
|
572
572
|
## Off-limits (DO NOT modify)
|
|
573
|
-
- \`fixtures/\` \u2014 test data, ground truth,
|
|
573
|
+
- \`fixtures/\` \u2014 test data, ground truth, reference outputs. Read-only.
|
|
574
574
|
- \`scripts/benchmark.py\` \u2014 the measurement tool. Never change how you're measured.
|
|
575
575
|
- \`.majlis/\` \u2014 framework config. Not your concern.
|
|
576
576
|
|
|
@@ -1153,9 +1153,9 @@ falsifiable structural constraint that blocks future experiments from repeating
|
|
|
1153
1153
|
## Constraint Quality
|
|
1154
1154
|
|
|
1155
1155
|
Good constraints are specific and block future repetition:
|
|
1156
|
-
- "
|
|
1157
|
-
- "Relaxing
|
|
1158
|
-
- "
|
|
1156
|
+
- "Binary search fallback in sortedMerge() is O(n log n) worst-case when input contains >40% duplicates \u2014 measured via 13-run sweep, ceiling 0.67x baseline"
|
|
1157
|
+
- "Relaxing the pruning threshold in recursiveSplit() causes false positives on sparse inputs (accuracy 95->72.5%)"
|
|
1158
|
+
- "Cyclic dependency in the module graph prevents topological sort \u2014 requires at least one back-edge cut"
|
|
1159
1159
|
|
|
1160
1160
|
Bad constraints are vague and useless:
|
|
1161
1161
|
- "Didn't work"
|
|
@@ -2436,7 +2436,7 @@ ${taskPrompt}`;
|
|
|
2436
2436
|
}
|
|
2437
2437
|
async function spawnSynthesiser(context, projectRoot, opts) {
|
|
2438
2438
|
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2439
|
-
const maxTurns = opts?.maxTurns ??
|
|
2439
|
+
const maxTurns = opts?.maxTurns ?? 15;
|
|
2440
2440
|
const tools = opts?.tools ?? ["Read", "Glob", "Grep"];
|
|
2441
2441
|
const contextJson = JSON.stringify(context);
|
|
2442
2442
|
const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
|
|
@@ -2447,7 +2447,7 @@ ${contextJson}
|
|
|
2447
2447
|
\`\`\`
|
|
2448
2448
|
|
|
2449
2449
|
${taskPrompt}`;
|
|
2450
|
-
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy.
|
|
2450
|
+
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy.\n\nIf any approach from this iteration is PROVABLY dead (not just needs refinement \u2014 structurally impossible or fundamentally wrong), flag it on its own line as:\n[DEAD-APPROACH] approach name: why it cannot work\n\nOnly use [DEAD-APPROACH] for approaches that are mathematically impossible, violate invariants, or have been empirically proven to fail. Do NOT flag approaches that merely need adjustment.\n\nCRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
|
|
2451
2451
|
console.log(`[synthesiser] Spawning (maxTurns: ${maxTurns})...`);
|
|
2452
2452
|
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2453
2453
|
prompt,
|
|
@@ -2540,7 +2540,7 @@ async function generateSlug(hypothesis, projectRoot) {
|
|
|
2540
2540
|
|
|
2541
2541
|
"${hypothesis.slice(0, 500)}"
|
|
2542
2542
|
|
|
2543
|
-
Output ONLY the slug, nothing else. Examples:
|
|
2543
|
+
Output ONLY the slug, nothing else. Examples: add-index-lookup, skip-empty-batches, fix-retry-backoff`,
|
|
2544
2544
|
model: "haiku",
|
|
2545
2545
|
tools: [],
|
|
2546
2546
|
systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
|
|
@@ -2615,13 +2615,13 @@ var init_spawn = __esm({
|
|
|
2615
2615
|
adversary: 30,
|
|
2616
2616
|
verifier: 50,
|
|
2617
2617
|
compressor: 30,
|
|
2618
|
-
reframer:
|
|
2619
|
-
scout:
|
|
2620
|
-
gatekeeper:
|
|
2618
|
+
reframer: 30,
|
|
2619
|
+
scout: 30,
|
|
2620
|
+
gatekeeper: 15,
|
|
2621
2621
|
diagnostician: 60,
|
|
2622
2622
|
cartographer: 40,
|
|
2623
2623
|
toolsmith: 30,
|
|
2624
|
-
postmortem:
|
|
2624
|
+
postmortem: 30
|
|
2625
2625
|
};
|
|
2626
2626
|
CHECKPOINT_INTERVAL = {
|
|
2627
2627
|
builder: 12,
|
|
@@ -4517,6 +4517,41 @@ var init_metrics = __esm({
|
|
|
4517
4517
|
});
|
|
4518
4518
|
|
|
4519
4519
|
// src/resolve.ts
|
|
4520
|
+
/**
 * Prepend a new block of builder guidance to the guidance already stored for
 * an experiment, so that guidance from earlier iterations is kept instead of
 * being overwritten.
 *
 * Each block is introduced by a `### Iteration N` header; only the newest
 * block carries the ` (latest)` suffix. Blocks are separated by a `---` rule
 * and ordered newest-first.
 *
 * @param {string | null | undefined} existing - Previously stored guidance, if any.
 * @param {string | null | undefined} newGuidance - Guidance produced by the current iteration.
 * @returns {string} The combined guidance. When the combined text is longer
 *   than the module-level GUIDANCE_MAX_CHARS, whole sections are dropped from
 *   the oldest end and a truncation marker is appended. The newest section is
 *   always kept in full, even if it alone is over the limit.
 */
function accumulateGuidance(existing, newGuidance) {
  // Only real headers (at the start of a line) count towards numbering, which
  // matches how sections are split below. A mid-line mention of
  // "### Iteration 7" inside guidance text must not bump the counter.
  const headerNumbers = existing
    ? Array.from(existing.matchAll(/^### Iteration (\d+)/gm), (m) => Number.parseInt(m[1], 10))
    : [];
  const iterationNum = (headerNumbers.length > 0 ? Math.max(...headerNumbers) : 0) + 1;

  // A nullish guidance value becomes an empty body rather than the literal
  // text "undefined"/"null" ending up in the builder prompt.
  const newBlock = `### Iteration ${iterationNum} (latest)\n${newGuidance ?? ""}`;
  if (!existing) return newBlock;

  // Demote the previous "(latest)" marker, but only on header lines so that
  // the same phrase inside the guidance text itself is left untouched.
  const cleaned = existing.replace(/^(### Iteration \d+) \(latest\)/gm, "$1");
  const accumulated = `${newBlock}\n\n---\n\n${cleaned}`;
  if (accumulated.length <= GUIDANCE_MAX_CHARS) return accumulated;

  // Over budget: keep whole sections, newest first, until the next one would
  // not fit. The zero-width lookahead keeps each header with its own section.
  const sections = accumulated.split(/(?=^### Iteration \d+)/m);
  let result = "";
  for (const section of sections) {
    if (result.length > 0 && result.length + section.length > GUIDANCE_MAX_CHARS) {
      result += "\n\n[Earlier iterations truncated]";
      break;
    }
    result += section;
  }
  return result;
}
|
|
4546
|
+
/**
 * Extract `[DEAD-APPROACH] name: reason` flags from synthesiser output.
 *
 * @param {string} output - Raw text produced by the synthesiser.
 * @returns {{ approach: string, reason: string }[]} One entry per distinct
 *   approach name, in order of first appearance. Returns an empty array when
 *   `output` is not a string or contains no flags.
 */
function parseSynthesiserDeadApproaches(output) {
  if (typeof output !== "string") return [];

  // The approach name must follow the tag on the SAME line: only spaces/tabs
  // are allowed between them. Otherwise a bare "[DEAD-APPROACH]" mention at
  // the end of a sentence would capture the next unrelated line that happens
  // to contain a colon.
  const pattern = /\[DEAD-APPROACH\][ \t]*(.+?):\s*(.+)/g;

  // An approach flagged more than once is recorded only once (first wins),
  // compared case-insensitively, so callers do not store duplicate entries.
  const seen = new Set();
  const results = [];
  for (const match of output.matchAll(pattern)) {
    const approach = match[1].trim();
    const key = approach.toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);
    results.push({ approach, reason: match[2].trim() });
  }
  return results;
}
|
|
4520
4555
|
function worstGrade(grades) {
|
|
4521
4556
|
if (grades.length === 0) {
|
|
4522
4557
|
throw new Error("Cannot determine grade from empty verification set \u2014 this indicates a data integrity issue");
|
|
@@ -4551,11 +4586,12 @@ async function resolve2(db, exp, projectRoot) {
|
|
|
4551
4586
|
warn(` ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
|
|
4552
4587
|
}
|
|
4553
4588
|
updateExperimentStatus(db, exp.id, "resolved");
|
|
4554
|
-
const
|
|
4589
|
+
const gateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
|
|
4555
4590
|
` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
|
|
4591
|
+
const accumulatedGate = accumulateGuidance(exp.builder_guidance, gateGuidance);
|
|
4556
4592
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4557
4593
|
db.transaction(() => {
|
|
4558
|
-
storeBuilderGuidance(db, exp.id,
|
|
4594
|
+
storeBuilderGuidance(db, exp.id, accumulatedGate);
|
|
4559
4595
|
updateExperimentStatus(db, exp.id, "building");
|
|
4560
4596
|
if (exp.sub_type) {
|
|
4561
4597
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
@@ -4598,16 +4634,47 @@ async function resolve2(db, exp, projectRoot) {
|
|
|
4598
4634
|
confirmedDoubts,
|
|
4599
4635
|
taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
|
|
4600
4636
|
}, projectRoot);
|
|
4601
|
-
const
|
|
4637
|
+
const rawGuidance = guidance.structured?.guidance ?? guidance.output;
|
|
4638
|
+
const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
|
|
4602
4639
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4603
4640
|
db.transaction(() => {
|
|
4604
|
-
storeBuilderGuidance(db, exp.id,
|
|
4641
|
+
storeBuilderGuidance(db, exp.id, accumulated);
|
|
4605
4642
|
updateExperimentStatus(db, exp.id, "building");
|
|
4606
4643
|
if (exp.sub_type) {
|
|
4607
4644
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
4608
4645
|
}
|
|
4646
|
+
const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
|
|
4647
|
+
for (const rc of rejectedInWeak) {
|
|
4648
|
+
insertDeadEnd(
|
|
4649
|
+
db,
|
|
4650
|
+
exp.id,
|
|
4651
|
+
`${rc.component} (iteration within ${exp.slug})`,
|
|
4652
|
+
rc.notes ?? "rejected by verifier",
|
|
4653
|
+
`Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
|
|
4654
|
+
exp.sub_type,
|
|
4655
|
+
"structural"
|
|
4656
|
+
);
|
|
4657
|
+
}
|
|
4658
|
+
if (rejectedInWeak.length > 0) {
|
|
4659
|
+
info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
|
|
4660
|
+
}
|
|
4661
|
+
const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
|
|
4662
|
+
for (const da of deadApproaches) {
|
|
4663
|
+
insertDeadEnd(
|
|
4664
|
+
db,
|
|
4665
|
+
exp.id,
|
|
4666
|
+
da.approach,
|
|
4667
|
+
da.reason,
|
|
4668
|
+
da.reason,
|
|
4669
|
+
exp.sub_type,
|
|
4670
|
+
"structural"
|
|
4671
|
+
);
|
|
4672
|
+
}
|
|
4673
|
+
if (deadApproaches.length > 0) {
|
|
4674
|
+
info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
|
|
4675
|
+
}
|
|
4609
4676
|
})();
|
|
4610
|
-
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance
|
|
4677
|
+
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated for builder.`);
|
|
4611
4678
|
break;
|
|
4612
4679
|
}
|
|
4613
4680
|
case "rejected": {
|
|
@@ -4660,11 +4727,12 @@ async function resolveDbOnly(db, exp, projectRoot) {
|
|
|
4660
4727
|
warn(` ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
|
|
4661
4728
|
}
|
|
4662
4729
|
updateExperimentStatus(db, exp.id, "resolved");
|
|
4663
|
-
const
|
|
4730
|
+
const swarmGateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
|
|
4664
4731
|
` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
|
|
4732
|
+
const accumulatedSwarmGate = accumulateGuidance(exp.builder_guidance, swarmGateGuidance);
|
|
4665
4733
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4666
4734
|
db.transaction(() => {
|
|
4667
|
-
storeBuilderGuidance(db, exp.id,
|
|
4735
|
+
storeBuilderGuidance(db, exp.id, accumulatedSwarmGate);
|
|
4668
4736
|
updateExperimentStatus(db, exp.id, "building");
|
|
4669
4737
|
if (exp.sub_type) {
|
|
4670
4738
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
@@ -4703,16 +4771,47 @@ async function resolveDbOnly(db, exp, projectRoot) {
|
|
|
4703
4771
|
confirmedDoubts,
|
|
4704
4772
|
taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
|
|
4705
4773
|
}, projectRoot);
|
|
4706
|
-
const
|
|
4774
|
+
const rawGuidance = guidance.structured?.guidance ?? guidance.output;
|
|
4775
|
+
const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
|
|
4707
4776
|
transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
|
|
4708
4777
|
db.transaction(() => {
|
|
4709
|
-
storeBuilderGuidance(db, exp.id,
|
|
4778
|
+
storeBuilderGuidance(db, exp.id, accumulated);
|
|
4710
4779
|
updateExperimentStatus(db, exp.id, "building");
|
|
4711
4780
|
if (exp.sub_type) {
|
|
4712
4781
|
incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
|
|
4713
4782
|
}
|
|
4783
|
+
const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
|
|
4784
|
+
for (const rc of rejectedInWeak) {
|
|
4785
|
+
insertDeadEnd(
|
|
4786
|
+
db,
|
|
4787
|
+
exp.id,
|
|
4788
|
+
`${rc.component} (iteration within ${exp.slug})`,
|
|
4789
|
+
rc.notes ?? "rejected by verifier",
|
|
4790
|
+
`Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
|
|
4791
|
+
exp.sub_type,
|
|
4792
|
+
"structural"
|
|
4793
|
+
);
|
|
4794
|
+
}
|
|
4795
|
+
if (rejectedInWeak.length > 0) {
|
|
4796
|
+
info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
|
|
4797
|
+
}
|
|
4798
|
+
const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
|
|
4799
|
+
for (const da of deadApproaches) {
|
|
4800
|
+
insertDeadEnd(
|
|
4801
|
+
db,
|
|
4802
|
+
exp.id,
|
|
4803
|
+
da.approach,
|
|
4804
|
+
da.reason,
|
|
4805
|
+
da.reason,
|
|
4806
|
+
exp.sub_type,
|
|
4807
|
+
"structural"
|
|
4808
|
+
);
|
|
4809
|
+
}
|
|
4810
|
+
if (deadApproaches.length > 0) {
|
|
4811
|
+
info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
|
|
4812
|
+
}
|
|
4714
4813
|
})();
|
|
4715
|
-
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance
|
|
4814
|
+
warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated.`);
|
|
4716
4815
|
break;
|
|
4717
4816
|
}
|
|
4718
4817
|
case "rejected": {
|
|
@@ -4805,7 +4904,7 @@ ${gaps}
|
|
|
4805
4904
|
`;
|
|
4806
4905
|
fs10.writeFileSync(fragPath, content + entry);
|
|
4807
4906
|
}
|
|
4808
|
-
var fs10, path10, import_node_child_process4;
|
|
4907
|
+
var fs10, path10, import_node_child_process4, GUIDANCE_MAX_CHARS;
|
|
4809
4908
|
var init_resolve = __esm({
|
|
4810
4909
|
"src/resolve.ts"() {
|
|
4811
4910
|
"use strict";
|
|
@@ -4820,6 +4919,7 @@ var init_resolve = __esm({
|
|
|
4820
4919
|
import_node_child_process4 = require("child_process");
|
|
4821
4920
|
init_git();
|
|
4822
4921
|
init_format();
|
|
4922
|
+
GUIDANCE_MAX_CHARS = 12e3;
|
|
4823
4923
|
}
|
|
4824
4924
|
});
|
|
4825
4925
|
|
|
@@ -6697,7 +6797,7 @@ DO NOT read source code or use tools. All context you need is above. Plan from t
|
|
|
6697
6797
|
- It must be specific and actionable \u2014 name the function or mechanism to change
|
|
6698
6798
|
- Do NOT reference specific line numbers \u2014 they shift between experiments
|
|
6699
6799
|
- The hypothesis should be a single sentence describing what to do, e.g.:
|
|
6700
|
-
|
|
6800
|
+
"Replace the O(n^2) pairwise comparison in filterCandidates() with an interval-tree lookup"
|
|
6701
6801
|
|
|
6702
6802
|
CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
|
|
6703
6803
|
<!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->
|