majlis 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/cli.js +124 -24
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -531,7 +531,7 @@ Read source code at the specific locations relevant to your change. Do NOT
531
531
  read the entire codebase or run diagnostic Python scripts. If the synthesis
532
532
  says "lines 1921-22" then read those lines and their context. That's it.
533
533
 
534
- Do NOT read raw data files (fixtures/, ground truth JSON/STL). The synthesis
534
+ Do NOT read raw data files (fixtures/, ground truth, test data). The synthesis
535
535
  has the relevant facts. Reading raw data wastes turns re-deriving what the
536
536
  doubt/challenge/verify cycle already established.
537
537
 
@@ -570,7 +570,7 @@ Do NOT iterate. Do NOT "try one more thing." The adversary, critic, and verifier
570
570
  exist to diagnose what went wrong. The cycle comes back to you with their insights.
571
571
 
572
572
  ## Off-limits (DO NOT modify)
573
- - \`fixtures/\` \u2014 test data, ground truth, STL files. Read-only.
573
+ - \`fixtures/\` \u2014 test data, ground truth, reference outputs. Read-only.
574
574
  - \`scripts/benchmark.py\` \u2014 the measurement tool. Never change how you're measured.
575
575
  - \`.majlis/\` \u2014 framework config. Not your concern.
576
576
 
@@ -1153,9 +1153,9 @@ falsifiable structural constraint that blocks future experiments from repeating
1153
1153
  ## Constraint Quality
1154
1154
 
1155
1155
  Good constraints are specific and block future repetition:
1156
- - "L6 config space is null \u2014 13-eval Bayesian sweep found all 12 params insensitive (ls=1.27), score ceiling 0.67"
1157
- - "Relaxing curvature split threshold in recursive_curvature_split causes false splits on pure-surface thin strips (seg_pct 95->72.5)"
1158
- - "Torus topology prevents genus-0 assumption for manifold extraction"
1156
+ - "Binary search fallback in sortedMerge() is O(n log n) worst-case when input contains >40% duplicates \u2014 measured via 13-run sweep, ceiling 0.67x baseline"
1157
+ - "Relaxing the pruning threshold in recursiveSplit() causes false positives on sparse inputs (accuracy 95->72.5%)"
1158
+ - "Cyclic dependency in the module graph prevents topological sort \u2014 requires at least one back-edge cut"
1159
1159
 
1160
1160
  Bad constraints are vague and useless:
1161
1161
  - "Didn't work"
@@ -2436,7 +2436,7 @@ ${taskPrompt}`;
2436
2436
  }
2437
2437
  async function spawnSynthesiser(context, projectRoot, opts) {
2438
2438
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2439
- const maxTurns = opts?.maxTurns ?? 5;
2439
+ const maxTurns = opts?.maxTurns ?? 15;
2440
2440
  const tools = opts?.tools ?? ["Read", "Glob", "Grep"];
2441
2441
  const contextJson = JSON.stringify(context);
2442
2442
  const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
@@ -2447,7 +2447,7 @@ ${contextJson}
2447
2447
  \`\`\`
2448
2448
 
2449
2449
  ${taskPrompt}`;
2450
- const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2450
+ const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy.\n\nIf any approach from this iteration is PROVABLY dead (not just needs refinement \u2014 structurally impossible or fundamentally wrong), flag it on its own line as:\n[DEAD-APPROACH] approach name: why it cannot work\n\nOnly use [DEAD-APPROACH] for approaches that are mathematically impossible, violate invariants, or have been empirically proven to fail. Do NOT flag approaches that merely need adjustment.\n\nCRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2451
2451
  console.log(`[synthesiser] Spawning (maxTurns: ${maxTurns})...`);
2452
2452
  const { text: markdown, costUsd, truncated } = await runQuery({
2453
2453
  prompt,
@@ -2540,7 +2540,7 @@ async function generateSlug(hypothesis, projectRoot) {
2540
2540
 
2541
2541
  "${hypothesis.slice(0, 500)}"
2542
2542
 
2543
- Output ONLY the slug, nothing else. Examples: uv-containment-filter, skip-degenerate-faces, fix-edge-sewing-order`,
2543
+ Output ONLY the slug, nothing else. Examples: add-index-lookup, skip-empty-batches, fix-retry-backoff`,
2544
2544
  model: "haiku",
2545
2545
  tools: [],
2546
2546
  systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
@@ -2615,13 +2615,13 @@ var init_spawn = __esm({
2615
2615
  adversary: 30,
2616
2616
  verifier: 50,
2617
2617
  compressor: 30,
2618
- reframer: 20,
2619
- scout: 20,
2620
- gatekeeper: 10,
2618
+ reframer: 30,
2619
+ scout: 30,
2620
+ gatekeeper: 15,
2621
2621
  diagnostician: 60,
2622
2622
  cartographer: 40,
2623
2623
  toolsmith: 30,
2624
- postmortem: 20
2624
+ postmortem: 30
2625
2625
  };
2626
2626
  CHECKPOINT_INTERVAL = {
2627
2627
  builder: 12,
@@ -4517,6 +4517,41 @@ var init_metrics = __esm({
4517
4517
  });
4518
4518
 
4519
4519
  // src/resolve.ts
4520
+ function accumulateGuidance(existing, newGuidance) {
4521
+ const iterationNums = existing?.match(/### Iteration (\d+)/g)?.map((m) => parseInt(m.replace("### Iteration ", ""), 10)) ?? [];
4522
+ const maxExisting = iterationNums.length > 0 ? Math.max(...iterationNums) : 0;
4523
+ const iterationNum = maxExisting + 1;
4524
+ const header2 = `### Iteration ${iterationNum} (latest)`;
4525
+ const newBlock = `${header2}
4526
+ ${newGuidance}`;
4527
+ if (!existing) return newBlock;
4528
+ const cleaned = existing.replace(/ \(latest\)/g, "");
4529
+ const accumulated = `${newBlock}
4530
+
4531
+ ---
4532
+
4533
+ ${cleaned}`;
4534
+ if (accumulated.length <= GUIDANCE_MAX_CHARS) return accumulated;
4535
+ const sections = accumulated.split(/(?=^### Iteration \d+)/m);
4536
+ let result = "";
4537
+ for (const section of sections) {
4538
+ if (result.length + section.length > GUIDANCE_MAX_CHARS && result.length > 0) {
4539
+ result += "\n\n[Earlier iterations truncated]";
4540
+ break;
4541
+ }
4542
+ result += section;
4543
+ }
4544
+ return result;
4545
+ }
4546
+ function parseSynthesiserDeadApproaches(output) {
4547
+ const results = [];
4548
+ const regex = /\[DEAD-APPROACH\]\s*(.+?):\s*(.+)/g;
4549
+ let match;
4550
+ while ((match = regex.exec(output)) !== null) {
4551
+ results.push({ approach: match[1].trim(), reason: match[2].trim() });
4552
+ }
4553
+ return results;
4554
+ }
4520
4555
  function worstGrade(grades) {
4521
4556
  if (grades.length === 0) {
4522
4557
  throw new Error("Cannot determine grade from empty verification set \u2014 this indicates a data integrity issue");
@@ -4551,11 +4586,12 @@ async function resolve2(db, exp, projectRoot) {
4551
4586
  warn(` ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
4552
4587
  }
4553
4588
  updateExperimentStatus(db, exp.id, "resolved");
4554
- const guidanceText = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
4589
+ const gateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
4555
4590
  ` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
4591
+ const accumulatedGate = accumulateGuidance(exp.builder_guidance, gateGuidance);
4556
4592
  transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
4557
4593
  db.transaction(() => {
4558
- storeBuilderGuidance(db, exp.id, guidanceText);
4594
+ storeBuilderGuidance(db, exp.id, accumulatedGate);
4559
4595
  updateExperimentStatus(db, exp.id, "building");
4560
4596
  if (exp.sub_type) {
4561
4597
  incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
@@ -4598,16 +4634,47 @@ async function resolve2(db, exp, projectRoot) {
4598
4634
  confirmedDoubts,
4599
4635
  taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
4600
4636
  }, projectRoot);
4601
- const guidanceText = guidance.structured?.guidance ?? guidance.output;
4637
+ const rawGuidance = guidance.structured?.guidance ?? guidance.output;
4638
+ const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
4602
4639
  transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
4603
4640
  db.transaction(() => {
4604
- storeBuilderGuidance(db, exp.id, guidanceText);
4641
+ storeBuilderGuidance(db, exp.id, accumulated);
4605
4642
  updateExperimentStatus(db, exp.id, "building");
4606
4643
  if (exp.sub_type) {
4607
4644
  incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
4608
4645
  }
4646
+ const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
4647
+ for (const rc of rejectedInWeak) {
4648
+ insertDeadEnd(
4649
+ db,
4650
+ exp.id,
4651
+ `${rc.component} (iteration within ${exp.slug})`,
4652
+ rc.notes ?? "rejected by verifier",
4653
+ `Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
4654
+ exp.sub_type,
4655
+ "structural"
4656
+ );
4657
+ }
4658
+ if (rejectedInWeak.length > 0) {
4659
+ info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
4660
+ }
4661
+ const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
4662
+ for (const da of deadApproaches) {
4663
+ insertDeadEnd(
4664
+ db,
4665
+ exp.id,
4666
+ da.approach,
4667
+ da.reason,
4668
+ da.reason,
4669
+ exp.sub_type,
4670
+ "structural"
4671
+ );
4672
+ }
4673
+ if (deadApproaches.length > 0) {
4674
+ info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
4675
+ }
4609
4676
  })();
4610
- warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance generated for builder.`);
4677
+ warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated for builder.`);
4611
4678
  break;
4612
4679
  }
4613
4680
  case "rejected": {
@@ -4660,11 +4727,12 @@ async function resolveDbOnly(db, exp, projectRoot) {
4660
4727
  warn(` ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
4661
4728
  }
4662
4729
  updateExperimentStatus(db, exp.id, "resolved");
4663
- const guidanceText = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
4730
+ const swarmGateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
4664
4731
  ` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
4732
+ const accumulatedSwarmGate = accumulateGuidance(exp.builder_guidance, swarmGateGuidance);
4665
4733
  transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
4666
4734
  db.transaction(() => {
4667
- storeBuilderGuidance(db, exp.id, guidanceText);
4735
+ storeBuilderGuidance(db, exp.id, accumulatedSwarmGate);
4668
4736
  updateExperimentStatus(db, exp.id, "building");
4669
4737
  if (exp.sub_type) {
4670
4738
  incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
@@ -4703,16 +4771,47 @@ async function resolveDbOnly(db, exp, projectRoot) {
4703
4771
  confirmedDoubts,
4704
4772
  taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
4705
4773
  }, projectRoot);
4706
- const guidanceText = guidance.structured?.guidance ?? guidance.output;
4774
+ const rawGuidance = guidance.structured?.guidance ?? guidance.output;
4775
+ const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
4707
4776
  transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
4708
4777
  db.transaction(() => {
4709
- storeBuilderGuidance(db, exp.id, guidanceText);
4778
+ storeBuilderGuidance(db, exp.id, accumulated);
4710
4779
  updateExperimentStatus(db, exp.id, "building");
4711
4780
  if (exp.sub_type) {
4712
4781
  incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
4713
4782
  }
4783
+ const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
4784
+ for (const rc of rejectedInWeak) {
4785
+ insertDeadEnd(
4786
+ db,
4787
+ exp.id,
4788
+ `${rc.component} (iteration within ${exp.slug})`,
4789
+ rc.notes ?? "rejected by verifier",
4790
+ `Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
4791
+ exp.sub_type,
4792
+ "structural"
4793
+ );
4794
+ }
4795
+ if (rejectedInWeak.length > 0) {
4796
+ info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
4797
+ }
4798
+ const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
4799
+ for (const da of deadApproaches) {
4800
+ insertDeadEnd(
4801
+ db,
4802
+ exp.id,
4803
+ da.approach,
4804
+ da.reason,
4805
+ da.reason,
4806
+ exp.sub_type,
4807
+ "structural"
4808
+ );
4809
+ }
4810
+ if (deadApproaches.length > 0) {
4811
+ info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
4812
+ }
4714
4813
  })();
4715
- warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance generated.`);
4814
+ warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated.`);
4716
4815
  break;
4717
4816
  }
4718
4817
  case "rejected": {
@@ -4805,7 +4904,7 @@ ${gaps}
4805
4904
  `;
4806
4905
  fs10.writeFileSync(fragPath, content + entry);
4807
4906
  }
4808
- var fs10, path10, import_node_child_process4;
4907
+ var fs10, path10, import_node_child_process4, GUIDANCE_MAX_CHARS;
4809
4908
  var init_resolve = __esm({
4810
4909
  "src/resolve.ts"() {
4811
4910
  "use strict";
@@ -4820,6 +4919,7 @@ var init_resolve = __esm({
4820
4919
  import_node_child_process4 = require("child_process");
4821
4920
  init_git();
4822
4921
  init_format();
4922
+ GUIDANCE_MAX_CHARS = 12e3;
4823
4923
  }
4824
4924
  });
4825
4925
 
@@ -6697,7 +6797,7 @@ DO NOT read source code or use tools. All context you need is above. Plan from t
6697
6797
  - It must be specific and actionable \u2014 name the function or mechanism to change
6698
6798
  - Do NOT reference specific line numbers \u2014 they shift between experiments
6699
6799
  - The hypothesis should be a single sentence describing what to do, e.g.:
6700
- "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
6800
+ "Replace the O(n^2) pairwise comparison in filterCandidates() with an interval-tree lookup"
6701
6801
 
6702
6802
  CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
6703
6803
  <!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "majlis",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
5
5
  "bin": {
6
6
  "majlis": "./dist/cli.js"