npm - majlis - Versions diffs - 0.8.1 → 0.8.3 - Mend

majlis 0.8.1 → 0.8.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/cli.js +124 -24
package/package.json +1 -1

package/dist/cli.js CHANGED

@@ -531,7 +531,7 @@ Read source code at the specific locations relevant to your change. Do NOT
 read the entire codebase or run diagnostic Python scripts. If the synthesis
 says "lines 1921-22" then read those lines and their context. That's it.
-Do NOT read raw data files (fixtures/, ground truth JSON/STL). The synthesis
+Do NOT read raw data files (fixtures/, ground truth, test data). The synthesis
 has the relevant facts. Reading raw data wastes turns re-deriving what the
 doubt/challenge/verify cycle already established.
@@ -570,7 +570,7 @@ Do NOT iterate. Do NOT "try one more thing." The adversary, critic, and verifier
 exist to diagnose what went wrong. The cycle comes back to you with their insights.
 ## Off-limits (DO NOT modify)
-- \`fixtures/\` \u2014 test data, ground truth, STL files. Read-only.
+- \`fixtures/\` \u2014 test data, ground truth, reference outputs. Read-only.
 - \`scripts/benchmark.py\` \u2014 the measurement tool. Never change how you're measured.
 - \`.majlis/\` \u2014 framework config. Not your concern.
@@ -1153,9 +1153,9 @@ falsifiable structural constraint that blocks future experiments from repeating
 ## Constraint Quality
 Good constraints are specific and block future repetition:
-- "L6 config space is null \u2014 13-eval Bayesian sweep found all 12 params insensitive (ls=1.27), score ceiling 0.67"
-- "Relaxing curvature split threshold in recursive_curvature_split causes false splits on pure-surface thin strips (seg_pct 95->72.5)"
-- "Torus topology prevents genus-0 assumption for manifold extraction"
+- "Binary search fallback in sortedMerge() is O(n log n) worst-case when input contains >40% duplicates \u2014 measured via 13-run sweep, ceiling 0.67x baseline"
+- "Relaxing the pruning threshold in recursiveSplit() causes false positives on sparse inputs (accuracy 95->72.5%)"
+- "Cyclic dependency in the module graph prevents topological sort \u2014 requires at least one back-edge cut"
 Bad constraints are vague and useless:
 - "Didn't work"
@@ -2436,7 +2436,7 @@ ${taskPrompt}`;
 }
 async function spawnSynthesiser(context, projectRoot, opts) {
   const root = projectRoot ?? findProjectRoot() ?? process.cwd();
-  const maxTurns = opts?.maxTurns ?? 5;
+  const maxTurns = opts?.maxTurns ?? 15;
   const tools = opts?.tools ?? ["Read", "Glob", "Grep"];
   const contextJson = JSON.stringify(context);
   const taskPrompt = context.taskPrompt ?? "Synthesise the findings into actionable builder guidance.";
@@ -2447,7 +2447,7 @@ ${contextJson}
 \`\`\`
 ${taskPrompt}`;
-  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
+  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy.\n\nIf any approach from this iteration is PROVABLY dead (not just needs refinement \u2014 structurally impossible or fundamentally wrong), flag it on its own line as:\n[DEAD-APPROACH] approach name: why it cannot work\n\nOnly use [DEAD-APPROACH] for approaches that are mathematically impossible, violate invariants, or have been empirically proven to fail. Do NOT flag approaches that merely need adjustment.\n\nCRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
   console.log(`[synthesiser] Spawning (maxTurns: ${maxTurns})...`);
   const { text: markdown, costUsd, truncated } = await runQuery({
     prompt,
@@ -2540,7 +2540,7 @@ async function generateSlug(hypothesis, projectRoot) {
 "${hypothesis.slice(0, 500)}"
-Output ONLY the slug, nothing else. Examples: uv-containment-filter, skip-degenerate-faces, fix-edge-sewing-order`,
+Output ONLY the slug, nothing else. Examples: add-index-lookup, skip-empty-batches, fix-retry-backoff`,
       model: "haiku",
       tools: [],
       systemPrompt: "Output only a short hyphenated slug. No explanation, no quotes, no punctuation except hyphens.",
@@ -2615,13 +2615,13 @@ var init_spawn = __esm({
       adversary: 30,
       verifier: 50,
       compressor: 30,
-      reframer: 20,
-      scout: 20,
-      gatekeeper: 10,
+      reframer: 30,
+      scout: 30,
+      gatekeeper: 15,
       diagnostician: 60,
       cartographer: 40,
       toolsmith: 30,
-      postmortem: 20
+      postmortem: 30
     };
     CHECKPOINT_INTERVAL = {
       builder: 12,
@@ -4517,6 +4517,41 @@ var init_metrics = __esm({
 });
 // src/resolve.ts
+function accumulateGuidance(existing, newGuidance) {
+  const iterationNums = existing?.match(/### Iteration (\d+)/g)?.map((m) => parseInt(m.replace("### Iteration ", ""), 10)) ?? [];
+  const maxExisting = iterationNums.length > 0 ? Math.max(...iterationNums) : 0;
+  const iterationNum = maxExisting + 1;
+  const header2 = `### Iteration ${iterationNum} (latest)`;
+  const newBlock = `${header2}
+${newGuidance}`;
+  if (!existing) return newBlock;
+  const cleaned = existing.replace(/ \(latest\)/g, "");
+  const accumulated = `${newBlock}
+---
+${cleaned}`;
+  if (accumulated.length <= GUIDANCE_MAX_CHARS) return accumulated;
+  const sections = accumulated.split(/(?=^### Iteration \d+)/m);
+  let result = "";
+  for (const section of sections) {
+    if (result.length + section.length > GUIDANCE_MAX_CHARS && result.length > 0) {
+      result += "\n\n[Earlier iterations truncated]";
+      break;
+    }
+    result += section;
+  }
+  return result;
+}
+function parseSynthesiserDeadApproaches(output) {
+  const results = [];
+  const regex = /\[DEAD-APPROACH\]\s*(.+?):\s*(.+)/g;
+  let match;
+  while ((match = regex.exec(output)) !== null) {
+    results.push({ approach: match[1].trim(), reason: match[2].trim() });
+  }
+  return results;
+}
 function worstGrade(grades) {
   if (grades.length === 0) {
     throw new Error("Cannot determine grade from empty verification set \u2014 this indicates a data integrity issue");
@@ -4551,11 +4586,12 @@ async function resolve2(db, exp, projectRoot) {
       warn(`  ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
     }
     updateExperimentStatus(db, exp.id, "resolved");
-    const guidanceText = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
+    const gateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
 ` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
+    const accumulatedGate = accumulateGuidance(exp.builder_guidance, gateGuidance);
     transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
     db.transaction(() => {
-      storeBuilderGuidance(db, exp.id, guidanceText);
+      storeBuilderGuidance(db, exp.id, accumulatedGate);
       updateExperimentStatus(db, exp.id, "building");
       if (exp.sub_type) {
         incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
@@ -4598,16 +4634,47 @@ async function resolve2(db, exp, projectRoot) {
         confirmedDoubts,
         taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
       }, projectRoot);
-      const guidanceText = guidance.structured?.guidance ?? guidance.output;
+      const rawGuidance = guidance.structured?.guidance ?? guidance.output;
+      const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
       transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
       db.transaction(() => {
-        storeBuilderGuidance(db, exp.id, guidanceText);
+        storeBuilderGuidance(db, exp.id, accumulated);
         updateExperimentStatus(db, exp.id, "building");
         if (exp.sub_type) {
           incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
         }
+        const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
+        for (const rc of rejectedInWeak) {
+          insertDeadEnd(
+            db,
+            exp.id,
+            `${rc.component} (iteration within ${exp.slug})`,
+            rc.notes ?? "rejected by verifier",
+            `Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
+            exp.sub_type,
+            "structural"
+          );
+        }
+        if (rejectedInWeak.length > 0) {
+          info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
+        }
+        const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
+        for (const da of deadApproaches) {
+          insertDeadEnd(
+            db,
+            exp.id,
+            da.approach,
+            da.reason,
+            da.reason,
+            exp.sub_type,
+            "structural"
+          );
+        }
+        if (deadApproaches.length > 0) {
+          info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
+        }
       })();
-      warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance generated for builder.`);
+      warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated for builder.`);
       break;
     }
     case "rejected": {
@@ -4660,11 +4727,12 @@ async function resolveDbOnly(db, exp, projectRoot) {
       warn(`  ${v.fixture} / ${v.metric}: ${v.before} \u2192 ${v.after} (${v.delta > 0 ? "+" : ""}${v.delta})`);
     }
     updateExperimentStatus(db, exp.id, "resolved");
-    const guidanceText = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
+    const swarmGateGuidance = `Gate fixture regression blocks merge. Fix these regressions before re-attempting:
 ` + gateViolations.map((v) => `- ${v.fixture} / ${v.metric}: was ${v.before}, now ${v.after}`).join("\n");
+    const accumulatedSwarmGate = accumulateGuidance(exp.builder_guidance, swarmGateGuidance);
     transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
     db.transaction(() => {
-      storeBuilderGuidance(db, exp.id, guidanceText);
+      storeBuilderGuidance(db, exp.id, accumulatedSwarmGate);
       updateExperimentStatus(db, exp.id, "building");
       if (exp.sub_type) {
         incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
@@ -4703,16 +4771,47 @@ async function resolveDbOnly(db, exp, projectRoot) {
         confirmedDoubts,
         taskPrompt: "Synthesise the verification report, confirmed doubts, and adversarial case results into specific, actionable guidance for the builder's next attempt. Be concrete: which specific decisions need revisiting, which assumptions broke, and what constraints must the next approach satisfy."
       }, projectRoot);
-      const guidanceText = guidance.structured?.guidance ?? guidance.output;
+      const rawGuidance = guidance.structured?.guidance ?? guidance.output;
+      const accumulated = accumulateGuidance(exp.builder_guidance, rawGuidance);
       transition("resolved" /* RESOLVED */, "building" /* BUILDING */);
       db.transaction(() => {
-        storeBuilderGuidance(db, exp.id, guidanceText);
+        storeBuilderGuidance(db, exp.id, accumulated);
         updateExperimentStatus(db, exp.id, "building");
         if (exp.sub_type) {
           incrementSubTypeFailure(db, exp.sub_type, exp.id, "weak");
         }
+        const rejectedInWeak = grades.filter((g) => g.grade === "rejected");
+        for (const rc of rejectedInWeak) {
+          insertDeadEnd(
+            db,
+            exp.id,
+            `${rc.component} (iteration within ${exp.slug})`,
+            rc.notes ?? "rejected by verifier",
+            `Component ${rc.component} rejected: ${rc.notes ?? "approach does not work"}`,
+            exp.sub_type,
+            "structural"
+          );
+        }
+        if (rejectedInWeak.length > 0) {
+          info(`Registered ${rejectedInWeak.length} component-level dead-end(s) from weak verification.`);
+        }
+        const deadApproaches = parseSynthesiserDeadApproaches(guidance.output);
+        for (const da of deadApproaches) {
+          insertDeadEnd(
+            db,
+            exp.id,
+            da.approach,
+            da.reason,
+            da.reason,
+            exp.sub_type,
+            "structural"
+          );
+        }
+        if (deadApproaches.length > 0) {
+          info(`Registered ${deadApproaches.length} dead approach(es) from synthesiser.`);
+        }
       })();
-      warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance generated.`);
+      warn(`Experiment ${exp.slug} CYCLING BACK (weak). Guidance accumulated.`);
       break;
     }
     case "rejected": {
@@ -4805,7 +4904,7 @@ ${gaps}
 `;
   fs10.writeFileSync(fragPath, content + entry);
 }
-var fs10, path10, import_node_child_process4;
+var fs10, path10, import_node_child_process4, GUIDANCE_MAX_CHARS;
 var init_resolve = __esm({
   "src/resolve.ts"() {
     "use strict";
@@ -4820,6 +4919,7 @@ var init_resolve = __esm({
     import_node_child_process4 = require("child_process");
     init_git();
     init_format();
+    GUIDANCE_MAX_CHARS = 12e3;
   }
 });
@@ -6697,7 +6797,7 @@ DO NOT read source code or use tools. All context you need is above. Plan from t
    - It must be specific and actionable \u2014 name the function or mechanism to change
    - Do NOT reference specific line numbers \u2014 they shift between experiments
    - The hypothesis should be a single sentence describing what to do, e.g.:
-     "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
+      "Replace the O(n^2) pairwise comparison in filterCandidates() with an interval-tree lookup"
 CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
 <!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "majlis",
-  "version": "0.8.1",
+  "version": "0.8.3",
   "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
   "bin": {
     "majlis": "./dist/cli.js"