npm - majlis - Versions diffs - 0.3.3 → 0.4.1 - Mend

majlis 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/cli.js +228 -47
package/package.json +1 -1

package/dist/cli.js CHANGED

@@ -529,8 +529,9 @@ You get ONE attempt per cycle. Your job is:
 2. Form ONE hypothesis about what to fix
 3. Implement ONE focused change (not a multi-step debug session)
 4. Run the benchmark ONCE to see the result
-5. Document what you did, what happened, and what you think the next step should be
-6. STOP
+5. Update the experiment doc in docs/experiments/ \u2014 fill in Approach, Results, and Metrics sections. This is NOT optional.
+6. Output the structured majlis-json block with your decisions
+7. STOP
 Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
 If your change doesn't work, document why and let the cycle continue \u2014
@@ -625,21 +626,28 @@ tools: [Read, Glob, Grep, Bash]
 ---
 You are the Verifier. Perform dual verification:
-PROVENANCE CHECK:
+## Scope Constraint (CRITICAL)
+You must produce your structured output (grades + doubt resolutions) within your turn budget.
+Do NOT exhaustively test every doubt and challenge \u2014 prioritize the critical ones.
+For each doubt/challenge: one targeted check is enough. Confirm, dismiss, or mark inconclusive.
+Reserve your final turns for writing the structured majlis-json output.
+The framework saves your output automatically. Do NOT attempt to write files.
+## PROVENANCE CHECK:
 - Can every piece of code trace to an experiment or decision?
 - Is the chain unbroken from requirement -> classification -> experiment -> code?
 - Flag any broken chains.
-CONTENT CHECK:
+## CONTENT CHECK:
 - Does the code do what the experiment log says?
-- Do tests demonstrate the hypothesis?
-- Write and run targeted tests against the critic's doubts AND the adversary's cases.
+- Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
+- Do NOT run exhaustive diagnostics on every claim.
 Grade each component: sound / good / weak / rejected
 Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
-Produce your verification report as output. The framework saves it automatically.
 ## Structured Output Format
 <!-- majlis-json
 {
@@ -2285,13 +2293,15 @@ ${contextJson}
 \`\`\`
 ${taskPrompt}`;
-  console.log(`[majlis] Spawning ${role} agent (model: ${agentDef.model})...`);
+  const turns = ROLE_MAX_TURNS[role] ?? 15;
+  console.log(`[majlis] Spawning ${role} agent (model: ${agentDef.model}, maxTurns: ${turns})...`);
   const { text: markdown, costUsd } = await runQuery({
     prompt,
     model: agentDef.model,
     tools: agentDef.tools,
     systemPrompt: agentDef.systemPrompt,
-    cwd: root
+    cwd: root,
+    maxTurns: turns
   });
   console.log(`[majlis] ${role} agent complete (cost: $${costUsd.toFixed(4)})`);
   const artifactPath = writeArtifact(role, context, markdown, root);
@@ -2312,14 +2322,15 @@ ${contextJson}
 \`\`\`
 ${taskPrompt}`;
-  const systemPrompt = "You are a Synthesis Agent. Your job is to take a verification report, confirmed doubts, and adversarial test results, and compress them into specific, actionable guidance for the builder's next attempt. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. Output a 'guidance' field in JSON wrapped in a <!-- majlis-json --> block.";
+  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
   console.log(`[majlis] Spawning synthesiser micro-agent...`);
   const { text: markdown, costUsd } = await runQuery({
     prompt,
     model: "opus",
     tools: ["Read", "Glob", "Grep"],
     systemPrompt,
-    cwd: root
+    cwd: root,
+    maxTurns: 5
   });
   console.log(`[majlis] Synthesiser complete (cost: $${costUsd.toFixed(4)})`);
   const structured = await extractStructuredData("synthesiser", markdown);
@@ -2339,7 +2350,7 @@ async function runQuery(opts) {
       cwd: opts.cwd,
       permissionMode: "bypassPermissions",
       allowDangerouslySkipPermissions: true,
-      maxTurns: 30,
+      maxTurns: opts.maxTurns ?? 15,
       persistSession: false,
       settingSources: ["project"]
     }
@@ -2439,7 +2450,7 @@ function writeArtifact(role, context, markdown, projectRoot) {
   fs7.writeFileSync(target, markdown);
   return target;
 }
-var fs7, path7, import_claude_agent_sdk2, DIM2, RESET2, CYAN2;
+var fs7, path7, import_claude_agent_sdk2, ROLE_MAX_TURNS, DIM2, RESET2, CYAN2;
 var init_spawn = __esm({
   "src/agents/spawn.ts"() {
     "use strict";
@@ -2448,6 +2459,15 @@ var init_spawn = __esm({
     import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
     init_parse();
     init_connection();
+    ROLE_MAX_TURNS = {
+      builder: 15,
+      critic: 12,
+      adversary: 12,
+      verifier: 15,
+      compressor: 15,
+      reframer: 12,
+      scout: 12
+    };
     DIM2 = "\x1B[2m";
     RESET2 = "\x1B[0m";
     CYAN2 = "\x1B[36m";
@@ -3114,6 +3134,8 @@ async function executeStep(step, exp, root) {
       break;
     case "compressed" /* COMPRESSED */:
       await cycle("compress", []);
+      updateExperimentStatus(getDb(root), exp.id, "compressed");
+      info(`Experiment ${exp.slug} compressed.`);
       break;
     case "reframed" /* REFRAMED */:
       updateExperimentStatus(getDb(root), exp.id, "reframed");
@@ -3169,66 +3191,225 @@ async function run(args) {
     throw new Error('Usage: majlis run "goal description"');
   }
   const db = getDb(root);
-  const MAX_CYCLES = 50;
-  header(`Autonomous Mode \u2014 Goal: ${goal}`);
-  let cycleCount = 0;
-  while (cycleCount < MAX_CYCLES) {
-    cycleCount++;
-    const exp = getLatestExperiment(db);
+  const config = loadConfig7(root);
+  const MAX_EXPERIMENTS = 10;
+  const MAX_STEPS = 200;
+  let experimentCount = 0;
+  let stepCount = 0;
+  header(`Autonomous Mode \u2014 ${goal}`);
+  while (stepCount < MAX_STEPS && experimentCount < MAX_EXPERIMENTS) {
+    stepCount++;
+    let exp = getLatestExperiment(db);
     if (!exp) {
-      warn('No active experiments. Create one with `majlis new "hypothesis"` and re-run.');
-      break;
+      experimentCount++;
+      if (experimentCount > MAX_EXPERIMENTS) {
+        warn(`Reached max experiments (${MAX_EXPERIMENTS}). Stopping.`);
+        break;
+      }
+      const sessionsSinceCompression = getSessionsSinceCompression(db);
+      if (sessionsSinceCompression >= config.cycle.compression_interval) {
+        info("Compressing before next experiment...");
+        await cycle("compress", []);
+      }
+      info(`[Experiment ${experimentCount}/${MAX_EXPERIMENTS}] Planning next experiment...`);
+      const hypothesis = await deriveNextHypothesis(goal, root, db);
+      if (!hypothesis) {
+        success("Planner says the goal has been met. Stopping.");
+        break;
+      }
+      info(`Next hypothesis: ${hypothesis}`);
+      exp = createNewExperiment(db, root, hypothesis);
+      success(`Created experiment #${exp.id}: ${exp.slug}`);
     }
     if (isTerminal(exp.status)) {
       if (exp.status === "merged") {
-        success(`Experiment ${exp.slug} merged successfully.`);
+        success(`Experiment ${exp.slug} merged.`);
       } else if (exp.status === "dead_end") {
-        info(`Experiment ${exp.slug} ended as dead-end.`);
-      }
-      const active = listActiveExperiments(db);
-      if (active.length === 0) {
-        info("No more active experiments. Goal assessment:");
-        info(`Original goal: ${goal}`);
-        warn("Review synthesis and metrics to determine if the goal has been met.");
-        break;
+        info(`Experiment ${exp.slug} dead-ended.`);
       }
-      info(`Continuing with experiment: ${active[0].slug}`);
-    }
-    info(`[Cycle ${cycleCount}/${MAX_CYCLES}] Processing ${exp.slug} (${exp.status})`);
-    await next([exp.slug], false);
-    const updated = getLatestExperiment(db);
-    if (updated && isTerminal(updated.status)) {
       continue;
     }
-    const sessionsSinceCompression = getSessionsSinceCompression(db);
-    if (sessionsSinceCompression >= 5) {
-      warn("Compression due during autonomous run \u2014 consider compressing.");
-    }
+    info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
+    await next([exp.slug], false);
   }
-  if (cycleCount >= MAX_CYCLES) {
-    warn(`Reached maximum cycle count (${MAX_CYCLES}). Stopping autonomous mode.`);
+  if (stepCount >= MAX_STEPS) {
+    warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
   }
   header("Autonomous Mode Complete");
   info(`Goal: ${goal}`);
-  info(`Cycles executed: ${cycleCount}`);
+  info(`Experiments: ${experimentCount}, Steps: ${stepCount}`);
   info("Run `majlis status` to see final state.");
 }
+async function deriveNextHypothesis(goal, root, db) {
+  const synthesis = readFileOrEmpty(path13.join(root, "docs", "synthesis", "current.md"));
+  const fragility = readFileOrEmpty(path13.join(root, "docs", "synthesis", "fragility.md"));
+  const deadEndsDoc = readFileOrEmpty(path13.join(root, "docs", "synthesis", "dead-ends.md"));
+  const deadEnds = listAllDeadEnds(db);
+  const config = loadConfig7(root);
+  let metricsOutput = "";
+  if (config.metrics?.command) {
+    try {
+      metricsOutput = (0, import_node_child_process4.execSync)(config.metrics.command, {
+        cwd: root,
+        encoding: "utf-8",
+        timeout: 6e4,
+        stdio: ["pipe", "pipe", "pipe"]
+      }).trim();
+    } catch {
+      metricsOutput = "(metrics command failed)";
+    }
+  }
+  const result = await spawnSynthesiser({
+    taskPrompt: `You are the Planner for an autonomous Majlis run.
+## Goal
+${goal}
+## Current Metrics
+${metricsOutput || "(no metrics configured)"}
+## Synthesis (what we know so far)
+${synthesis || "(empty \u2014 first experiment)"}
+## Fragility Map (known weak areas)
+${fragility || "(none)"}
+## Dead-End Registry
+${deadEndsDoc || "(none)"}
+## Dead Ends (from DB \u2014 ${deadEnds.length} total)
+${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}
+## Your Task
+1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
+2. If YES \u2014 output the JSON block below with goal_met: true.
+3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
+   - It must NOT repeat a dead-ended approach (check the dead-end registry!)
+   - It should attack the weakest point revealed by synthesis/fragility
+   - It must be specific and actionable \u2014 name the exact code/function/mechanism to change
+   - The hypothesis should be a single sentence describing what to do, e.g.:
+     "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
+CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
+<!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->
+If the goal is met:
+<!-- majlis-json {"goal_met": true, "hypothesis": null} -->`
+  }, root);
+  const structured = result.structured;
+  if (structured?.goal_met === true) {
+    return null;
+  }
+  if (structured?.hypothesis) {
+    return structured.hypothesis;
+  }
+  const jsonMatch = result.output.match(/"hypothesis"\s*:\s*"([^"]+)"/);
+  if (jsonMatch && jsonMatch[1].length > 10) return jsonMatch[1].trim();
+  const blockMatch = result.output.match(/<!--\s*majlis-json\s*(\{[\s\S]*?\})\s*-->/);
+  if (blockMatch) {
+    try {
+      const parsed = JSON.parse(blockMatch[1]);
+      if (parsed.goal_met === true) return null;
+      if (parsed.hypothesis) return parsed.hypothesis;
+    } catch {
+    }
+  }
+  warn("Planner did not return structured output. Retrying with focused prompt...");
+  const retry = await spawnSynthesiser({
+    taskPrompt: `Based on this analysis, output ONLY a single-line JSON block:
+${result.output.slice(-2e3)}
+<!-- majlis-json {"goal_met": false, "hypothesis": "your hypothesis"} -->`
+  }, root);
+  if (retry.structured?.hypothesis) return retry.structured.hypothesis;
+  warn("Could not extract hypothesis. Using goal as fallback.");
+  return goal;
+}
+function createNewExperiment(db, root, hypothesis) {
+  const slug = slugify2(hypothesis);
+  let finalSlug = slug;
+  let attempt = 0;
+  while (getExperimentBySlug(db, finalSlug)) {
+    attempt++;
+    finalSlug = `${slug}-${attempt}`;
+  }
+  const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
+  const num = allExps.count + 1;
+  const paddedNum = String(num).padStart(3, "0");
+  const branch = `exp/${paddedNum}-${finalSlug}`;
+  try {
+    (0, import_node_child_process4.execSync)(`git checkout -b ${branch}`, {
+      cwd: root,
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    info(`Created branch: ${branch}`);
+  } catch {
+    warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
+  }
+  const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);
+  updateExperimentStatus(db, exp.id, "reframed");
+  exp.status = "reframed";
+  const docsDir = path13.join(root, "docs", "experiments");
+  const templatePath = path13.join(docsDir, "_TEMPLATE.md");
+  if (fs13.existsSync(templatePath)) {
+    const template = fs13.readFileSync(templatePath, "utf-8");
+    const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
+    const logPath = path13.join(docsDir, `${paddedNum}-${finalSlug}.md`);
+    fs13.writeFileSync(logPath, logContent);
+    info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
+  }
+  return exp;
+}
+function readFileOrEmpty(filePath) {
+  try {
+    return fs13.readFileSync(filePath, "utf-8");
+  } catch {
+    return "";
+  }
+}
+function slugify2(text) {
+  return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
+}
+function loadConfig7(projectRoot) {
+  const configPath = path13.join(projectRoot, ".majlis", "config.json");
+  if (!fs13.existsSync(configPath)) {
+    return {
+      project: { name: "", description: "", objective: "" },
+      cycle: {
+        compression_interval: 5,
+        circuit_breaker_threshold: 3,
+        require_doubt_before_verify: true,
+        require_challenge_before_verify: false,
+        auto_baseline_on_new_experiment: true
+      }
+    };
+  }
+  return JSON.parse(fs13.readFileSync(configPath, "utf-8"));
+}
+var fs13, path13, import_node_child_process4; // module bindings for src/commands/run.ts; assigned inside init_run below
 var init_run = __esm({ // bundler-generated initializer; NOTE(review): __esm presumably defers and memoizes this body - confirm
   "src/commands/run.ts"() {
     "use strict";
+    fs13 = __toESM(require("fs")); // used by createNewExperiment, readFileOrEmpty, loadConfig7
+    path13 = __toESM(require("path")); // used to build docs/ and .majlis/ paths
+    import_node_child_process4 = require("child_process"); // execSync for the metrics command and git checkout
     init_connection();
     init_queries();
     init_machine();
     init_next();
+    init_cycle(); // NOTE(review): presumably provides cycle(), called as cycle("compress", []) in run - confirm
+    init_spawn(); // NOTE(review): presumably provides spawnSynthesiser(), used by deriveNextHypothesis - confirm
     init_format();
   }
 });
 // src/cli.ts
-var fs13 = __toESM(require("fs"));
-var path13 = __toESM(require("path"));
+var fs14 = __toESM(require("fs"));
+var path14 = __toESM(require("path"));
 var VERSION = JSON.parse(
-  fs13.readFileSync(path13.join(__dirname, "..", "package.json"), "utf-8")
+  fs14.readFileSync(path14.join(__dirname, "..", "package.json"), "utf-8")
 ).version;
 async function main() {
   const args = process.argv.slice(2);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "majlis",
-  "version": "0.3.3",
+  "version": "0.4.1",
   "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
   "bin": {
     "majlis": "./dist/cli.js"