npm - majlis - Versions diffs - 0.3.2 → 0.4.0 - Mend

majlis 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/cli.js +185 -39
package/package.json +1 -1

package/dist/cli.js CHANGED

@@ -529,8 +529,9 @@ You get ONE attempt per cycle. Your job is:
 2. Form ONE hypothesis about what to fix
 3. Implement ONE focused change (not a multi-step debug session)
 4. Run the benchmark ONCE to see the result
-5. Document what you did, what happened, and what you think the next step should be
-6. STOP
+5. Update the experiment doc in docs/experiments/ \u2014 fill in Approach, Results, and Metrics sections. This is NOT optional.
+6. Output the structured majlis-json block with your decisions
+7. STOP
 Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
 If your change doesn't work, document why and let the cycle continue \u2014
@@ -625,21 +626,28 @@ tools: [Read, Glob, Grep, Bash]
 ---
 You are the Verifier. Perform dual verification:
-PROVENANCE CHECK:
+## Scope Constraint (CRITICAL)
+You must produce your structured output (grades + doubt resolutions) within your turn budget.
+Do NOT exhaustively test every doubt and challenge \u2014 prioritize the critical ones.
+For each doubt/challenge: one targeted check is enough. Confirm, dismiss, or mark inconclusive.
+Reserve your final turns for writing the structured majlis-json output.
+The framework saves your output automatically. Do NOT attempt to write files.
+## PROVENANCE CHECK:
 - Can every piece of code trace to an experiment or decision?
 - Is the chain unbroken from requirement -> classification -> experiment -> code?
 - Flag any broken chains.
-CONTENT CHECK:
+## CONTENT CHECK:
 - Does the code do what the experiment log says?
-- Do tests demonstrate the hypothesis?
-- Write and run targeted tests against the critic's doubts AND the adversary's cases.
+- Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
+- Do NOT run exhaustive diagnostics on every claim.
 Grade each component: sound / good / weak / rejected
 Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
-Produce your verification report as output. The framework saves it automatically.
 ## Structured Output Format
 <!-- majlis-json
 {
@@ -3169,63 +3177,201 @@ async function run(args) {
     throw new Error('Usage: majlis run "goal description"');
   }
   const db = getDb(root);
-  const MAX_CYCLES = 50;
-  header(`Autonomous Mode \u2014 Goal: ${goal}`);
-  let cycleCount = 0;
-  while (cycleCount < MAX_CYCLES) {
-    cycleCount++;
-    const exp = getLatestExperiment(db);
+  const config = loadConfig7(root);
+  const MAX_EXPERIMENTS = 10;
+  const MAX_STEPS = 200;
+  let experimentCount = 0;
+  let stepCount = 0;
+  header(`Autonomous Mode \u2014 ${goal}`);
+  while (stepCount < MAX_STEPS && experimentCount < MAX_EXPERIMENTS) {
+    stepCount++;
+    let exp = getLatestExperiment(db);
     if (!exp) {
-      warn('No active experiments. Create one with `majlis new "hypothesis"` and re-run.');
-      break;
+      experimentCount++;
+      if (experimentCount > MAX_EXPERIMENTS) {
+        warn(`Reached max experiments (${MAX_EXPERIMENTS}). Stopping.`);
+        break;
+      }
+      const sessionsSinceCompression = getSessionsSinceCompression(db);
+      if (sessionsSinceCompression >= config.cycle.compression_interval) {
+        info("Compressing before next experiment...");
+        await cycle("compress", []);
+      }
+      info(`[Experiment ${experimentCount}/${MAX_EXPERIMENTS}] Planning next experiment...`);
+      const hypothesis = await deriveNextHypothesis(goal, root, db);
+      if (!hypothesis) {
+        success("Planner says the goal has been met. Stopping.");
+        break;
+      }
+      info(`Next hypothesis: ${hypothesis}`);
+      exp = createNewExperiment(db, root, hypothesis);
+      success(`Created experiment #${exp.id}: ${exp.slug}`);
     }
     if (isTerminal(exp.status)) {
       if (exp.status === "merged") {
-        success(`Experiment ${exp.slug} merged successfully.`);
+        success(`Experiment ${exp.slug} merged.`);
       } else if (exp.status === "dead_end") {
-        info(`Experiment ${exp.slug} ended as dead-end.`);
-      }
-      const active = listActiveExperiments(db);
-      if (active.length === 0) {
-        info("No more active experiments. Goal assessment:");
-        info(`Original goal: ${goal}`);
-        warn("Review synthesis and metrics to determine if the goal has been met.");
-        break;
+        info(`Experiment ${exp.slug} dead-ended.`);
       }
-      info(`Continuing with experiment: ${active[0].slug}`);
-    }
-    info(`[Cycle ${cycleCount}/${MAX_CYCLES}] Processing ${exp.slug} (${exp.status})`);
-    await next([exp.slug], false);
-    const updated = getLatestExperiment(db);
-    if (updated && isTerminal(updated.status)) {
       continue;
     }
-    const sessionsSinceCompression = getSessionsSinceCompression(db);
-    if (sessionsSinceCompression >= 5) {
-      warn("Compression due during autonomous run \u2014 consider compressing.");
-    }
+    info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
+    await next([exp.slug], false);
   }
-  if (cycleCount >= MAX_CYCLES) {
-    warn(`Reached maximum cycle count (${MAX_CYCLES}). Stopping autonomous mode.`);
+  if (stepCount >= MAX_STEPS) {
+    warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
   }
   header("Autonomous Mode Complete");
   info(`Goal: ${goal}`);
-  info(`Cycles executed: ${cycleCount}`);
+  info(`Experiments: ${experimentCount}, Steps: ${stepCount}`);
   info("Run `majlis status` to see final state.");
 }
+async function deriveNextHypothesis(goal, root, db) {
+  const synthesis = readFileOrEmpty(path13.join(root, "docs", "synthesis", "current.md"));
+  const fragility = readFileOrEmpty(path13.join(root, "docs", "synthesis", "fragility.md"));
+  const deadEndsDoc = readFileOrEmpty(path13.join(root, "docs", "synthesis", "dead-ends.md"));
+  const deadEnds = listAllDeadEnds(db);
+  const config = loadConfig7(root);
+  let metricsOutput = "";
+  if (config.metrics?.command) {
+    try {
+      metricsOutput = (0, import_node_child_process4.execSync)(config.metrics.command, {
+        cwd: root,
+        encoding: "utf-8",
+        timeout: 6e4,
+        stdio: ["pipe", "pipe", "pipe"]
+      }).trim();
+    } catch {
+      metricsOutput = "(metrics command failed)";
+    }
+  }
+  const result = await spawnSynthesiser({
+    taskPrompt: `You are the Planner for an autonomous Majlis run.
+## Goal
+${goal}
+## Current Metrics
+${metricsOutput || "(no metrics configured)"}
+## Synthesis (what we know so far)
+${synthesis || "(empty \u2014 first experiment)"}
+## Fragility Map (known weak areas)
+${fragility || "(none)"}
+## Dead-End Registry
+${deadEndsDoc || "(none)"}
+## Dead Ends (from DB \u2014 ${deadEnds.length} total)
+${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}
+## Your Task
+1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
+2. If YES \u2014 output: <!-- majlis-json {"goal_met": true, "hypothesis": null} -->
+3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
+   - It must NOT repeat a dead-ended approach
+   - It should attack the weakest point revealed by synthesis/fragility
+   - It should be specific and actionable (not vague)
+   - Output: <!-- majlis-json {"goal_met": false, "hypothesis": "your hypothesis here"} -->
+IMPORTANT: You MUST output the <!-- majlis-json --> block. This is how the framework reads your decision.`
+  }, root);
+  const structured = result.structured;
+  if (structured?.goal_met === true) {
+    return null;
+  }
+  if (structured?.hypothesis) {
+    return structured.hypothesis;
+  }
+  const match = result.output.match(/hypothesis["\s:]+([^"}\n]+)/i);
+  if (match) return match[1].trim();
+  warn("Planner did not return a structured hypothesis. Using goal as fallback.");
+  return goal;
+}
+function createNewExperiment(db, root, hypothesis) {
+  const slug = slugify2(hypothesis);
+  let finalSlug = slug;
+  let attempt = 0;
+  while (getExperimentBySlug(db, finalSlug)) {
+    attempt++;
+    finalSlug = `${slug}-${attempt}`;
+  }
+  const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
+  const num = allExps.count + 1;
+  const paddedNum = String(num).padStart(3, "0");
+  const branch = `exp/${paddedNum}-${finalSlug}`;
+  try {
+    (0, import_node_child_process4.execSync)(`git checkout -b ${branch}`, {
+      cwd: root,
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"]
+    });
+    info(`Created branch: ${branch}`);
+  } catch {
+    warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
+  }
+  const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);
+  const docsDir = path13.join(root, "docs", "experiments");
+  const templatePath = path13.join(docsDir, "_TEMPLATE.md");
+  if (fs13.existsSync(templatePath)) {
+    const template = fs13.readFileSync(templatePath, "utf-8");
+    const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
+    const logPath = path13.join(docsDir, `${paddedNum}-${finalSlug}.md`);
+    fs13.writeFileSync(logPath, logContent);
+    info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
+  }
+  return exp;
+}
+function readFileOrEmpty(filePath) {
+  try {
+    return fs13.readFileSync(filePath, "utf-8");
+  } catch {
+    return "";
+  }
+}
+function slugify2(text) {
+  return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
+}
+function loadConfig7(projectRoot) {
+  const configPath = path13.join(projectRoot, ".majlis", "config.json");
+  if (!fs13.existsSync(configPath)) {
+    return {
+      project: { name: "", description: "", objective: "" },
+      cycle: {
+        compression_interval: 5,
+        circuit_breaker_threshold: 3,
+        require_doubt_before_verify: true,
+        require_challenge_before_verify: false,
+        auto_baseline_on_new_experiment: true
+      }
+    };
+  }
+  return JSON.parse(fs13.readFileSync(configPath, "utf-8"));
+}
+var fs13, path13, import_node_child_process4;
 var init_run = __esm({
   "src/commands/run.ts"() {
     "use strict";
+    fs13 = __toESM(require("fs"));
+    path13 = __toESM(require("path"));
+    import_node_child_process4 = require("child_process");
     init_connection();
     init_queries();
     init_machine();
     init_next();
+    init_cycle();
+    init_spawn();
     init_format();
   }
 });
 // src/cli.ts
-var VERSION = "0.1.0";
+var fs14 = __toESM(require("fs"));
+var path14 = __toESM(require("path"));
+var VERSION = JSON.parse(
+  fs14.readFileSync(path14.join(__dirname, "..", "package.json"), "utf-8")
+).version;
 async function main() {
   const args = process.argv.slice(2);
   if (args.includes("--version") || args.includes("-v")) {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "majlis",
-  "version": "0.3.2",
+  "version": "0.4.0",
   "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
   "bin": {
     "majlis": "./dist/cli.js"