majlis 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/cli.js +228 -47
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -529,8 +529,9 @@ You get ONE attempt per cycle. Your job is:
529
529
  2. Form ONE hypothesis about what to fix
530
530
  3. Implement ONE focused change (not a multi-step debug session)
531
531
  4. Run the benchmark ONCE to see the result
532
- 5. Document what you did, what happened, and what you think the next step should be
533
- 6. STOP
532
+ 5. Update the experiment doc in docs/experiments/ \u2014 fill in Approach, Results, and Metrics sections. This is NOT optional.
533
+ 6. Output the structured majlis-json block with your decisions
534
+ 7. STOP
534
535
 
535
536
  Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
536
537
  If your change doesn't work, document why and let the cycle continue \u2014
@@ -625,21 +626,28 @@ tools: [Read, Glob, Grep, Bash]
625
626
  ---
626
627
  You are the Verifier. Perform dual verification:
627
628
 
628
- PROVENANCE CHECK:
629
+ ## Scope Constraint (CRITICAL)
630
+
631
+ You must produce your structured output (grades + doubt resolutions) within your turn budget.
632
+ Do NOT exhaustively test every doubt and challenge \u2014 prioritize the critical ones.
633
+ For each doubt/challenge: one targeted check is enough. Confirm, dismiss, or mark inconclusive.
634
+ Reserve your final turns for writing the structured majlis-json output.
635
+
636
+ The framework saves your output automatically. Do NOT attempt to write files.
637
+
638
+ ## PROVENANCE CHECK:
629
639
  - Can every piece of code trace to an experiment or decision?
630
640
  - Is the chain unbroken from requirement -> classification -> experiment -> code?
631
641
  - Flag any broken chains.
632
642
 
633
- CONTENT CHECK:
643
+ ## CONTENT CHECK:
634
644
  - Does the code do what the experiment log says?
635
- - Do tests demonstrate the hypothesis?
636
- - Write and run targeted tests against the critic's doubts AND the adversary's cases.
645
+ - Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
646
+ - Do NOT run exhaustive diagnostics on every claim.
637
647
 
638
648
  Grade each component: sound / good / weak / rejected
639
649
  Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
640
650
 
641
- Produce your verification report as output. The framework saves it automatically.
642
-
643
651
  ## Structured Output Format
644
652
  <!-- majlis-json
645
653
  {
@@ -2285,13 +2293,15 @@ ${contextJson}
2285
2293
  \`\`\`
2286
2294
 
2287
2295
  ${taskPrompt}`;
2288
- console.log(`[majlis] Spawning ${role} agent (model: ${agentDef.model})...`);
2296
+ const turns = ROLE_MAX_TURNS[role] ?? 15;
2297
+ console.log(`[majlis] Spawning ${role} agent (model: ${agentDef.model}, maxTurns: ${turns})...`);
2289
2298
  const { text: markdown, costUsd } = await runQuery({
2290
2299
  prompt,
2291
2300
  model: agentDef.model,
2292
2301
  tools: agentDef.tools,
2293
2302
  systemPrompt: agentDef.systemPrompt,
2294
- cwd: root
2303
+ cwd: root,
2304
+ maxTurns: turns
2295
2305
  });
2296
2306
  console.log(`[majlis] ${role} agent complete (cost: $${costUsd.toFixed(4)})`);
2297
2307
  const artifactPath = writeArtifact(role, context, markdown, root);
@@ -2312,14 +2322,15 @@ ${contextJson}
2312
2322
  \`\`\`
2313
2323
 
2314
2324
  ${taskPrompt}`;
2315
- const systemPrompt = "You are a Synthesis Agent. Your job is to take a verification report, confirmed doubts, and adversarial test results, and compress them into specific, actionable guidance for the builder's next attempt. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. Output a 'guidance' field in JSON wrapped in a <!-- majlis-json --> block.";
2325
+ const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2316
2326
  console.log(`[majlis] Spawning synthesiser micro-agent...`);
2317
2327
  const { text: markdown, costUsd } = await runQuery({
2318
2328
  prompt,
2319
2329
  model: "opus",
2320
2330
  tools: ["Read", "Glob", "Grep"],
2321
2331
  systemPrompt,
2322
- cwd: root
2332
+ cwd: root,
2333
+ maxTurns: 5
2323
2334
  });
2324
2335
  console.log(`[majlis] Synthesiser complete (cost: $${costUsd.toFixed(4)})`);
2325
2336
  const structured = await extractStructuredData("synthesiser", markdown);
@@ -2339,7 +2350,7 @@ async function runQuery(opts) {
2339
2350
  cwd: opts.cwd,
2340
2351
  permissionMode: "bypassPermissions",
2341
2352
  allowDangerouslySkipPermissions: true,
2342
- maxTurns: 30,
2353
+ maxTurns: opts.maxTurns ?? 15,
2343
2354
  persistSession: false,
2344
2355
  settingSources: ["project"]
2345
2356
  }
@@ -2439,7 +2450,7 @@ function writeArtifact(role, context, markdown, projectRoot) {
2439
2450
  fs7.writeFileSync(target, markdown);
2440
2451
  return target;
2441
2452
  }
2442
- var fs7, path7, import_claude_agent_sdk2, DIM2, RESET2, CYAN2;
2453
+ var fs7, path7, import_claude_agent_sdk2, ROLE_MAX_TURNS, DIM2, RESET2, CYAN2;
2443
2454
  var init_spawn = __esm({
2444
2455
  "src/agents/spawn.ts"() {
2445
2456
  "use strict";
@@ -2448,6 +2459,15 @@ var init_spawn = __esm({
2448
2459
  import_claude_agent_sdk2 = require("@anthropic-ai/claude-agent-sdk");
2449
2460
  init_parse();
2450
2461
  init_connection();
2462
+ ROLE_MAX_TURNS = {
2463
+ builder: 15,
2464
+ critic: 12,
2465
+ adversary: 12,
2466
+ verifier: 15,
2467
+ compressor: 15,
2468
+ reframer: 12,
2469
+ scout: 12
2470
+ };
2451
2471
  DIM2 = "\x1B[2m";
2452
2472
  RESET2 = "\x1B[0m";
2453
2473
  CYAN2 = "\x1B[36m";
@@ -3114,6 +3134,8 @@ async function executeStep(step, exp, root) {
3114
3134
  break;
3115
3135
  case "compressed" /* COMPRESSED */:
3116
3136
  await cycle("compress", []);
3137
+ updateExperimentStatus(getDb(root), exp.id, "compressed");
3138
+ info(`Experiment ${exp.slug} compressed.`);
3117
3139
  break;
3118
3140
  case "reframed" /* REFRAMED */:
3119
3141
  updateExperimentStatus(getDb(root), exp.id, "reframed");
@@ -3169,66 +3191,225 @@ async function run(args) {
3169
3191
  throw new Error('Usage: majlis run "goal description"');
3170
3192
  }
3171
3193
  const db = getDb(root);
3172
- const MAX_CYCLES = 50;
3173
- header(`Autonomous Mode \u2014 Goal: ${goal}`);
3174
- let cycleCount = 0;
3175
- while (cycleCount < MAX_CYCLES) {
3176
- cycleCount++;
3177
- const exp = getLatestExperiment(db);
3194
+ const config = loadConfig7(root);
3195
+ const MAX_EXPERIMENTS = 10;
3196
+ const MAX_STEPS = 200;
3197
+ let experimentCount = 0;
3198
+ let stepCount = 0;
3199
+ header(`Autonomous Mode \u2014 ${goal}`);
3200
+ while (stepCount < MAX_STEPS && experimentCount < MAX_EXPERIMENTS) {
3201
+ stepCount++;
3202
+ let exp = getLatestExperiment(db);
3178
3203
  if (!exp) {
3179
- warn('No active experiments. Create one with `majlis new "hypothesis"` and re-run.');
3180
- break;
3204
+ experimentCount++;
3205
+ if (experimentCount > MAX_EXPERIMENTS) {
3206
+ warn(`Reached max experiments (${MAX_EXPERIMENTS}). Stopping.`);
3207
+ break;
3208
+ }
3209
+ const sessionsSinceCompression = getSessionsSinceCompression(db);
3210
+ if (sessionsSinceCompression >= config.cycle.compression_interval) {
3211
+ info("Compressing before next experiment...");
3212
+ await cycle("compress", []);
3213
+ }
3214
+ info(`[Experiment ${experimentCount}/${MAX_EXPERIMENTS}] Planning next experiment...`);
3215
+ const hypothesis = await deriveNextHypothesis(goal, root, db);
3216
+ if (!hypothesis) {
3217
+ success("Planner says the goal has been met. Stopping.");
3218
+ break;
3219
+ }
3220
+ info(`Next hypothesis: ${hypothesis}`);
3221
+ exp = createNewExperiment(db, root, hypothesis);
3222
+ success(`Created experiment #${exp.id}: ${exp.slug}`);
3181
3223
  }
3182
3224
  if (isTerminal(exp.status)) {
3183
3225
  if (exp.status === "merged") {
3184
- success(`Experiment ${exp.slug} merged successfully.`);
3226
+ success(`Experiment ${exp.slug} merged.`);
3185
3227
  } else if (exp.status === "dead_end") {
3186
- info(`Experiment ${exp.slug} ended as dead-end.`);
3187
- }
3188
- const active = listActiveExperiments(db);
3189
- if (active.length === 0) {
3190
- info("No more active experiments. Goal assessment:");
3191
- info(`Original goal: ${goal}`);
3192
- warn("Review synthesis and metrics to determine if the goal has been met.");
3193
- break;
3228
+ info(`Experiment ${exp.slug} dead-ended.`);
3194
3229
  }
3195
- info(`Continuing with experiment: ${active[0].slug}`);
3196
- }
3197
- info(`[Cycle ${cycleCount}/${MAX_CYCLES}] Processing ${exp.slug} (${exp.status})`);
3198
- await next([exp.slug], false);
3199
- const updated = getLatestExperiment(db);
3200
- if (updated && isTerminal(updated.status)) {
3201
3230
  continue;
3202
3231
  }
3203
- const sessionsSinceCompression = getSessionsSinceCompression(db);
3204
- if (sessionsSinceCompression >= 5) {
3205
- warn("Compression due during autonomous run \u2014 consider compressing.");
3206
- }
3232
+ info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
3233
+ await next([exp.slug], false);
3207
3234
  }
3208
- if (cycleCount >= MAX_CYCLES) {
3209
- warn(`Reached maximum cycle count (${MAX_CYCLES}). Stopping autonomous mode.`);
3235
+ if (stepCount >= MAX_STEPS) {
3236
+ warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
3210
3237
  }
3211
3238
  header("Autonomous Mode Complete");
3212
3239
  info(`Goal: ${goal}`);
3213
- info(`Cycles executed: ${cycleCount}`);
3240
+ info(`Experiments: ${experimentCount}, Steps: ${stepCount}`);
3214
3241
  info("Run `majlis status` to see final state.");
3215
3242
  }
3243
+ async function deriveNextHypothesis(goal, root, db) {
3244
+ const synthesis = readFileOrEmpty(path13.join(root, "docs", "synthesis", "current.md"));
3245
+ const fragility = readFileOrEmpty(path13.join(root, "docs", "synthesis", "fragility.md"));
3246
+ const deadEndsDoc = readFileOrEmpty(path13.join(root, "docs", "synthesis", "dead-ends.md"));
3247
+ const deadEnds = listAllDeadEnds(db);
3248
+ const config = loadConfig7(root);
3249
+ let metricsOutput = "";
3250
+ if (config.metrics?.command) {
3251
+ try {
3252
+ metricsOutput = (0, import_node_child_process4.execSync)(config.metrics.command, {
3253
+ cwd: root,
3254
+ encoding: "utf-8",
3255
+ timeout: 6e4,
3256
+ stdio: ["pipe", "pipe", "pipe"]
3257
+ }).trim();
3258
+ } catch {
3259
+ metricsOutput = "(metrics command failed)";
3260
+ }
3261
+ }
3262
+ const result = await spawnSynthesiser({
3263
+ taskPrompt: `You are the Planner for an autonomous Majlis run.
3264
+
3265
+ ## Goal
3266
+ ${goal}
3267
+
3268
+ ## Current Metrics
3269
+ ${metricsOutput || "(no metrics configured)"}
3270
+
3271
+ ## Synthesis (what we know so far)
3272
+ ${synthesis || "(empty \u2014 first experiment)"}
3273
+
3274
+ ## Fragility Map (known weak areas)
3275
+ ${fragility || "(none)"}
3276
+
3277
+ ## Dead-End Registry
3278
+ ${deadEndsDoc || "(none)"}
3279
+
3280
+ ## Dead Ends (from DB \u2014 ${deadEnds.length} total)
3281
+ ${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}
3282
+
3283
+ ## Your Task
3284
+ 1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
3285
+ 2. If YES \u2014 output the JSON block below with goal_met: true.
3286
+ 3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
3287
+ - It must NOT repeat a dead-ended approach (check the dead-end registry!)
3288
+ - It should attack the weakest point revealed by synthesis/fragility
3289
+ - It must be specific and actionable \u2014 name the exact code/function/mechanism to change
3290
+ - The hypothesis should be a single sentence describing what to do, e.g.:
3291
+ "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
3292
+
3293
+ CRITICAL: Your LAST line of output MUST be EXACTLY this format (on its own line, nothing after it):
3294
+ <!-- majlis-json {"goal_met": false, "hypothesis": "your single-sentence hypothesis here"} -->
3295
+
3296
+ If the goal is met:
3297
+ <!-- majlis-json {"goal_met": true, "hypothesis": null} -->`
3298
+ }, root);
3299
+ const structured = result.structured;
3300
+ if (structured?.goal_met === true) {
3301
+ return null;
3302
+ }
3303
+ if (structured?.hypothesis) {
3304
+ return structured.hypothesis;
3305
+ }
3306
+ const jsonMatch = result.output.match(/"hypothesis"\s*:\s*"([^"]+)"/);
3307
+ if (jsonMatch && jsonMatch[1].length > 10) return jsonMatch[1].trim();
3308
+ const blockMatch = result.output.match(/<!--\s*majlis-json\s*(\{[\s\S]*?\})\s*-->/);
3309
+ if (blockMatch) {
3310
+ try {
3311
+ const parsed = JSON.parse(blockMatch[1]);
3312
+ if (parsed.goal_met === true) return null;
3313
+ if (parsed.hypothesis) return parsed.hypothesis;
3314
+ } catch {
3315
+ }
3316
+ }
3317
+ warn("Planner did not return structured output. Retrying with focused prompt...");
3318
+ const retry = await spawnSynthesiser({
3319
+ taskPrompt: `Based on this analysis, output ONLY a single-line JSON block:
3320
+
3321
+ ${result.output.slice(-2e3)}
3322
+
3323
+ <!-- majlis-json {"goal_met": false, "hypothesis": "your hypothesis"} -->`
3324
+ }, root);
3325
+ if (retry.structured?.hypothesis) return retry.structured.hypothesis;
3326
+ warn("Could not extract hypothesis. Using goal as fallback.");
3327
+ return goal;
3328
+ }
3329
+ function createNewExperiment(db, root, hypothesis) {
3330
+ const slug = slugify2(hypothesis);
3331
+ let finalSlug = slug;
3332
+ let attempt = 0;
3333
+ while (getExperimentBySlug(db, finalSlug)) {
3334
+ attempt++;
3335
+ finalSlug = `${slug}-${attempt}`;
3336
+ }
3337
+ const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
3338
+ const num = allExps.count + 1;
3339
+ const paddedNum = String(num).padStart(3, "0");
3340
+ const branch = `exp/${paddedNum}-${finalSlug}`;
3341
+ try {
3342
+ (0, import_node_child_process4.execSync)(`git checkout -b ${branch}`, {
3343
+ cwd: root,
3344
+ encoding: "utf-8",
3345
+ stdio: ["pipe", "pipe", "pipe"]
3346
+ });
3347
+ info(`Created branch: ${branch}`);
3348
+ } catch {
3349
+ warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
3350
+ }
3351
+ const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);
3352
+ updateExperimentStatus(db, exp.id, "reframed");
3353
+ exp.status = "reframed";
3354
+ const docsDir = path13.join(root, "docs", "experiments");
3355
+ const templatePath = path13.join(docsDir, "_TEMPLATE.md");
3356
+ if (fs13.existsSync(templatePath)) {
3357
+ const template = fs13.readFileSync(templatePath, "utf-8");
3358
+ const logContent = template.replace(/\{\{title\}\}/g, hypothesis).replace(/\{\{hypothesis\}\}/g, hypothesis).replace(/\{\{branch\}\}/g, branch).replace(/\{\{status\}\}/g, "classified").replace(/\{\{sub_type\}\}/g, "unclassified").replace(/\{\{date\}\}/g, (/* @__PURE__ */ new Date()).toISOString().split("T")[0]);
3359
+ const logPath = path13.join(docsDir, `${paddedNum}-${finalSlug}.md`);
3360
+ fs13.writeFileSync(logPath, logContent);
3361
+ info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
3362
+ }
3363
+ return exp;
3364
+ }
3365
+ function readFileOrEmpty(filePath) {
3366
+ try {
3367
+ return fs13.readFileSync(filePath, "utf-8");
3368
+ } catch {
3369
+ return "";
3370
+ }
3371
+ }
3372
+ function slugify2(text) {
3373
+ return text.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "").slice(0, 50);
3374
+ }
3375
+ function loadConfig7(projectRoot) {
3376
+ const configPath = path13.join(projectRoot, ".majlis", "config.json");
3377
+ if (!fs13.existsSync(configPath)) {
3378
+ return {
3379
+ project: { name: "", description: "", objective: "" },
3380
+ cycle: {
3381
+ compression_interval: 5,
3382
+ circuit_breaker_threshold: 3,
3383
+ require_doubt_before_verify: true,
3384
+ require_challenge_before_verify: false,
3385
+ auto_baseline_on_new_experiment: true
3386
+ }
3387
+ };
3388
+ }
3389
+ return JSON.parse(fs13.readFileSync(configPath, "utf-8"));
3390
+ }
3391
+ var fs13, path13, import_node_child_process4;
3216
3392
  var init_run = __esm({
3217
3393
  "src/commands/run.ts"() {
3218
3394
  "use strict";
3395
+ fs13 = __toESM(require("fs"));
3396
+ path13 = __toESM(require("path"));
3397
+ import_node_child_process4 = require("child_process");
3219
3398
  init_connection();
3220
3399
  init_queries();
3221
3400
  init_machine();
3222
3401
  init_next();
3402
+ init_cycle();
3403
+ init_spawn();
3223
3404
  init_format();
3224
3405
  }
3225
3406
  });
3226
3407
 
3227
3408
  // src/cli.ts
3228
- var fs13 = __toESM(require("fs"));
3229
- var path13 = __toESM(require("path"));
3409
+ var fs14 = __toESM(require("fs"));
3410
+ var path14 = __toESM(require("path"));
3230
3411
  var VERSION = JSON.parse(
3231
- fs13.readFileSync(path13.join(__dirname, "..", "package.json"), "utf-8")
3412
+ fs14.readFileSync(path14.join(__dirname, "..", "package.json"), "utf-8")
3232
3413
  ).version;
3233
3414
  async function main() {
3234
3415
  const args = process.argv.slice(2);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "majlis",
3
- "version": "0.3.3",
3
+ "version": "0.4.1",
4
4
  "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
5
5
  "bin": {
6
6
  "majlis": "./dist/cli.js"