majlis 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +128 -41
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -2054,7 +2054,8 @@ var init_types = __esm({
2054
2054
  TRANSITIONS = {
2055
2055
  ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "building" /* BUILDING */],
2056
2056
  ["reframed" /* REFRAMED */]: ["building" /* BUILDING */],
2057
- ["building" /* BUILDING */]: ["built" /* BUILT */],
2057
+ ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
2058
+ // self-loop for retry after truncation
2058
2059
  ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
2059
2060
  ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
2060
2061
  ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
@@ -2305,22 +2306,23 @@ ${contextJson}
2305
2306
 
2306
2307
  ${taskPrompt}`;
2307
2308
  const turns = ROLE_MAX_TURNS[role] ?? 15;
2308
- console.log(`[majlis] Spawning ${role} agent (model: ${agentDef.model}, maxTurns: ${turns})...`);
2309
- const { text: markdown, costUsd } = await runQuery({
2309
+ console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
2310
+ const { text: markdown, costUsd, truncated } = await runQuery({
2310
2311
  prompt,
2311
2312
  model: agentDef.model,
2312
2313
  tools: agentDef.tools,
2313
2314
  systemPrompt: agentDef.systemPrompt,
2314
2315
  cwd: root,
2315
- maxTurns: turns
2316
+ maxTurns: turns,
2317
+ label: role
2316
2318
  });
2317
- console.log(`[majlis] ${role} agent complete (cost: $${costUsd.toFixed(4)})`);
2319
+ console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
2318
2320
  const artifactPath = writeArtifact(role, context, markdown, root);
2319
2321
  if (artifactPath) {
2320
- console.log(`[majlis] ${role} artifact written to ${artifactPath}`);
2322
+ console.log(`[${role}] Artifact written to ${artifactPath}`);
2321
2323
  }
2322
2324
  const structured = await extractStructuredData(role, markdown);
2323
- return { output: markdown, structured };
2325
+ return { output: markdown, structured, truncated };
2324
2326
  }
2325
2327
  async function spawnSynthesiser(context, projectRoot) {
2326
2328
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
@@ -2334,20 +2336,70 @@ ${contextJson}
2334
2336
 
2335
2337
  ${taskPrompt}`;
2336
2338
  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2337
- console.log(`[majlis] Spawning synthesiser micro-agent...`);
2338
- const { text: markdown, costUsd } = await runQuery({
2339
+ console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
2340
+ const { text: markdown, costUsd, truncated } = await runQuery({
2339
2341
  prompt,
2340
2342
  model: "opus",
2341
2343
  tools: ["Read", "Glob", "Grep"],
2342
2344
  systemPrompt,
2343
2345
  cwd: root,
2344
- maxTurns: 5
2346
+ maxTurns: 5,
2347
+ label: "synthesiser"
2345
2348
  });
2346
- console.log(`[majlis] Synthesiser complete (cost: $${costUsd.toFixed(4)})`);
2347
- const structured = await extractStructuredData("synthesiser", markdown);
2348
- return { output: markdown, structured };
2349
+ console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
2350
+ return { output: markdown, structured: { guidance: markdown }, truncated };
2351
+ }
2352
+ async function spawnRecovery(role, partialOutput, context, projectRoot) {
2353
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2354
+ const expSlug = context.experiment?.slug ?? "unknown";
2355
+ console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
2356
+ const expDocPath = path7.join(
2357
+ root,
2358
+ "docs",
2359
+ "experiments",
2360
+ `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
2361
+ );
2362
+ const templatePath = path7.join(root, "docs", "experiments", "_TEMPLATE.md");
2363
+ const template = fs7.existsSync(templatePath) ? fs7.readFileSync(templatePath, "utf-8") : "";
2364
+ const currentDoc = fs7.existsSync(expDocPath) ? fs7.readFileSync(expDocPath, "utf-8") : "";
2365
+ const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
2366
+
2367
+ Here is the partial agent output (reasoning + tool calls):
2368
+ <partial_output>
2369
+ ${partialOutput.slice(-3e3)}
2370
+ </partial_output>
2371
+
2372
+ Here is the current experiment doc:
2373
+ <current_doc>
2374
+ ${currentDoc}
2375
+ </current_doc>
2376
+
2377
+ Here is the template that the experiment doc should follow:
2378
+ <template>
2379
+ ${template}
2380
+ </template>
2381
+
2382
+ Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
2383
+ - Keep any valid content from the current doc
2384
+ - Fill in what you can infer from the partial output
2385
+ - Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
2386
+ - The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
2387
+ - Do NOT include agent reasoning or thinking \u2014 only structured experiment content
2388
+ - Be concise. This is cleanup, not new work.`;
2389
+ const { text: _markdown } = await runQuery({
2390
+ prompt,
2391
+ model: "haiku",
2392
+ tools: ["Read", "Write"],
2393
+ systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
2394
+ cwd: root,
2395
+ maxTurns: 5,
2396
+ label: "recovery"
2397
+ });
2398
+ console.log(`[recovery] Cleanup complete for ${expSlug}.`);
2349
2399
  }
2350
2400
  async function runQuery(opts) {
2401
+ let truncated = false;
2402
+ const tag = opts.label ?? "majlis";
2351
2403
  const conversation = (0, import_claude_agent_sdk2.query)({
2352
2404
  prompt: opts.prompt,
2353
2405
  options: {
@@ -2381,36 +2433,37 @@ async function runQuery(opts) {
2381
2433
  const toolName = block.name ?? "tool";
2382
2434
  const input = block.input ?? {};
2383
2435
  const detail = formatToolDetail(toolName, input);
2384
- process.stderr.write(`${DIM2}[majlis] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2436
+ process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2385
2437
  `);
2386
2438
  }
2387
2439
  }
2388
2440
  if (hasText) {
2389
2441
  const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
2390
2442
  if (preview) {
2391
- process.stderr.write(`${DIM2}[majlis] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2443
+ process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2392
2444
  `);
2393
2445
  }
2394
2446
  }
2395
2447
  } else if (message.type === "tool_progress") {
2396
2448
  const elapsed = Math.round(message.elapsed_time_seconds);
2397
2449
  if (elapsed > 0 && elapsed % 5 === 0) {
2398
- process.stderr.write(`${DIM2}[majlis] ${message.tool_name} running (${elapsed}s)...${RESET2}
2450
+ process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
2399
2451
  `);
2400
2452
  }
2401
2453
  } else if (message.type === "result") {
2402
2454
  if (message.subtype === "success") {
2403
2455
  costUsd = message.total_cost_usd;
2404
2456
  } else if (message.subtype === "error_max_turns") {
2457
+ truncated = true;
2405
2458
  costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
2406
- console.warn(`[majlis] Agent hit max turns (${turnCount}). Returning partial output.`);
2459
+ console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
2407
2460
  } else {
2408
2461
  const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
2409
2462
  throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
2410
2463
  }
2411
2464
  }
2412
2465
  }
2413
- return { text: textParts.join("\n\n"), costUsd };
2466
+ return { text: textParts.join("\n\n"), costUsd, truncated };
2414
2467
  }
2415
2468
  function formatToolDetail(toolName, input) {
2416
2469
  switch (toolName) {
@@ -2444,19 +2497,14 @@ function writeArtifact(role, context, markdown, projectRoot) {
2444
2497
  };
2445
2498
  const dir = dirMap[role];
2446
2499
  if (!dir) return null;
2500
+ if (role === "builder" || role === "compressor") return null;
2447
2501
  const fullDir = path7.join(projectRoot, dir);
2448
2502
  if (!fs7.existsSync(fullDir)) {
2449
2503
  fs7.mkdirSync(fullDir, { recursive: true });
2450
2504
  }
2451
- if (role === "compressor") {
2452
- const target2 = path7.join(fullDir, "current.md");
2453
- fs7.writeFileSync(target2, markdown);
2454
- return target2;
2455
- }
2456
2505
  const expSlug = context.experiment?.slug ?? "general";
2457
- const existing = fs7.readdirSync(fullDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
2458
- const nextNum = String(existing.length + 1).padStart(3, "0");
2459
- const filename = role === "builder" ? `${nextNum}-${expSlug}.md` : `${nextNum}-${role}-${expSlug}.md`;
2506
+ const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
2507
+ const filename = `${nextNum}-${role}-${expSlug}.md`;
2460
2508
  const target = path7.join(fullDir, filename);
2461
2509
  fs7.writeFileSync(target, markdown);
2462
2510
  return target;
@@ -2472,12 +2520,12 @@ var init_spawn = __esm({
2472
2520
  init_connection();
2473
2521
  ROLE_MAX_TURNS = {
2474
2522
  builder: 50,
2475
- critic: 12,
2476
- adversary: 12,
2477
- verifier: 15,
2478
- compressor: 15,
2479
- reframer: 12,
2480
- scout: 12
2523
+ critic: 30,
2524
+ adversary: 30,
2525
+ verifier: 50,
2526
+ compressor: 30,
2527
+ reframer: 20,
2528
+ scout: 20
2481
2529
  };
2482
2530
  DIM2 = "\x1B[2m";
2483
2531
  RESET2 = "\x1B[0m";
@@ -2493,9 +2541,19 @@ function worstGrade(grades) {
2493
2541
  return "sound";
2494
2542
  }
2495
2543
  async function resolve(db, exp, projectRoot) {
2496
- const grades = getVerificationsByExperiment(db, exp.id);
2544
+ let grades = getVerificationsByExperiment(db, exp.id);
2497
2545
  if (grades.length === 0) {
2498
- throw new Error(`No verifications found for experiment ${exp.slug}. Run verify first.`);
2546
+ warn(`No verification records for ${exp.slug}. Defaulting to weak.`);
2547
+ insertVerification(
2548
+ db,
2549
+ exp.id,
2550
+ "auto-default",
2551
+ "weak",
2552
+ null,
2553
+ null,
2554
+ "No structured verification output. Auto-defaulted to weak."
2555
+ );
2556
+ grades = getVerificationsByExperiment(db, exp.id);
2499
2557
  }
2500
2558
  const overallGrade = worstGrade(grades);
2501
2559
  switch (overallGrade) {
@@ -2678,8 +2736,16 @@ ${builderGuidance}
2678
2736
  Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothesis}`
2679
2737
  }, root);
2680
2738
  ingestStructuredOutput(db, exp.id, result.structured);
2681
- updateExperimentStatus(db, exp.id, "built");
2682
- success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
2739
+ if (result.truncated && !result.structured) {
2740
+ warn(`Builder was truncated (hit max turns) without producing structured output.`);
2741
+ await spawnRecovery("builder", result.output, {
2742
+ experiment: { id: exp.id, slug: exp.slug, hypothesis: exp.hypothesis, status: "building", sub_type: exp.sub_type, builder_guidance: null }
2743
+ }, root);
2744
+ warn(`Experiment stays at 'building'. Run \`majlis build\` to retry or \`majlis revert\` to abandon.`);
2745
+ } else {
2746
+ updateExperimentStatus(db, exp.id, "built");
2747
+ success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
2748
+ }
2683
2749
  }
2684
2750
  async function doChallenge(db, exp, root) {
2685
2751
  transition(exp.status, "challenged" /* CHALLENGED */);
@@ -2695,8 +2761,12 @@ async function doChallenge(db, exp, root) {
2695
2761
  taskPrompt: `Construct adversarial test cases for experiment ${exp.slug}: ${exp.hypothesis}`
2696
2762
  }, root);
2697
2763
  ingestStructuredOutput(db, exp.id, result.structured);
2698
- updateExperimentStatus(db, exp.id, "challenged");
2699
- success(`Challenge complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis verify\` next.`);
2764
+ if (result.truncated && !result.structured) {
2765
+ warn(`Adversary was truncated without structured output. Experiment stays at current status.`);
2766
+ } else {
2767
+ updateExperimentStatus(db, exp.id, "challenged");
2768
+ success(`Challenge complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis verify\` next.`);
2769
+ }
2700
2770
  }
2701
2771
  async function doDoubt(db, exp, root) {
2702
2772
  transition(exp.status, "doubted" /* DOUBTED */);
@@ -2713,8 +2783,12 @@ async function doDoubt(db, exp, root) {
2713
2783
  taskPrompt: `Doubt the work in experiment ${exp.slug}: ${exp.hypothesis}. Produce a doubt document with evidence for each doubt.`
2714
2784
  }, root);
2715
2785
  ingestStructuredOutput(db, exp.id, result.structured);
2716
- updateExperimentStatus(db, exp.id, "doubted");
2717
- success(`Doubt pass complete for ${exp.slug}. Run \`majlis challenge\` or \`majlis verify\` next.`);
2786
+ if (result.truncated && !result.structured) {
2787
+ warn(`Critic was truncated without structured output. Experiment stays at current status.`);
2788
+ } else {
2789
+ updateExperimentStatus(db, exp.id, "doubted");
2790
+ success(`Doubt pass complete for ${exp.slug}. Run \`majlis challenge\` or \`majlis verify\` next.`);
2791
+ }
2718
2792
  }
2719
2793
  async function doScout(db, exp, root) {
2720
2794
  transition(exp.status, "scouted" /* SCOUTED */);
@@ -2762,6 +2836,10 @@ async function doVerify(db, exp, root) {
2762
2836
  taskPrompt: `Verify experiment ${exp.slug}: ${exp.hypothesis}. Check provenance and content. Test the ${doubts.length} doubt(s) and any adversarial challenges.`
2763
2837
  }, root);
2764
2838
  ingestStructuredOutput(db, exp.id, result.structured);
2839
+ if (result.truncated && !result.structured) {
2840
+ warn(`Verifier was truncated without structured output. Experiment stays at 'verifying'.`);
2841
+ return;
2842
+ }
2765
2843
  if (result.structured?.doubt_resolutions) {
2766
2844
  for (const dr of result.structured.doubt_resolutions) {
2767
2845
  if (dr.doubt_id && dr.resolution) {
@@ -3241,7 +3319,16 @@ async function run(args) {
3241
3319
  continue;
3242
3320
  }
3243
3321
  info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
3244
- await next([exp.slug], false);
3322
+ try {
3323
+ await next([exp.slug], false);
3324
+ } catch (err) {
3325
+ const message = err instanceof Error ? err.message : String(err);
3326
+ warn(`Step failed for ${exp.slug}: ${message}`);
3327
+ try {
3328
+ updateExperimentStatus(db, exp.id, "dead_end");
3329
+ } catch {
3330
+ }
3331
+ }
3245
3332
  }
3246
3333
  if (stepCount >= MAX_STEPS) {
3247
3334
  warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "majlis",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
5
5
  "bin": {
6
6
  "majlis": "./dist/cli.js"