majlis 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +115 -26
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -2054,7 +2054,8 @@ var init_types = __esm({
2054
2054
  TRANSITIONS = {
2055
2055
  ["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "building" /* BUILDING */],
2056
2056
  ["reframed" /* REFRAMED */]: ["building" /* BUILDING */],
2057
- ["building" /* BUILDING */]: ["built" /* BUILT */],
2057
+ ["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
2058
+ // self-loop for retry after truncation
2058
2059
  ["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
2059
2060
  ["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
2060
2061
  ["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
@@ -2306,7 +2307,7 @@ ${contextJson}
2306
2307
  ${taskPrompt}`;
2307
2308
  const turns = ROLE_MAX_TURNS[role] ?? 15;
2308
2309
  console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
2309
- const { text: markdown, costUsd } = await runQuery({
2310
+ const { text: markdown, costUsd, truncated } = await runQuery({
2310
2311
  prompt,
2311
2312
  model: agentDef.model,
2312
2313
  tools: agentDef.tools,
@@ -2315,13 +2316,13 @@ ${taskPrompt}`;
2315
2316
  maxTurns: turns,
2316
2317
  label: role
2317
2318
  });
2318
- console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)})`);
2319
+ console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
2319
2320
  const artifactPath = writeArtifact(role, context, markdown, root);
2320
2321
  if (artifactPath) {
2321
2322
  console.log(`[${role}] Artifact written to ${artifactPath}`);
2322
2323
  }
2323
2324
  const structured = await extractStructuredData(role, markdown);
2324
- return { output: markdown, structured };
2325
+ return { output: markdown, structured, truncated };
2325
2326
  }
2326
2327
  async function spawnSynthesiser(context, projectRoot) {
2327
2328
  const root = projectRoot ?? findProjectRoot() ?? process.cwd();
@@ -2336,7 +2337,7 @@ ${contextJson}
2336
2337
  ${taskPrompt}`;
2337
2338
  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2338
2339
  console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
2339
- const { text: markdown, costUsd } = await runQuery({
2340
+ const { text: markdown, costUsd, truncated } = await runQuery({
2340
2341
  prompt,
2341
2342
  model: "opus",
2342
2343
  tools: ["Read", "Glob", "Grep"],
@@ -2346,9 +2347,58 @@ ${taskPrompt}`;
2346
2347
  label: "synthesiser"
2347
2348
  });
2348
2349
  console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
2349
- return { output: markdown, structured: { guidance: markdown } };
2350
+ return { output: markdown, structured: { guidance: markdown }, truncated };
2351
+ }
2352
+ async function spawnRecovery(role, partialOutput, context, projectRoot) {
2353
+ const root = projectRoot ?? findProjectRoot() ?? process.cwd();
2354
+ const expSlug = context.experiment?.slug ?? "unknown";
2355
+ console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
2356
+ const expDocPath = path7.join(
2357
+ root,
2358
+ "docs",
2359
+ "experiments",
2360
+ `${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
2361
+ );
2362
+ const templatePath = path7.join(root, "docs", "experiments", "_TEMPLATE.md");
2363
+ const template = fs7.existsSync(templatePath) ? fs7.readFileSync(templatePath, "utf-8") : "";
2364
+ const currentDoc = fs7.existsSync(expDocPath) ? fs7.readFileSync(expDocPath, "utf-8") : "";
2365
+ const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
2366
+
2367
+ Here is the partial agent output (reasoning + tool calls):
2368
+ <partial_output>
2369
+ ${partialOutput.slice(-3e3)}
2370
+ </partial_output>
2371
+
2372
+ Here is the current experiment doc:
2373
+ <current_doc>
2374
+ ${currentDoc}
2375
+ </current_doc>
2376
+
2377
+ Here is the template that the experiment doc should follow:
2378
+ <template>
2379
+ ${template}
2380
+ </template>
2381
+
2382
+ Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
2383
+ - Keep any valid content from the current doc
2384
+ - Fill in what you can infer from the partial output
2385
+ - Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
2386
+ - The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
2387
+ - Do NOT include agent reasoning or thinking \u2014 only structured experiment content
2388
+ - Be concise. This is cleanup, not new work.`;
2389
+ const { text: _markdown } = await runQuery({
2390
+ prompt,
2391
+ model: "haiku",
2392
+ tools: ["Read", "Write"],
2393
+ systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
2394
+ cwd: root,
2395
+ maxTurns: 5,
2396
+ label: "recovery"
2397
+ });
2398
+ console.log(`[recovery] Cleanup complete for ${expSlug}.`);
2350
2399
  }
2351
2400
  async function runQuery(opts) {
2401
+ let truncated = false;
2352
2402
  const tag = opts.label ?? "majlis";
2353
2403
  const conversation = (0, import_claude_agent_sdk2.query)({
2354
2404
  prompt: opts.prompt,
@@ -2404,6 +2454,7 @@ async function runQuery(opts) {
2404
2454
  if (message.subtype === "success") {
2405
2455
  costUsd = message.total_cost_usd;
2406
2456
  } else if (message.subtype === "error_max_turns") {
2457
+ truncated = true;
2407
2458
  costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
2408
2459
  console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
2409
2460
  } else {
@@ -2412,7 +2463,7 @@ async function runQuery(opts) {
2412
2463
  }
2413
2464
  }
2414
2465
  }
2415
- return { text: textParts.join("\n\n"), costUsd };
2466
+ return { text: textParts.join("\n\n"), costUsd, truncated };
2416
2467
  }
2417
2468
  function formatToolDetail(toolName, input) {
2418
2469
  switch (toolName) {
@@ -2446,19 +2497,14 @@ function writeArtifact(role, context, markdown, projectRoot) {
2446
2497
  };
2447
2498
  const dir = dirMap[role];
2448
2499
  if (!dir) return null;
2500
+ if (role === "builder" || role === "compressor") return null;
2449
2501
  const fullDir = path7.join(projectRoot, dir);
2450
2502
  if (!fs7.existsSync(fullDir)) {
2451
2503
  fs7.mkdirSync(fullDir, { recursive: true });
2452
2504
  }
2453
- if (role === "compressor") {
2454
- const target2 = path7.join(fullDir, "current.md");
2455
- fs7.writeFileSync(target2, markdown);
2456
- return target2;
2457
- }
2458
2505
  const expSlug = context.experiment?.slug ?? "general";
2459
- const existing = fs7.readdirSync(fullDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
2460
- const nextNum = String(context.experiment?.id ?? existing.length + 1).padStart(3, "0");
2461
- const filename = role === "builder" ? `${nextNum}-${expSlug}.md` : `${nextNum}-${role}-${expSlug}.md`;
2506
+ const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
2507
+ const filename = `${nextNum}-${role}-${expSlug}.md`;
2462
2508
  const target = path7.join(fullDir, filename);
2463
2509
  fs7.writeFileSync(target, markdown);
2464
2510
  return target;
@@ -2588,6 +2634,10 @@ function gitRevert(branch, cwd) {
2588
2634
  encoding: "utf-8"
2589
2635
  }).trim();
2590
2636
  if (currentBranch === branch) {
2637
+ try {
2638
+ (0, import_node_child_process3.execSync)("git checkout -- .", { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
2639
+ } catch {
2640
+ }
2591
2641
  (0, import_node_child_process3.execSync)("git checkout main 2>/dev/null || git checkout master", {
2592
2642
  cwd,
2593
2643
  encoding: "utf-8",
@@ -2690,8 +2740,17 @@ ${builderGuidance}
2690
2740
  Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothesis}`
2691
2741
  }, root);
2692
2742
  ingestStructuredOutput(db, exp.id, result.structured);
2693
- updateExperimentStatus(db, exp.id, "built");
2694
- success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
2743
+ if (result.truncated && !result.structured) {
2744
+ warn(`Builder was truncated (hit max turns) without producing structured output.`);
2745
+ await spawnRecovery("builder", result.output, {
2746
+ experiment: { id: exp.id, slug: exp.slug, hypothesis: exp.hypothesis, status: "building", sub_type: exp.sub_type, builder_guidance: null }
2747
+ }, root);
2748
+ warn(`Experiment stays at 'building'. Run \`majlis build\` to retry or \`majlis revert\` to abandon.`);
2749
+ } else {
2750
+ gitCommitBuild(exp, root);
2751
+ updateExperimentStatus(db, exp.id, "built");
2752
+ success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
2753
+ }
2695
2754
  }
2696
2755
  async function doChallenge(db, exp, root) {
2697
2756
  transition(exp.status, "challenged" /* CHALLENGED */);
@@ -2707,8 +2766,12 @@ async function doChallenge(db, exp, root) {
2707
2766
  taskPrompt: `Construct adversarial test cases for experiment ${exp.slug}: ${exp.hypothesis}`
2708
2767
  }, root);
2709
2768
  ingestStructuredOutput(db, exp.id, result.structured);
2710
- updateExperimentStatus(db, exp.id, "challenged");
2711
- success(`Challenge complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis verify\` next.`);
2769
+ if (result.truncated && !result.structured) {
2770
+ warn(`Adversary was truncated without structured output. Experiment stays at current status.`);
2771
+ } else {
2772
+ updateExperimentStatus(db, exp.id, "challenged");
2773
+ success(`Challenge complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis verify\` next.`);
2774
+ }
2712
2775
  }
2713
2776
  async function doDoubt(db, exp, root) {
2714
2777
  transition(exp.status, "doubted" /* DOUBTED */);
@@ -2725,8 +2788,12 @@ async function doDoubt(db, exp, root) {
2725
2788
  taskPrompt: `Doubt the work in experiment ${exp.slug}: ${exp.hypothesis}. Produce a doubt document with evidence for each doubt.`
2726
2789
  }, root);
2727
2790
  ingestStructuredOutput(db, exp.id, result.structured);
2728
- updateExperimentStatus(db, exp.id, "doubted");
2729
- success(`Doubt pass complete for ${exp.slug}. Run \`majlis challenge\` or \`majlis verify\` next.`);
2791
+ if (result.truncated && !result.structured) {
2792
+ warn(`Critic was truncated without structured output. Experiment stays at current status.`);
2793
+ } else {
2794
+ updateExperimentStatus(db, exp.id, "doubted");
2795
+ success(`Doubt pass complete for ${exp.slug}. Run \`majlis challenge\` or \`majlis verify\` next.`);
2796
+ }
2730
2797
  }
2731
2798
  async function doScout(db, exp, root) {
2732
2799
  transition(exp.status, "scouted" /* SCOUTED */);
@@ -2774,6 +2841,10 @@ async function doVerify(db, exp, root) {
2774
2841
  taskPrompt: `Verify experiment ${exp.slug}: ${exp.hypothesis}. Check provenance and content. Test the ${doubts.length} doubt(s) and any adversarial challenges.`
2775
2842
  }, root);
2776
2843
  ingestStructuredOutput(db, exp.id, result.structured);
2844
+ if (result.truncated && !result.structured) {
2845
+ warn(`Verifier was truncated without structured output. Experiment stays at 'verifying'.`);
2846
+ return;
2847
+ }
2777
2848
  if (result.structured?.doubt_resolutions) {
2778
2849
  for (const dr of result.structured.doubt_resolutions) {
2779
2850
  if (dr.doubt_id && dr.resolution) {
@@ -2795,6 +2866,23 @@ async function doCompress(db, root) {
2795
2866
  recordCompression(db, sessionCount, sizeBefore, sizeAfter);
2796
2867
  success(`Compression complete. Synthesis: ${sizeBefore}B \u2192 ${sizeAfter}B`);
2797
2868
  }
2869
+ function gitCommitBuild(exp, cwd) {
2870
+ try {
2871
+ (0, import_node_child_process4.execSync)('git add -A -- ":!.majlis/"', { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
2872
+ const diff = (0, import_node_child_process4.execSync)("git diff --cached --stat", { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
2873
+ if (!diff) {
2874
+ info("No code changes to commit.");
2875
+ return;
2876
+ }
2877
+ const msg = `EXP-${String(exp.id).padStart(3, "0")}: ${exp.slug}
2878
+
2879
+ ${exp.hypothesis ?? ""}`;
2880
+ (0, import_node_child_process4.execSync)(`git commit -m ${JSON.stringify(msg)}`, { cwd, encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] });
2881
+ info(`Committed builder changes on ${exp.branch}.`);
2882
+ } catch {
2883
+ warn("Could not auto-commit builder changes \u2014 commit manually before resolving.");
2884
+ }
2885
+ }
2798
2886
  function resolveExperimentArg(db, args) {
2799
2887
  const slugArg = args.filter((a) => !a.startsWith("--"))[0];
2800
2888
  let exp;
@@ -2849,12 +2937,13 @@ function ingestStructuredOutput(db, experimentId, structured) {
2849
2937
  info(`Ingested ${structured.challenges.length} challenge(s)`);
2850
2938
  }
2851
2939
  }
2852
- var fs9, path9;
2940
+ var fs9, path9, import_node_child_process4;
2853
2941
  var init_cycle = __esm({
2854
2942
  "src/commands/cycle.ts"() {
2855
2943
  "use strict";
2856
2944
  fs9 = __toESM(require("fs"));
2857
2945
  path9 = __toESM(require("path"));
2946
+ import_node_child_process4 = require("child_process");
2858
2947
  init_connection();
2859
2948
  init_queries();
2860
2949
  init_machine();
@@ -3281,7 +3370,7 @@ async function deriveNextHypothesis(goal, root, db) {
3281
3370
  let metricsOutput = "";
3282
3371
  if (config.metrics?.command) {
3283
3372
  try {
3284
- metricsOutput = (0, import_node_child_process4.execSync)(config.metrics.command, {
3373
+ metricsOutput = (0, import_node_child_process5.execSync)(config.metrics.command, {
3285
3374
  cwd: root,
3286
3375
  encoding: "utf-8",
3287
3376
  timeout: 6e4,
@@ -3372,7 +3461,7 @@ function createNewExperiment(db, root, hypothesis) {
3372
3461
  const paddedNum = String(num).padStart(3, "0");
3373
3462
  const branch = `exp/${paddedNum}-${finalSlug}`;
3374
3463
  try {
3375
- (0, import_node_child_process4.execSync)(`git checkout -b ${branch}`, {
3464
+ (0, import_node_child_process5.execSync)(`git checkout -b ${branch}`, {
3376
3465
  cwd: root,
3377
3466
  encoding: "utf-8",
3378
3467
  stdio: ["pipe", "pipe", "pipe"]
@@ -3421,13 +3510,13 @@ function loadConfig7(projectRoot) {
3421
3510
  }
3422
3511
  return JSON.parse(fs13.readFileSync(configPath, "utf-8"));
3423
3512
  }
3424
- var fs13, path13, import_node_child_process4;
3513
+ var fs13, path13, import_node_child_process5;
3425
3514
  var init_run = __esm({
3426
3515
  "src/commands/run.ts"() {
3427
3516
  "use strict";
3428
3517
  fs13 = __toESM(require("fs"));
3429
3518
  path13 = __toESM(require("path"));
3430
- import_node_child_process4 = require("child_process");
3519
+ import_node_child_process5 = require("child_process");
3431
3520
  init_connection();
3432
3521
  init_queries();
3433
3522
  init_machine();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "majlis",
3
- "version": "0.4.3",
3
+ "version": "0.4.5",
4
4
  "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
5
5
  "bin": {
6
6
  "majlis": "./dist/cli.js"