majlis 0.4.3 → 0.4.4
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/dist/cli.js +87 -21
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2054,7 +2054,8 @@ var init_types = __esm({
|
|
|
2054
2054
|
TRANSITIONS = {
|
|
2055
2055
|
["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "building" /* BUILDING */],
|
|
2056
2056
|
["reframed" /* REFRAMED */]: ["building" /* BUILDING */],
|
|
2057
|
-
["building" /* BUILDING */]: ["built" /* BUILT */],
|
|
2057
|
+
["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
|
|
2058
|
+
// self-loop for retry after truncation
|
|
2058
2059
|
["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
|
|
2059
2060
|
["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
|
|
2060
2061
|
["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
|
|
@@ -2306,7 +2307,7 @@ ${contextJson}
|
|
|
2306
2307
|
${taskPrompt}`;
|
|
2307
2308
|
const turns = ROLE_MAX_TURNS[role] ?? 15;
|
|
2308
2309
|
console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
|
|
2309
|
-
const { text: markdown, costUsd } = await runQuery({
|
|
2310
|
+
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2310
2311
|
prompt,
|
|
2311
2312
|
model: agentDef.model,
|
|
2312
2313
|
tools: agentDef.tools,
|
|
@@ -2315,13 +2316,13 @@ ${taskPrompt}`;
|
|
|
2315
2316
|
maxTurns: turns,
|
|
2316
2317
|
label: role
|
|
2317
2318
|
});
|
|
2318
|
-
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2319
|
+
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
|
|
2319
2320
|
const artifactPath = writeArtifact(role, context, markdown, root);
|
|
2320
2321
|
if (artifactPath) {
|
|
2321
2322
|
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
2322
2323
|
}
|
|
2323
2324
|
const structured = await extractStructuredData(role, markdown);
|
|
2324
|
-
return { output: markdown, structured };
|
|
2325
|
+
return { output: markdown, structured, truncated };
|
|
2325
2326
|
}
|
|
2326
2327
|
async function spawnSynthesiser(context, projectRoot) {
|
|
2327
2328
|
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
@@ -2336,7 +2337,7 @@ ${contextJson}
|
|
|
2336
2337
|
${taskPrompt}`;
|
|
2337
2338
|
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
|
|
2338
2339
|
console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
|
|
2339
|
-
const { text: markdown, costUsd } = await runQuery({
|
|
2340
|
+
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2340
2341
|
prompt,
|
|
2341
2342
|
model: "opus",
|
|
2342
2343
|
tools: ["Read", "Glob", "Grep"],
|
|
@@ -2346,9 +2347,58 @@ ${taskPrompt}`;
|
|
|
2346
2347
|
label: "synthesiser"
|
|
2347
2348
|
});
|
|
2348
2349
|
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2349
|
-
return { output: markdown, structured: { guidance: markdown } };
|
|
2350
|
+
return { output: markdown, structured: { guidance: markdown }, truncated };
|
|
2351
|
+
}
|
|
2352
|
+
async function spawnRecovery(role, partialOutput, context, projectRoot) {
|
|
2353
|
+
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2354
|
+
const expSlug = context.experiment?.slug ?? "unknown";
|
|
2355
|
+
console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
|
|
2356
|
+
const expDocPath = path7.join(
|
|
2357
|
+
root,
|
|
2358
|
+
"docs",
|
|
2359
|
+
"experiments",
|
|
2360
|
+
`${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
|
|
2361
|
+
);
|
|
2362
|
+
const templatePath = path7.join(root, "docs", "experiments", "_TEMPLATE.md");
|
|
2363
|
+
const template = fs7.existsSync(templatePath) ? fs7.readFileSync(templatePath, "utf-8") : "";
|
|
2364
|
+
const currentDoc = fs7.existsSync(expDocPath) ? fs7.readFileSync(expDocPath, "utf-8") : "";
|
|
2365
|
+
const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
|
|
2366
|
+
|
|
2367
|
+
Here is the partial agent output (reasoning + tool calls):
|
|
2368
|
+
<partial_output>
|
|
2369
|
+
${partialOutput.slice(-3e3)}
|
|
2370
|
+
</partial_output>
|
|
2371
|
+
|
|
2372
|
+
Here is the current experiment doc:
|
|
2373
|
+
<current_doc>
|
|
2374
|
+
${currentDoc}
|
|
2375
|
+
</current_doc>
|
|
2376
|
+
|
|
2377
|
+
Here is the template that the experiment doc should follow:
|
|
2378
|
+
<template>
|
|
2379
|
+
${template}
|
|
2380
|
+
</template>
|
|
2381
|
+
|
|
2382
|
+
Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
|
|
2383
|
+
- Keep any valid content from the current doc
|
|
2384
|
+
- Fill in what you can infer from the partial output
|
|
2385
|
+
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
|
|
2386
|
+
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
|
|
2387
|
+
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
|
|
2388
|
+
- Be concise. This is cleanup, not new work.`;
|
|
2389
|
+
const { text: _markdown } = await runQuery({
|
|
2390
|
+
prompt,
|
|
2391
|
+
model: "haiku",
|
|
2392
|
+
tools: ["Read", "Write"],
|
|
2393
|
+
systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
|
|
2394
|
+
cwd: root,
|
|
2395
|
+
maxTurns: 5,
|
|
2396
|
+
label: "recovery"
|
|
2397
|
+
});
|
|
2398
|
+
console.log(`[recovery] Cleanup complete for ${expSlug}.`);
|
|
2350
2399
|
}
|
|
2351
2400
|
async function runQuery(opts) {
|
|
2401
|
+
let truncated = false;
|
|
2352
2402
|
const tag = opts.label ?? "majlis";
|
|
2353
2403
|
const conversation = (0, import_claude_agent_sdk2.query)({
|
|
2354
2404
|
prompt: opts.prompt,
|
|
@@ -2404,6 +2454,7 @@ async function runQuery(opts) {
|
|
|
2404
2454
|
if (message.subtype === "success") {
|
|
2405
2455
|
costUsd = message.total_cost_usd;
|
|
2406
2456
|
} else if (message.subtype === "error_max_turns") {
|
|
2457
|
+
truncated = true;
|
|
2407
2458
|
costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
|
|
2408
2459
|
console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
|
|
2409
2460
|
} else {
|
|
@@ -2412,7 +2463,7 @@ async function runQuery(opts) {
|
|
|
2412
2463
|
}
|
|
2413
2464
|
}
|
|
2414
2465
|
}
|
|
2415
|
-
return { text: textParts.join("\n\n"), costUsd };
|
|
2466
|
+
return { text: textParts.join("\n\n"), costUsd, truncated };
|
|
2416
2467
|
}
|
|
2417
2468
|
function formatToolDetail(toolName, input) {
|
|
2418
2469
|
switch (toolName) {
|
|
@@ -2446,19 +2497,14 @@ function writeArtifact(role, context, markdown, projectRoot) {
|
|
|
2446
2497
|
};
|
|
2447
2498
|
const dir = dirMap[role];
|
|
2448
2499
|
if (!dir) return null;
|
|
2500
|
+
if (role === "builder" || role === "compressor") return null;
|
|
2449
2501
|
const fullDir = path7.join(projectRoot, dir);
|
|
2450
2502
|
if (!fs7.existsSync(fullDir)) {
|
|
2451
2503
|
fs7.mkdirSync(fullDir, { recursive: true });
|
|
2452
2504
|
}
|
|
2453
|
-
if (role === "compressor") {
|
|
2454
|
-
const target2 = path7.join(fullDir, "current.md");
|
|
2455
|
-
fs7.writeFileSync(target2, markdown);
|
|
2456
|
-
return target2;
|
|
2457
|
-
}
|
|
2458
2505
|
const expSlug = context.experiment?.slug ?? "general";
|
|
2459
|
-
const
|
|
2460
|
-
const
|
|
2461
|
-
const filename = role === "builder" ? `${nextNum}-${expSlug}.md` : `${nextNum}-${role}-${expSlug}.md`;
|
|
2506
|
+
const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
|
|
2507
|
+
const filename = `${nextNum}-${role}-${expSlug}.md`;
|
|
2462
2508
|
const target = path7.join(fullDir, filename);
|
|
2463
2509
|
fs7.writeFileSync(target, markdown);
|
|
2464
2510
|
return target;
|
|
@@ -2690,8 +2736,16 @@ ${builderGuidance}
|
|
|
2690
2736
|
Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothesis}`
|
|
2691
2737
|
}, root);
|
|
2692
2738
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2693
|
-
|
|
2694
|
-
|
|
2739
|
+
if (result.truncated && !result.structured) {
|
|
2740
|
+
warn(`Builder was truncated (hit max turns) without producing structured output.`);
|
|
2741
|
+
await spawnRecovery("builder", result.output, {
|
|
2742
|
+
experiment: { id: exp.id, slug: exp.slug, hypothesis: exp.hypothesis, status: "building", sub_type: exp.sub_type, builder_guidance: null }
|
|
2743
|
+
}, root);
|
|
2744
|
+
warn(`Experiment stays at 'building'. Run \`majlis build\` to retry or \`majlis revert\` to abandon.`);
|
|
2745
|
+
} else {
|
|
2746
|
+
updateExperimentStatus(db, exp.id, "built");
|
|
2747
|
+
success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
|
|
2748
|
+
}
|
|
2695
2749
|
}
|
|
2696
2750
|
async function doChallenge(db, exp, root) {
|
|
2697
2751
|
transition(exp.status, "challenged" /* CHALLENGED */);
|
|
@@ -2707,8 +2761,12 @@ async function doChallenge(db, exp, root) {
|
|
|
2707
2761
|
taskPrompt: `Construct adversarial test cases for experiment ${exp.slug}: ${exp.hypothesis}`
|
|
2708
2762
|
}, root);
|
|
2709
2763
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2710
|
-
|
|
2711
|
-
|
|
2764
|
+
if (result.truncated && !result.structured) {
|
|
2765
|
+
warn(`Adversary was truncated without structured output. Experiment stays at current status.`);
|
|
2766
|
+
} else {
|
|
2767
|
+
updateExperimentStatus(db, exp.id, "challenged");
|
|
2768
|
+
success(`Challenge complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis verify\` next.`);
|
|
2769
|
+
}
|
|
2712
2770
|
}
|
|
2713
2771
|
async function doDoubt(db, exp, root) {
|
|
2714
2772
|
transition(exp.status, "doubted" /* DOUBTED */);
|
|
@@ -2725,8 +2783,12 @@ async function doDoubt(db, exp, root) {
|
|
|
2725
2783
|
taskPrompt: `Doubt the work in experiment ${exp.slug}: ${exp.hypothesis}. Produce a doubt document with evidence for each doubt.`
|
|
2726
2784
|
}, root);
|
|
2727
2785
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2728
|
-
|
|
2729
|
-
|
|
2786
|
+
if (result.truncated && !result.structured) {
|
|
2787
|
+
warn(`Critic was truncated without structured output. Experiment stays at current status.`);
|
|
2788
|
+
} else {
|
|
2789
|
+
updateExperimentStatus(db, exp.id, "doubted");
|
|
2790
|
+
success(`Doubt pass complete for ${exp.slug}. Run \`majlis challenge\` or \`majlis verify\` next.`);
|
|
2791
|
+
}
|
|
2730
2792
|
}
|
|
2731
2793
|
async function doScout(db, exp, root) {
|
|
2732
2794
|
transition(exp.status, "scouted" /* SCOUTED */);
|
|
@@ -2774,6 +2836,10 @@ async function doVerify(db, exp, root) {
|
|
|
2774
2836
|
taskPrompt: `Verify experiment ${exp.slug}: ${exp.hypothesis}. Check provenance and content. Test the ${doubts.length} doubt(s) and any adversarial challenges.`
|
|
2775
2837
|
}, root);
|
|
2776
2838
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2839
|
+
if (result.truncated && !result.structured) {
|
|
2840
|
+
warn(`Verifier was truncated without structured output. Experiment stays at 'verifying'.`);
|
|
2841
|
+
return;
|
|
2842
|
+
}
|
|
2777
2843
|
if (result.structured?.doubt_resolutions) {
|
|
2778
2844
|
for (const dr of result.structured.doubt_resolutions) {
|
|
2779
2845
|
if (dr.doubt_id && dr.resolution) {
|