majlis 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +128 -41
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2054,7 +2054,8 @@ var init_types = __esm({
|
|
|
2054
2054
|
TRANSITIONS = {
|
|
2055
2055
|
["classified" /* CLASSIFIED */]: ["reframed" /* REFRAMED */, "building" /* BUILDING */],
|
|
2056
2056
|
["reframed" /* REFRAMED */]: ["building" /* BUILDING */],
|
|
2057
|
-
["building" /* BUILDING */]: ["built" /* BUILT */],
|
|
2057
|
+
["building" /* BUILDING */]: ["built" /* BUILT */, "building" /* BUILDING */],
|
|
2058
|
+
// self-loop for retry after truncation
|
|
2058
2059
|
["built" /* BUILT */]: ["challenged" /* CHALLENGED */, "doubted" /* DOUBTED */],
|
|
2059
2060
|
["challenged" /* CHALLENGED */]: ["doubted" /* DOUBTED */, "verifying" /* VERIFYING */],
|
|
2060
2061
|
["doubted" /* DOUBTED */]: ["challenged" /* CHALLENGED */, "scouted" /* SCOUTED */, "verifying" /* VERIFYING */],
|
|
@@ -2305,22 +2306,23 @@ ${contextJson}
|
|
|
2305
2306
|
|
|
2306
2307
|
${taskPrompt}`;
|
|
2307
2308
|
const turns = ROLE_MAX_TURNS[role] ?? 15;
|
|
2308
|
-
console.log(`[
|
|
2309
|
-
const { text: markdown, costUsd } = await runQuery({
|
|
2309
|
+
console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
|
|
2310
|
+
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2310
2311
|
prompt,
|
|
2311
2312
|
model: agentDef.model,
|
|
2312
2313
|
tools: agentDef.tools,
|
|
2313
2314
|
systemPrompt: agentDef.systemPrompt,
|
|
2314
2315
|
cwd: root,
|
|
2315
|
-
maxTurns: turns
|
|
2316
|
+
maxTurns: turns,
|
|
2317
|
+
label: role
|
|
2316
2318
|
});
|
|
2317
|
-
console.log(`[
|
|
2319
|
+
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)}${truncated ? ", TRUNCATED" : ""})`);
|
|
2318
2320
|
const artifactPath = writeArtifact(role, context, markdown, root);
|
|
2319
2321
|
if (artifactPath) {
|
|
2320
|
-
console.log(`[
|
|
2322
|
+
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
2321
2323
|
}
|
|
2322
2324
|
const structured = await extractStructuredData(role, markdown);
|
|
2323
|
-
return { output: markdown, structured };
|
|
2325
|
+
return { output: markdown, structured, truncated };
|
|
2324
2326
|
}
|
|
2325
2327
|
async function spawnSynthesiser(context, projectRoot) {
|
|
2326
2328
|
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
@@ -2334,20 +2336,70 @@ ${contextJson}
|
|
|
2334
2336
|
|
|
2335
2337
|
${taskPrompt}`;
|
|
2336
2338
|
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
|
|
2337
|
-
console.log(`[
|
|
2338
|
-
const { text: markdown, costUsd } = await runQuery({
|
|
2339
|
+
console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
|
|
2340
|
+
const { text: markdown, costUsd, truncated } = await runQuery({
|
|
2339
2341
|
prompt,
|
|
2340
2342
|
model: "opus",
|
|
2341
2343
|
tools: ["Read", "Glob", "Grep"],
|
|
2342
2344
|
systemPrompt,
|
|
2343
2345
|
cwd: root,
|
|
2344
|
-
maxTurns: 5
|
|
2346
|
+
maxTurns: 5,
|
|
2347
|
+
label: "synthesiser"
|
|
2345
2348
|
});
|
|
2346
|
-
console.log(`[
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
+
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2350
|
+
return { output: markdown, structured: { guidance: markdown }, truncated };
|
|
2351
|
+
}
|
|
2352
|
+
async function spawnRecovery(role, partialOutput, context, projectRoot) {
|
|
2353
|
+
const root = projectRoot ?? findProjectRoot() ?? process.cwd();
|
|
2354
|
+
const expSlug = context.experiment?.slug ?? "unknown";
|
|
2355
|
+
console.log(`[recovery] Cleaning up after truncated ${role} for ${expSlug}...`);
|
|
2356
|
+
const expDocPath = path7.join(
|
|
2357
|
+
root,
|
|
2358
|
+
"docs",
|
|
2359
|
+
"experiments",
|
|
2360
|
+
`${String(context.experiment?.id ?? 0).padStart(3, "0")}-${expSlug}.md`
|
|
2361
|
+
);
|
|
2362
|
+
const templatePath = path7.join(root, "docs", "experiments", "_TEMPLATE.md");
|
|
2363
|
+
const template = fs7.existsSync(templatePath) ? fs7.readFileSync(templatePath, "utf-8") : "";
|
|
2364
|
+
const currentDoc = fs7.existsSync(expDocPath) ? fs7.readFileSync(expDocPath, "utf-8") : "";
|
|
2365
|
+
const prompt = `The ${role} agent was truncated (hit max turns) while working on experiment "${expSlug}".
|
|
2366
|
+
|
|
2367
|
+
Here is the partial agent output (reasoning + tool calls):
|
|
2368
|
+
<partial_output>
|
|
2369
|
+
${partialOutput.slice(-3e3)}
|
|
2370
|
+
</partial_output>
|
|
2371
|
+
|
|
2372
|
+
Here is the current experiment doc:
|
|
2373
|
+
<current_doc>
|
|
2374
|
+
${currentDoc}
|
|
2375
|
+
</current_doc>
|
|
2376
|
+
|
|
2377
|
+
Here is the template that the experiment doc should follow:
|
|
2378
|
+
<template>
|
|
2379
|
+
${template}
|
|
2380
|
+
</template>
|
|
2381
|
+
|
|
2382
|
+
Your job: Write a CLEAN experiment doc to ${expDocPath} using the Write tool.
|
|
2383
|
+
- Keep any valid content from the current doc
|
|
2384
|
+
- Fill in what you can infer from the partial output
|
|
2385
|
+
- Mark incomplete sections with "[TRUNCATED \u2014 ${role} did not finish]"
|
|
2386
|
+
- The doc MUST have the <!-- majlis-json --> block, even if decisions are empty
|
|
2387
|
+
- Do NOT include agent reasoning or thinking \u2014 only structured experiment content
|
|
2388
|
+
- Be concise. This is cleanup, not new work.`;
|
|
2389
|
+
const { text: _markdown } = await runQuery({
|
|
2390
|
+
prompt,
|
|
2391
|
+
model: "haiku",
|
|
2392
|
+
tools: ["Read", "Write"],
|
|
2393
|
+
systemPrompt: `You are a Recovery Agent. You clean up experiment docs after truncated agent runs. Write clean, structured docs. Never include agent reasoning or monologue.`,
|
|
2394
|
+
cwd: root,
|
|
2395
|
+
maxTurns: 5,
|
|
2396
|
+
label: "recovery"
|
|
2397
|
+
});
|
|
2398
|
+
console.log(`[recovery] Cleanup complete for ${expSlug}.`);
|
|
2349
2399
|
}
|
|
2350
2400
|
async function runQuery(opts) {
|
|
2401
|
+
let truncated = false;
|
|
2402
|
+
const tag = opts.label ?? "majlis";
|
|
2351
2403
|
const conversation = (0, import_claude_agent_sdk2.query)({
|
|
2352
2404
|
prompt: opts.prompt,
|
|
2353
2405
|
options: {
|
|
@@ -2381,36 +2433,37 @@ async function runQuery(opts) {
|
|
|
2381
2433
|
const toolName = block.name ?? "tool";
|
|
2382
2434
|
const input = block.input ?? {};
|
|
2383
2435
|
const detail = formatToolDetail(toolName, input);
|
|
2384
|
-
process.stderr.write(`${DIM2}[
|
|
2436
|
+
process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
|
|
2385
2437
|
`);
|
|
2386
2438
|
}
|
|
2387
2439
|
}
|
|
2388
2440
|
if (hasText) {
|
|
2389
2441
|
const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
|
|
2390
2442
|
if (preview) {
|
|
2391
|
-
process.stderr.write(`${DIM2}[
|
|
2443
|
+
process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
|
|
2392
2444
|
`);
|
|
2393
2445
|
}
|
|
2394
2446
|
}
|
|
2395
2447
|
} else if (message.type === "tool_progress") {
|
|
2396
2448
|
const elapsed = Math.round(message.elapsed_time_seconds);
|
|
2397
2449
|
if (elapsed > 0 && elapsed % 5 === 0) {
|
|
2398
|
-
process.stderr.write(`${DIM2}[
|
|
2450
|
+
process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
|
|
2399
2451
|
`);
|
|
2400
2452
|
}
|
|
2401
2453
|
} else if (message.type === "result") {
|
|
2402
2454
|
if (message.subtype === "success") {
|
|
2403
2455
|
costUsd = message.total_cost_usd;
|
|
2404
2456
|
} else if (message.subtype === "error_max_turns") {
|
|
2457
|
+
truncated = true;
|
|
2405
2458
|
costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
|
|
2406
|
-
console.warn(`[
|
|
2459
|
+
console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
|
|
2407
2460
|
} else {
|
|
2408
2461
|
const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
|
|
2409
2462
|
throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
|
|
2410
2463
|
}
|
|
2411
2464
|
}
|
|
2412
2465
|
}
|
|
2413
|
-
return { text: textParts.join("\n\n"), costUsd };
|
|
2466
|
+
return { text: textParts.join("\n\n"), costUsd, truncated };
|
|
2414
2467
|
}
|
|
2415
2468
|
function formatToolDetail(toolName, input) {
|
|
2416
2469
|
switch (toolName) {
|
|
@@ -2444,19 +2497,14 @@ function writeArtifact(role, context, markdown, projectRoot) {
|
|
|
2444
2497
|
};
|
|
2445
2498
|
const dir = dirMap[role];
|
|
2446
2499
|
if (!dir) return null;
|
|
2500
|
+
if (role === "builder" || role === "compressor") return null;
|
|
2447
2501
|
const fullDir = path7.join(projectRoot, dir);
|
|
2448
2502
|
if (!fs7.existsSync(fullDir)) {
|
|
2449
2503
|
fs7.mkdirSync(fullDir, { recursive: true });
|
|
2450
2504
|
}
|
|
2451
|
-
if (role === "compressor") {
|
|
2452
|
-
const target2 = path7.join(fullDir, "current.md");
|
|
2453
|
-
fs7.writeFileSync(target2, markdown);
|
|
2454
|
-
return target2;
|
|
2455
|
-
}
|
|
2456
2505
|
const expSlug = context.experiment?.slug ?? "general";
|
|
2457
|
-
const
|
|
2458
|
-
const
|
|
2459
|
-
const filename = role === "builder" ? `${nextNum}-${expSlug}.md` : `${nextNum}-${role}-${expSlug}.md`;
|
|
2506
|
+
const nextNum = String(context.experiment?.id ?? 1).padStart(3, "0");
|
|
2507
|
+
const filename = `${nextNum}-${role}-${expSlug}.md`;
|
|
2460
2508
|
const target = path7.join(fullDir, filename);
|
|
2461
2509
|
fs7.writeFileSync(target, markdown);
|
|
2462
2510
|
return target;
|
|
@@ -2472,12 +2520,12 @@ var init_spawn = __esm({
|
|
|
2472
2520
|
init_connection();
|
|
2473
2521
|
ROLE_MAX_TURNS = {
|
|
2474
2522
|
builder: 50,
|
|
2475
|
-
critic:
|
|
2476
|
-
adversary:
|
|
2477
|
-
verifier:
|
|
2478
|
-
compressor:
|
|
2479
|
-
reframer:
|
|
2480
|
-
scout:
|
|
2523
|
+
critic: 30,
|
|
2524
|
+
adversary: 30,
|
|
2525
|
+
verifier: 50,
|
|
2526
|
+
compressor: 30,
|
|
2527
|
+
reframer: 20,
|
|
2528
|
+
scout: 20
|
|
2481
2529
|
};
|
|
2482
2530
|
DIM2 = "\x1B[2m";
|
|
2483
2531
|
RESET2 = "\x1B[0m";
|
|
@@ -2493,9 +2541,19 @@ function worstGrade(grades) {
|
|
|
2493
2541
|
return "sound";
|
|
2494
2542
|
}
|
|
2495
2543
|
async function resolve(db, exp, projectRoot) {
|
|
2496
|
-
|
|
2544
|
+
let grades = getVerificationsByExperiment(db, exp.id);
|
|
2497
2545
|
if (grades.length === 0) {
|
|
2498
|
-
|
|
2546
|
+
warn(`No verification records for ${exp.slug}. Defaulting to weak.`);
|
|
2547
|
+
insertVerification(
|
|
2548
|
+
db,
|
|
2549
|
+
exp.id,
|
|
2550
|
+
"auto-default",
|
|
2551
|
+
"weak",
|
|
2552
|
+
null,
|
|
2553
|
+
null,
|
|
2554
|
+
"No structured verification output. Auto-defaulted to weak."
|
|
2555
|
+
);
|
|
2556
|
+
grades = getVerificationsByExperiment(db, exp.id);
|
|
2499
2557
|
}
|
|
2500
2558
|
const overallGrade = worstGrade(grades);
|
|
2501
2559
|
switch (overallGrade) {
|
|
@@ -2678,8 +2736,16 @@ ${builderGuidance}
|
|
|
2678
2736
|
Build the experiment: ${exp.hypothesis}` : `Build the experiment: ${exp.hypothesis}`
|
|
2679
2737
|
}, root);
|
|
2680
2738
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2681
|
-
|
|
2682
|
-
|
|
2739
|
+
if (result.truncated && !result.structured) {
|
|
2740
|
+
warn(`Builder was truncated (hit max turns) without producing structured output.`);
|
|
2741
|
+
await spawnRecovery("builder", result.output, {
|
|
2742
|
+
experiment: { id: exp.id, slug: exp.slug, hypothesis: exp.hypothesis, status: "building", sub_type: exp.sub_type, builder_guidance: null }
|
|
2743
|
+
}, root);
|
|
2744
|
+
warn(`Experiment stays at 'building'. Run \`majlis build\` to retry or \`majlis revert\` to abandon.`);
|
|
2745
|
+
} else {
|
|
2746
|
+
updateExperimentStatus(db, exp.id, "built");
|
|
2747
|
+
success(`Build complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis challenge\` next.`);
|
|
2748
|
+
}
|
|
2683
2749
|
}
|
|
2684
2750
|
async function doChallenge(db, exp, root) {
|
|
2685
2751
|
transition(exp.status, "challenged" /* CHALLENGED */);
|
|
@@ -2695,8 +2761,12 @@ async function doChallenge(db, exp, root) {
|
|
|
2695
2761
|
taskPrompt: `Construct adversarial test cases for experiment ${exp.slug}: ${exp.hypothesis}`
|
|
2696
2762
|
}, root);
|
|
2697
2763
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2698
|
-
|
|
2699
|
-
|
|
2764
|
+
if (result.truncated && !result.structured) {
|
|
2765
|
+
warn(`Adversary was truncated without structured output. Experiment stays at current status.`);
|
|
2766
|
+
} else {
|
|
2767
|
+
updateExperimentStatus(db, exp.id, "challenged");
|
|
2768
|
+
success(`Challenge complete for ${exp.slug}. Run \`majlis doubt\` or \`majlis verify\` next.`);
|
|
2769
|
+
}
|
|
2700
2770
|
}
|
|
2701
2771
|
async function doDoubt(db, exp, root) {
|
|
2702
2772
|
transition(exp.status, "doubted" /* DOUBTED */);
|
|
@@ -2713,8 +2783,12 @@ async function doDoubt(db, exp, root) {
|
|
|
2713
2783
|
taskPrompt: `Doubt the work in experiment ${exp.slug}: ${exp.hypothesis}. Produce a doubt document with evidence for each doubt.`
|
|
2714
2784
|
}, root);
|
|
2715
2785
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2716
|
-
|
|
2717
|
-
|
|
2786
|
+
if (result.truncated && !result.structured) {
|
|
2787
|
+
warn(`Critic was truncated without structured output. Experiment stays at current status.`);
|
|
2788
|
+
} else {
|
|
2789
|
+
updateExperimentStatus(db, exp.id, "doubted");
|
|
2790
|
+
success(`Doubt pass complete for ${exp.slug}. Run \`majlis challenge\` or \`majlis verify\` next.`);
|
|
2791
|
+
}
|
|
2718
2792
|
}
|
|
2719
2793
|
async function doScout(db, exp, root) {
|
|
2720
2794
|
transition(exp.status, "scouted" /* SCOUTED */);
|
|
@@ -2762,6 +2836,10 @@ async function doVerify(db, exp, root) {
|
|
|
2762
2836
|
taskPrompt: `Verify experiment ${exp.slug}: ${exp.hypothesis}. Check provenance and content. Test the ${doubts.length} doubt(s) and any adversarial challenges.`
|
|
2763
2837
|
}, root);
|
|
2764
2838
|
ingestStructuredOutput(db, exp.id, result.structured);
|
|
2839
|
+
if (result.truncated && !result.structured) {
|
|
2840
|
+
warn(`Verifier was truncated without structured output. Experiment stays at 'verifying'.`);
|
|
2841
|
+
return;
|
|
2842
|
+
}
|
|
2765
2843
|
if (result.structured?.doubt_resolutions) {
|
|
2766
2844
|
for (const dr of result.structured.doubt_resolutions) {
|
|
2767
2845
|
if (dr.doubt_id && dr.resolution) {
|
|
@@ -3241,7 +3319,16 @@ async function run(args) {
|
|
|
3241
3319
|
continue;
|
|
3242
3320
|
}
|
|
3243
3321
|
info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
|
|
3244
|
-
|
|
3322
|
+
try {
|
|
3323
|
+
await next([exp.slug], false);
|
|
3324
|
+
} catch (err) {
|
|
3325
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
3326
|
+
warn(`Step failed for ${exp.slug}: ${message}`);
|
|
3327
|
+
try {
|
|
3328
|
+
updateExperimentStatus(db, exp.id, "dead_end");
|
|
3329
|
+
} catch {
|
|
3330
|
+
}
|
|
3331
|
+
}
|
|
3245
3332
|
}
|
|
3246
3333
|
if (stepCount >= MAX_STEPS) {
|
|
3247
3334
|
warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
|