majlis 0.4.1 → 0.4.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/cli.js +73 -40
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -522,24 +522,35 @@ Before building:
|
|
|
522
522
|
3. Check docs/classification/ for problem taxonomy
|
|
523
523
|
4. Check docs/experiments/ for prior work
|
|
524
524
|
|
|
525
|
-
|
|
525
|
+
Read as much code as you need to understand the problem. Reading is free \u2014 spend
|
|
526
|
+
as many turns as necessary on Read, Grep, and Glob to build full context before
|
|
527
|
+
you touch anything.
|
|
528
|
+
|
|
529
|
+
## The Rule: ONE Change, Then Document
|
|
530
|
+
|
|
531
|
+
You make ONE code change per cycle. Not two, not "one more quick fix." ONE.
|
|
526
532
|
|
|
527
|
-
|
|
528
|
-
1. Read and
|
|
529
|
-
2.
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
533
|
+
The sequence:
|
|
534
|
+
1. **Read and understand** \u2014 read synthesis, dead-ends, source code. Take your time.
|
|
535
|
+
2. **Write the experiment doc FIRST** \u2014 before coding, fill in the Approach section
|
|
536
|
+
with what you plan to do and why. This ensures there is always a record.
|
|
537
|
+
3. **Implement ONE focused change** \u2014 a single coherent edit to the codebase.
|
|
538
|
+
4. **Run the benchmark ONCE** \u2014 observe the result.
|
|
539
|
+
5. **Update the experiment doc** \u2014 fill in Results and Metrics with what happened.
|
|
540
|
+
6. **Output the majlis-json block** \u2014 your structured decisions.
|
|
541
|
+
7. **STOP.**
|
|
535
542
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
The cycle will come back to you with their insights.
|
|
543
|
+
If your change doesn't work, document what happened and STOP. Do NOT try to fix it.
|
|
544
|
+
Do NOT iterate. Do NOT "try one more thing." The adversary, critic, and verifier
|
|
545
|
+
exist to diagnose what went wrong. The cycle comes back to you with their insights.
|
|
540
546
|
|
|
541
|
-
If you find yourself wanting to
|
|
542
|
-
and write up what you learned.
|
|
547
|
+
If you find yourself wanting to debug your own fix, that's the signal to stop
|
|
548
|
+
and write up what you learned.
|
|
549
|
+
|
|
550
|
+
## Off-limits (DO NOT modify)
|
|
551
|
+
- \`fixtures/\` \u2014 test data, ground truth, STL files. Read-only.
|
|
552
|
+
- \`scripts/benchmark.py\` \u2014 the measurement tool. Never change how you're measured.
|
|
553
|
+
- \`.majlis/\` \u2014 framework config. Not your concern.
|
|
543
554
|
|
|
544
555
|
## During building:
|
|
545
556
|
- Tag EVERY decision: proof / test / strong-consensus / consensus / analogy / judgment
|
|
@@ -2294,19 +2305,20 @@ ${contextJson}
|
|
|
2294
2305
|
|
|
2295
2306
|
${taskPrompt}`;
|
|
2296
2307
|
const turns = ROLE_MAX_TURNS[role] ?? 15;
|
|
2297
|
-
console.log(`[
|
|
2308
|
+
console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
|
|
2298
2309
|
const { text: markdown, costUsd } = await runQuery({
|
|
2299
2310
|
prompt,
|
|
2300
2311
|
model: agentDef.model,
|
|
2301
2312
|
tools: agentDef.tools,
|
|
2302
2313
|
systemPrompt: agentDef.systemPrompt,
|
|
2303
2314
|
cwd: root,
|
|
2304
|
-
maxTurns: turns
|
|
2315
|
+
maxTurns: turns,
|
|
2316
|
+
label: role
|
|
2305
2317
|
});
|
|
2306
|
-
console.log(`[
|
|
2318
|
+
console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2307
2319
|
const artifactPath = writeArtifact(role, context, markdown, root);
|
|
2308
2320
|
if (artifactPath) {
|
|
2309
|
-
console.log(`[
|
|
2321
|
+
console.log(`[${role}] Artifact written to ${artifactPath}`);
|
|
2310
2322
|
}
|
|
2311
2323
|
const structured = await extractStructuredData(role, markdown);
|
|
2312
2324
|
return { output: markdown, structured };
|
|
@@ -2323,20 +2335,21 @@ ${contextJson}
|
|
|
2323
2335
|
|
|
2324
2336
|
${taskPrompt}`;
|
|
2325
2337
|
const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
|
|
2326
|
-
console.log(`[
|
|
2338
|
+
console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
|
|
2327
2339
|
const { text: markdown, costUsd } = await runQuery({
|
|
2328
2340
|
prompt,
|
|
2329
2341
|
model: "opus",
|
|
2330
2342
|
tools: ["Read", "Glob", "Grep"],
|
|
2331
2343
|
systemPrompt,
|
|
2332
2344
|
cwd: root,
|
|
2333
|
-
maxTurns: 5
|
|
2345
|
+
maxTurns: 5,
|
|
2346
|
+
label: "synthesiser"
|
|
2334
2347
|
});
|
|
2335
|
-
console.log(`[
|
|
2336
|
-
|
|
2337
|
-
return { output: markdown, structured };
|
|
2348
|
+
console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
|
|
2349
|
+
return { output: markdown, structured: { guidance: markdown } };
|
|
2338
2350
|
}
|
|
2339
2351
|
async function runQuery(opts) {
|
|
2352
|
+
const tag = opts.label ?? "majlis";
|
|
2340
2353
|
const conversation = (0, import_claude_agent_sdk2.query)({
|
|
2341
2354
|
prompt: opts.prompt,
|
|
2342
2355
|
options: {
|
|
@@ -2370,21 +2383,21 @@ async function runQuery(opts) {
|
|
|
2370
2383
|
const toolName = block.name ?? "tool";
|
|
2371
2384
|
const input = block.input ?? {};
|
|
2372
2385
|
const detail = formatToolDetail(toolName, input);
|
|
2373
|
-
process.stderr.write(`${DIM2}[
|
|
2386
|
+
process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
|
|
2374
2387
|
`);
|
|
2375
2388
|
}
|
|
2376
2389
|
}
|
|
2377
2390
|
if (hasText) {
|
|
2378
2391
|
const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
|
|
2379
2392
|
if (preview) {
|
|
2380
|
-
process.stderr.write(`${DIM2}[
|
|
2393
|
+
process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
|
|
2381
2394
|
`);
|
|
2382
2395
|
}
|
|
2383
2396
|
}
|
|
2384
2397
|
} else if (message.type === "tool_progress") {
|
|
2385
2398
|
const elapsed = Math.round(message.elapsed_time_seconds);
|
|
2386
2399
|
if (elapsed > 0 && elapsed % 5 === 0) {
|
|
2387
|
-
process.stderr.write(`${DIM2}[
|
|
2400
|
+
process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
|
|
2388
2401
|
`);
|
|
2389
2402
|
}
|
|
2390
2403
|
} else if (message.type === "result") {
|
|
@@ -2392,7 +2405,7 @@ async function runQuery(opts) {
|
|
|
2392
2405
|
costUsd = message.total_cost_usd;
|
|
2393
2406
|
} else if (message.subtype === "error_max_turns") {
|
|
2394
2407
|
costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
|
|
2395
|
-
console.warn(`[
|
|
2408
|
+
console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
|
|
2396
2409
|
} else {
|
|
2397
2410
|
const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
|
|
2398
2411
|
throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
|
|
@@ -2444,7 +2457,7 @@ function writeArtifact(role, context, markdown, projectRoot) {
|
|
|
2444
2457
|
}
|
|
2445
2458
|
const expSlug = context.experiment?.slug ?? "general";
|
|
2446
2459
|
const existing = fs7.readdirSync(fullDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
|
|
2447
|
-
const nextNum = String(existing.length + 1).padStart(3, "0");
|
|
2460
|
+
const nextNum = String(context.experiment?.id ?? existing.length + 1).padStart(3, "0");
|
|
2448
2461
|
const filename = role === "builder" ? `${nextNum}-${expSlug}.md` : `${nextNum}-${role}-${expSlug}.md`;
|
|
2449
2462
|
const target = path7.join(fullDir, filename);
|
|
2450
2463
|
fs7.writeFileSync(target, markdown);
|
|
@@ -2460,13 +2473,13 @@ var init_spawn = __esm({
|
|
|
2460
2473
|
init_parse();
|
|
2461
2474
|
init_connection();
|
|
2462
2475
|
ROLE_MAX_TURNS = {
|
|
2463
|
-
builder:
|
|
2464
|
-
critic:
|
|
2465
|
-
adversary:
|
|
2466
|
-
verifier:
|
|
2467
|
-
compressor:
|
|
2468
|
-
reframer:
|
|
2469
|
-
scout:
|
|
2476
|
+
builder: 50,
|
|
2477
|
+
critic: 30,
|
|
2478
|
+
adversary: 30,
|
|
2479
|
+
verifier: 50,
|
|
2480
|
+
compressor: 30,
|
|
2481
|
+
reframer: 20,
|
|
2482
|
+
scout: 20
|
|
2470
2483
|
};
|
|
2471
2484
|
DIM2 = "\x1B[2m";
|
|
2472
2485
|
RESET2 = "\x1B[0m";
|
|
@@ -2482,9 +2495,19 @@ function worstGrade(grades) {
|
|
|
2482
2495
|
return "sound";
|
|
2483
2496
|
}
|
|
2484
2497
|
async function resolve(db, exp, projectRoot) {
|
|
2485
|
-
|
|
2498
|
+
let grades = getVerificationsByExperiment(db, exp.id);
|
|
2486
2499
|
if (grades.length === 0) {
|
|
2487
|
-
|
|
2500
|
+
warn(`No verification records for ${exp.slug}. Defaulting to weak.`);
|
|
2501
|
+
insertVerification(
|
|
2502
|
+
db,
|
|
2503
|
+
exp.id,
|
|
2504
|
+
"auto-default",
|
|
2505
|
+
"weak",
|
|
2506
|
+
null,
|
|
2507
|
+
null,
|
|
2508
|
+
"No structured verification output. Auto-defaulted to weak."
|
|
2509
|
+
);
|
|
2510
|
+
grades = getVerificationsByExperiment(db, exp.id);
|
|
2488
2511
|
}
|
|
2489
2512
|
const overallGrade = worstGrade(grades);
|
|
2490
2513
|
switch (overallGrade) {
|
|
@@ -3230,7 +3253,16 @@ async function run(args) {
|
|
|
3230
3253
|
continue;
|
|
3231
3254
|
}
|
|
3232
3255
|
info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
|
|
3233
|
-
|
|
3256
|
+
try {
|
|
3257
|
+
await next([exp.slug], false);
|
|
3258
|
+
} catch (err) {
|
|
3259
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
3260
|
+
warn(`Step failed for ${exp.slug}: ${message}`);
|
|
3261
|
+
try {
|
|
3262
|
+
updateExperimentStatus(db, exp.id, "dead_end");
|
|
3263
|
+
} catch {
|
|
3264
|
+
}
|
|
3265
|
+
}
|
|
3234
3266
|
}
|
|
3235
3267
|
if (stepCount >= MAX_STEPS) {
|
|
3236
3268
|
warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
|
|
@@ -3286,7 +3318,8 @@ ${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structu
|
|
|
3286
3318
|
3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
|
|
3287
3319
|
- It must NOT repeat a dead-ended approach (check the dead-end registry!)
|
|
3288
3320
|
- It should attack the weakest point revealed by synthesis/fragility
|
|
3289
|
-
- It must be specific and actionable \u2014 name the
|
|
3321
|
+
- It must be specific and actionable \u2014 name the function or mechanism to change
|
|
3322
|
+
- Do NOT reference specific line numbers \u2014 they shift between experiments
|
|
3290
3323
|
- The hypothesis should be a single sentence describing what to do, e.g.:
|
|
3291
3324
|
"Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
|
|
3292
3325
|
|