majlis 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/cli.js +73 -40
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -522,24 +522,35 @@ Before building:
522
522
  3. Check docs/classification/ for problem taxonomy
523
523
  4. Check docs/experiments/ for prior work
524
524
 
525
- ## Scope Constraint (CRITICAL)
525
+ Read as much code as you need to understand the problem. Reading is free \u2014 spend
526
+ as many turns as necessary on Read, Grep, and Glob to build full context before
527
+ you touch anything.
528
+
529
+ ## The Rule: ONE Change, Then Document
530
+
531
+ You make ONE code change per cycle. Not two, not "one more quick fix." ONE.
526
532
 
527
- You get ONE attempt per cycle. Your job is:
528
- 1. Read and diagnose \u2014 understand the problem thoroughly
529
- 2. Form ONE hypothesis about what to fix
530
- 3. Implement ONE focused change (not a multi-step debug session)
531
- 4. Run the benchmark ONCE to see the result
532
- 5. Update the experiment doc in docs/experiments/ \u2014 fill in Approach, Results, and Metrics sections. This is NOT optional.
533
- 6. Output the structured majlis-json block with your decisions
534
- 7. STOP
533
+ The sequence:
534
+ 1. **Read and understand** \u2014 read synthesis, dead-ends, source code. Take your time.
535
+ 2. **Write the experiment doc FIRST** \u2014 before coding, fill in the Approach section
536
+ with what you plan to do and why. This ensures there is always a record.
537
+ 3. **Implement ONE focused change** \u2014 a single coherent edit to the codebase.
538
+ 4. **Run the benchmark ONCE** \u2014 observe the result.
539
+ 5. **Update the experiment doc** \u2014 fill in Results and Metrics with what happened.
540
+ 6. **Output the majlis-json block** \u2014 your structured decisions.
541
+ 7. **STOP.**
535
542
 
536
- Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
537
- If your change doesn't work, document why and let the cycle continue \u2014
538
- the adversary, critic, and verifier will help diagnose what went wrong.
539
- The cycle will come back to you with their insights.
543
+ If your change doesn't work, document what happened and STOP. Do NOT try to fix it.
544
+ Do NOT iterate. Do NOT "try one more thing." The adversary, critic, and verifier
545
+ exist to diagnose what went wrong. The cycle comes back to you with their insights.
540
546
 
541
- If you find yourself wanting to "try one more thing," that's the signal to stop
542
- and write up what you learned. The other agents exist precisely for this reason.
547
+ If you find yourself wanting to debug your own fix, that's the signal to stop
548
+ and write up what you learned.
549
+
550
+ ## Off-limits (DO NOT modify)
551
+ - \`fixtures/\` \u2014 test data, ground truth, STL files. Read-only.
552
+ - \`scripts/benchmark.py\` \u2014 the measurement tool. Never change how you're measured.
553
+ - \`.majlis/\` \u2014 framework config. Not your concern.
543
554
 
544
555
  ## During building:
545
556
  - Tag EVERY decision: proof / test / strong-consensus / consensus / analogy / judgment
@@ -2294,19 +2305,20 @@ ${contextJson}
2294
2305
 
2295
2306
  ${taskPrompt}`;
2296
2307
  const turns = ROLE_MAX_TURNS[role] ?? 15;
2297
- console.log(`[majlis] Spawning ${role} agent (model: ${agentDef.model}, maxTurns: ${turns})...`);
2308
+ console.log(`[${role}] Spawning (model: ${agentDef.model}, maxTurns: ${turns})...`);
2298
2309
  const { text: markdown, costUsd } = await runQuery({
2299
2310
  prompt,
2300
2311
  model: agentDef.model,
2301
2312
  tools: agentDef.tools,
2302
2313
  systemPrompt: agentDef.systemPrompt,
2303
2314
  cwd: root,
2304
- maxTurns: turns
2315
+ maxTurns: turns,
2316
+ label: role
2305
2317
  });
2306
- console.log(`[majlis] ${role} agent complete (cost: $${costUsd.toFixed(4)})`);
2318
+ console.log(`[${role}] Complete (cost: $${costUsd.toFixed(4)})`);
2307
2319
  const artifactPath = writeArtifact(role, context, markdown, root);
2308
2320
  if (artifactPath) {
2309
- console.log(`[majlis] ${role} artifact written to ${artifactPath}`);
2321
+ console.log(`[${role}] Artifact written to ${artifactPath}`);
2310
2322
  }
2311
2323
  const structured = await extractStructuredData(role, markdown);
2312
2324
  return { output: markdown, structured };
@@ -2323,20 +2335,21 @@ ${contextJson}
2323
2335
 
2324
2336
  ${taskPrompt}`;
2325
2337
  const systemPrompt = 'You are a Synthesis Agent. Be concrete: which decisions failed, which assumptions broke, what constraints must the next approach satisfy. CRITICAL: Your LAST line of output MUST be a <!-- majlis-json --> block. The framework parses this programmatically \u2014 if you omit it, the pipeline breaks. Format: <!-- majlis-json {"guidance": "your guidance here"} -->';
2326
- console.log(`[majlis] Spawning synthesiser micro-agent...`);
2338
+ console.log(`[synthesiser] Spawning (maxTurns: 5)...`);
2327
2339
  const { text: markdown, costUsd } = await runQuery({
2328
2340
  prompt,
2329
2341
  model: "opus",
2330
2342
  tools: ["Read", "Glob", "Grep"],
2331
2343
  systemPrompt,
2332
2344
  cwd: root,
2333
- maxTurns: 5
2345
+ maxTurns: 5,
2346
+ label: "synthesiser"
2334
2347
  });
2335
- console.log(`[majlis] Synthesiser complete (cost: $${costUsd.toFixed(4)})`);
2336
- const structured = await extractStructuredData("synthesiser", markdown);
2337
- return { output: markdown, structured };
2348
+ console.log(`[synthesiser] Complete (cost: $${costUsd.toFixed(4)})`);
2349
+ return { output: markdown, structured: { guidance: markdown } };
2338
2350
  }
2339
2351
  async function runQuery(opts) {
2352
+ const tag = opts.label ?? "majlis";
2340
2353
  const conversation = (0, import_claude_agent_sdk2.query)({
2341
2354
  prompt: opts.prompt,
2342
2355
  options: {
@@ -2370,21 +2383,21 @@ async function runQuery(opts) {
2370
2383
  const toolName = block.name ?? "tool";
2371
2384
  const input = block.input ?? {};
2372
2385
  const detail = formatToolDetail(toolName, input);
2373
- process.stderr.write(`${DIM2}[majlis] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2386
+ process.stderr.write(`${DIM2}[${tag}] ${CYAN2}${toolName}${RESET2}${DIM2}${detail}${RESET2}
2374
2387
  `);
2375
2388
  }
2376
2389
  }
2377
2390
  if (hasText) {
2378
2391
  const preview = textParts[textParts.length - 1].slice(0, 120).replace(/\n/g, " ").trim();
2379
2392
  if (preview) {
2380
- process.stderr.write(`${DIM2}[majlis] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2393
+ process.stderr.write(`${DIM2}[${tag}] writing: ${preview}${preview.length >= 120 ? "..." : ""}${RESET2}
2381
2394
  `);
2382
2395
  }
2383
2396
  }
2384
2397
  } else if (message.type === "tool_progress") {
2385
2398
  const elapsed = Math.round(message.elapsed_time_seconds);
2386
2399
  if (elapsed > 0 && elapsed % 5 === 0) {
2387
- process.stderr.write(`${DIM2}[majlis] ${message.tool_name} running (${elapsed}s)...${RESET2}
2400
+ process.stderr.write(`${DIM2}[${tag}] ${message.tool_name} running (${elapsed}s)...${RESET2}
2388
2401
  `);
2389
2402
  }
2390
2403
  } else if (message.type === "result") {
@@ -2392,7 +2405,7 @@ async function runQuery(opts) {
2392
2405
  costUsd = message.total_cost_usd;
2393
2406
  } else if (message.subtype === "error_max_turns") {
2394
2407
  costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
2395
- console.warn(`[majlis] Agent hit max turns (${turnCount}). Returning partial output.`);
2408
+ console.warn(`[${tag}] Hit max turns (${turnCount}). Returning partial output.`);
2396
2409
  } else {
2397
2410
  const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
2398
2411
  throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
@@ -2444,7 +2457,7 @@ function writeArtifact(role, context, markdown, projectRoot) {
2444
2457
  }
2445
2458
  const expSlug = context.experiment?.slug ?? "general";
2446
2459
  const existing = fs7.readdirSync(fullDir).filter((f) => f.endsWith(".md") && !f.startsWith("_"));
2447
- const nextNum = String(existing.length + 1).padStart(3, "0");
2460
+ const nextNum = String(context.experiment?.id ?? existing.length + 1).padStart(3, "0");
2448
2461
  const filename = role === "builder" ? `${nextNum}-${expSlug}.md` : `${nextNum}-${role}-${expSlug}.md`;
2449
2462
  const target = path7.join(fullDir, filename);
2450
2463
  fs7.writeFileSync(target, markdown);
@@ -2460,13 +2473,13 @@ var init_spawn = __esm({
2460
2473
  init_parse();
2461
2474
  init_connection();
2462
2475
  ROLE_MAX_TURNS = {
2463
- builder: 15,
2464
- critic: 12,
2465
- adversary: 12,
2466
- verifier: 15,
2467
- compressor: 15,
2468
- reframer: 12,
2469
- scout: 12
2476
+ builder: 50,
2477
+ critic: 30,
2478
+ adversary: 30,
2479
+ verifier: 50,
2480
+ compressor: 30,
2481
+ reframer: 20,
2482
+ scout: 20
2470
2483
  };
2471
2484
  DIM2 = "\x1B[2m";
2472
2485
  RESET2 = "\x1B[0m";
@@ -2482,9 +2495,19 @@ function worstGrade(grades) {
2482
2495
  return "sound";
2483
2496
  }
2484
2497
  async function resolve(db, exp, projectRoot) {
2485
- const grades = getVerificationsByExperiment(db, exp.id);
2498
+ let grades = getVerificationsByExperiment(db, exp.id);
2486
2499
  if (grades.length === 0) {
2487
- throw new Error(`No verifications found for experiment ${exp.slug}. Run verify first.`);
2500
+ warn(`No verification records for ${exp.slug}. Defaulting to weak.`);
2501
+ insertVerification(
2502
+ db,
2503
+ exp.id,
2504
+ "auto-default",
2505
+ "weak",
2506
+ null,
2507
+ null,
2508
+ "No structured verification output. Auto-defaulted to weak."
2509
+ );
2510
+ grades = getVerificationsByExperiment(db, exp.id);
2488
2511
  }
2489
2512
  const overallGrade = worstGrade(grades);
2490
2513
  switch (overallGrade) {
@@ -3230,7 +3253,16 @@ async function run(args) {
3230
3253
  continue;
3231
3254
  }
3232
3255
  info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
3233
- await next([exp.slug], false);
3256
+ try {
3257
+ await next([exp.slug], false);
3258
+ } catch (err) {
3259
+ const message = err instanceof Error ? err.message : String(err);
3260
+ warn(`Step failed for ${exp.slug}: ${message}`);
3261
+ try {
3262
+ updateExperimentStatus(db, exp.id, "dead_end");
3263
+ } catch {
3264
+ }
3265
+ }
3234
3266
  }
3235
3267
  if (stepCount >= MAX_STEPS) {
3236
3268
  warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
@@ -3286,7 +3318,8 @@ ${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structu
3286
3318
  3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
3287
3319
  - It must NOT repeat a dead-ended approach (check the dead-end registry!)
3288
3320
  - It should attack the weakest point revealed by synthesis/fragility
3289
- - It must be specific and actionable \u2014 name the exact code/function/mechanism to change
3321
+ - It must be specific and actionable \u2014 name the function or mechanism to change
3322
+ - Do NOT reference specific line numbers \u2014 they shift between experiments
3290
3323
  - The hypothesis should be a single sentence describing what to do, e.g.:
3291
3324
  "Activate addSeamEdges() in the runEdgeFirst pipeline for full-revolution cylinder faces"
3292
3325
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "majlis",
3
- "version": "0.4.1",
3
+ "version": "0.4.3",
4
4
  "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
5
5
  "bin": {
6
6
  "majlis": "./dist/cli.js"