majlis 0.3.0 → 0.3.2
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/cli.js +40 -16
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -522,13 +522,29 @@ Before building:
|
|
|
522
522
|
3. Check docs/classification/ for problem taxonomy
|
|
523
523
|
4. Check docs/experiments/ for prior work
|
|
524
524
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
525
|
+
## Scope Constraint (CRITICAL)
|
|
526
|
+
|
|
527
|
+
You get ONE attempt per cycle. Your job is:
|
|
528
|
+
1. Read and diagnose \u2014 understand the problem thoroughly
|
|
529
|
+
2. Form ONE hypothesis about what to fix
|
|
530
|
+
3. Implement ONE focused change (not a multi-step debug session)
|
|
531
|
+
4. Run the benchmark ONCE to see the result
|
|
532
|
+
5. Document what you did, what happened, and what you think the next step should be
|
|
533
|
+
6. STOP
|
|
534
|
+
|
|
535
|
+
Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
|
|
536
|
+
If your change doesn't work, document why and let the cycle continue \u2014
|
|
537
|
+
the adversary, critic, and verifier will help diagnose what went wrong.
|
|
538
|
+
The cycle will come back to you with their insights.
|
|
539
|
+
|
|
540
|
+
If you find yourself wanting to "try one more thing," that's the signal to stop
|
|
541
|
+
and write up what you learned. The other agents exist precisely for this reason.
|
|
542
|
+
|
|
543
|
+
## During building:
|
|
528
544
|
- Tag EVERY decision: proof / test / strong-consensus / consensus / analogy / judgment
|
|
529
545
|
- When making judgment-level decisions, state: "This is judgment \u2014 reasoning without precedent"
|
|
530
546
|
- Run baseline metrics BEFORE making changes
|
|
531
|
-
- Run comparison metrics AFTER making changes
|
|
547
|
+
- Run comparison metrics AFTER making changes (once)
|
|
532
548
|
|
|
533
549
|
You may NOT verify your own work or mark your own decisions as proven.
|
|
534
550
|
Output your decisions in structured format so they can be recorded in the database.
|
|
@@ -564,9 +580,8 @@ Rules:
|
|
|
564
580
|
- Every doubt MUST reference evidence. "This feels wrong" is not a doubt.
|
|
565
581
|
- You may NOT suggest fixes. Identify problems only.
|
|
566
582
|
- Focus on judgment and analogy-level decisions first.
|
|
567
|
-
- You may NOT modify any files. Produce
|
|
568
|
-
|
|
569
|
-
Write to docs/doubts/NNN-against-experiment-NNN.md
|
|
583
|
+
- You may NOT modify any files. Produce your doubt document as output only.
|
|
584
|
+
- Do NOT attempt to write files. The framework saves your output automatically.
|
|
570
585
|
|
|
571
586
|
## Structured Output Format
|
|
572
587
|
<!-- majlis-json
|
|
@@ -592,9 +607,8 @@ For each approach the builder takes, ask:
|
|
|
592
607
|
- Under what conditions do two things the builder treats as distinct become identical?
|
|
593
608
|
|
|
594
609
|
Produce constructed counterexamples with reasoning.
|
|
595
|
-
Do NOT suggest fixes. Do NOT modify files.
|
|
596
|
-
|
|
597
|
-
Write to docs/challenges/NNN-against-experiment-NNN.md
|
|
610
|
+
Do NOT suggest fixes. Do NOT modify files. Do NOT attempt to write files.
|
|
611
|
+
The framework saves your output automatically.
|
|
598
612
|
|
|
599
613
|
## Structured Output Format
|
|
600
614
|
<!-- majlis-json
|
|
@@ -624,7 +638,7 @@ CONTENT CHECK:
|
|
|
624
638
|
Grade each component: sound / good / weak / rejected
|
|
625
639
|
Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
|
|
626
640
|
|
|
627
|
-
|
|
641
|
+
Produce your verification report as output. The framework saves it automatically.
|
|
628
642
|
|
|
629
643
|
## Structured Output Format
|
|
630
644
|
<!-- majlis-json
|
|
@@ -658,7 +672,8 @@ Independently propose:
|
|
|
658
672
|
Compare your decomposition with the existing classification.
|
|
659
673
|
Flag structural divergences \u2014 these are the most valuable signals.
|
|
660
674
|
|
|
661
|
-
|
|
675
|
+
Produce your reframe document as output. Do NOT attempt to write files.
|
|
676
|
+
The framework saves your output automatically.`,
|
|
662
677
|
compressor: `---
|
|
663
678
|
name: compressor
|
|
664
679
|
model: opus
|
|
@@ -705,9 +720,8 @@ For the given experiment:
|
|
|
705
720
|
Rules:
|
|
706
721
|
- Present findings neutrally. Report each approach on its own terms.
|
|
707
722
|
- Note where external approaches contradict the current one \u2014 these are the most valuable signals.
|
|
708
|
-
- You may NOT modify code or make decisions. Produce
|
|
709
|
-
|
|
710
|
-
Write to docs/rihla/NNN-scout-for-experiment-NNN.md
|
|
723
|
+
- You may NOT modify code or make decisions. Produce your rihla document as output only.
|
|
724
|
+
- Do NOT attempt to write files. The framework saves your output automatically.
|
|
711
725
|
|
|
712
726
|
## Structured Output Format
|
|
713
727
|
<!-- majlis-json
|
|
@@ -2058,6 +2072,9 @@ function determineNextStep(exp, valid, hasDoubts2, hasChallenges2) {
|
|
|
2058
2072
|
throw new Error(`Experiment ${exp.slug} is terminal (${exp.status})`);
|
|
2059
2073
|
}
|
|
2060
2074
|
const status2 = exp.status;
|
|
2075
|
+
if (status2 === "classified" /* CLASSIFIED */) {
|
|
2076
|
+
return valid.includes("building" /* BUILDING */) ? "building" /* BUILDING */ : valid[0];
|
|
2077
|
+
}
|
|
2061
2078
|
if (status2 === "built" /* BUILT */ && !hasDoubts2) {
|
|
2062
2079
|
return valid.includes("doubted" /* DOUBTED */) ? "doubted" /* DOUBTED */ : valid[0];
|
|
2063
2080
|
}
|
|
@@ -2322,7 +2339,7 @@ async function runQuery(opts) {
|
|
|
2322
2339
|
cwd: opts.cwd,
|
|
2323
2340
|
permissionMode: "bypassPermissions",
|
|
2324
2341
|
allowDangerouslySkipPermissions: true,
|
|
2325
|
-
maxTurns:
|
|
2342
|
+
maxTurns: 30,
|
|
2326
2343
|
persistSession: false,
|
|
2327
2344
|
settingSources: ["project"]
|
|
2328
2345
|
}
|
|
@@ -2362,6 +2379,9 @@ async function runQuery(opts) {
|
|
|
2362
2379
|
} else if (message.type === "result") {
|
|
2363
2380
|
if (message.subtype === "success") {
|
|
2364
2381
|
costUsd = message.total_cost_usd;
|
|
2382
|
+
} else if (message.subtype === "error_max_turns") {
|
|
2383
|
+
costUsd = "total_cost_usd" in message ? message.total_cost_usd : 0;
|
|
2384
|
+
console.warn(`[majlis] Agent hit max turns (${turnCount}). Returning partial output.`);
|
|
2365
2385
|
} else {
|
|
2366
2386
|
const errors = "errors" in message ? message.errors?.join("; ") ?? "Unknown error" : "Unknown error";
|
|
2367
2387
|
throw new Error(`Agent query failed (${message.subtype}): ${errors}`);
|
|
@@ -3095,6 +3115,10 @@ async function executeStep(step, exp, root) {
|
|
|
3095
3115
|
case "compressed" /* COMPRESSED */:
|
|
3096
3116
|
await cycle("compress", []);
|
|
3097
3117
|
break;
|
|
3118
|
+
case "reframed" /* REFRAMED */:
|
|
3119
|
+
updateExperimentStatus(getDb(root), exp.id, "reframed");
|
|
3120
|
+
info(`Reframe acknowledged for ${exp.slug}. Proceeding to build.`);
|
|
3121
|
+
break;
|
|
3098
3122
|
default:
|
|
3099
3123
|
warn(`Don't know how to execute step: ${step}`);
|
|
3100
3124
|
}
|