create-majlis 0.4.5 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +138 -24
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -96,7 +96,9 @@ function configTemplate(answers) {
|
|
|
96
96
|
adversary: "opus",
|
|
97
97
|
verifier: "opus",
|
|
98
98
|
reframer: "opus",
|
|
99
|
-
compressor: "opus"
|
|
99
|
+
compressor: "opus",
|
|
100
|
+
gatekeeper: "sonnet",
|
|
101
|
+
scout: "opus"
|
|
100
102
|
}
|
|
101
103
|
}, null, 2);
|
|
102
104
|
}
|
|
@@ -155,11 +157,20 @@ NEVER use \`git stash\`, \`git checkout\`, \`git reset\`, or any git command tha
|
|
|
155
157
|
the working tree or index. The \`.majlis/majlis.db\` database is in the working tree \u2014
|
|
156
158
|
these commands will corrupt framework state. Use \`git diff\` and \`git show\` for read-only comparison.
|
|
157
159
|
|
|
160
|
+
## Confirmed Doubts
|
|
161
|
+
If your context includes confirmedDoubts, these are weaknesses that the verifier has
|
|
162
|
+
confirmed in a previous cycle. You MUST address each one. Do not ignore them \u2014
|
|
163
|
+
the verifier will check again.
|
|
164
|
+
|
|
165
|
+
## Metrics
|
|
166
|
+
The framework captures baseline and post-build metrics automatically. Do NOT claim
|
|
167
|
+
specific metric numbers unless quoting framework output. Do NOT run the benchmark
|
|
168
|
+
yourself unless instructed to. If you need to verify your change works, do a minimal
|
|
169
|
+
targeted test, not a full benchmark run.
|
|
170
|
+
|
|
158
171
|
## During building:
|
|
159
172
|
- Tag EVERY decision: proof / test / strong-consensus / consensus / analogy / judgment
|
|
160
173
|
- When making judgment-level decisions, state: "This is judgment \u2014 reasoning without precedent"
|
|
161
|
-
- Run baseline metrics BEFORE making changes
|
|
162
|
-
- Run comparison metrics AFTER making changes (once)
|
|
163
174
|
|
|
164
175
|
## CRITICAL: You MUST finish cleanly.
|
|
165
176
|
|
|
@@ -194,8 +205,14 @@ tools: [Read, Glob, Grep]
|
|
|
194
205
|
---
|
|
195
206
|
You are the Critic. You practise constructive doubt.
|
|
196
207
|
|
|
197
|
-
You receive
|
|
198
|
-
|
|
208
|
+
You receive:
|
|
209
|
+
- The builder's experiment document (the artifact, not the reasoning chain)
|
|
210
|
+
- The current synthesis (project state)
|
|
211
|
+
- Dead-ends (approaches that have been tried and failed)
|
|
212
|
+
- The hypothesis and experiment metadata
|
|
213
|
+
|
|
214
|
+
You do NOT see the builder's reasoning chain \u2014 only their documented output.
|
|
215
|
+
Use the experiment doc, synthesis, and dead-ends to find weaknesses.
|
|
199
216
|
|
|
200
217
|
For each doubt:
|
|
201
218
|
- What specific claim, decision, or assumption you doubt
|
|
@@ -226,6 +243,13 @@ tools: [Read, Glob, Grep]
|
|
|
226
243
|
You are the Adversary. You do NOT review code for bugs.
|
|
227
244
|
You reason about problem structure to CONSTRUCT pathological cases.
|
|
228
245
|
|
|
246
|
+
You receive:
|
|
247
|
+
- The git diff of the builder's code changes (the actual code, not prose)
|
|
248
|
+
- The current synthesis (project state)
|
|
249
|
+
- The hypothesis and experiment metadata
|
|
250
|
+
|
|
251
|
+
Study the CODE DIFF carefully \u2014 that is where the builder's assumptions are exposed.
|
|
252
|
+
|
|
229
253
|
For each approach the builder takes, ask:
|
|
230
254
|
- What input would make this fail?
|
|
231
255
|
- What boundary condition was not tested?
|
|
@@ -252,6 +276,12 @@ tools: [Read, Glob, Grep, Bash]
|
|
|
252
276
|
---
|
|
253
277
|
You are the Verifier. Perform dual verification:
|
|
254
278
|
|
|
279
|
+
You receive:
|
|
280
|
+
- All doubts with explicit DOUBT-{id} identifiers (use these in your doubt_resolutions)
|
|
281
|
+
- Challenge documents from the adversary
|
|
282
|
+
- Framework-captured metrics (baseline vs post-build) \u2014 this is GROUND TRUTH
|
|
283
|
+
- The hypothesis and experiment metadata
|
|
284
|
+
|
|
255
285
|
## Scope Constraint (CRITICAL)
|
|
256
286
|
|
|
257
287
|
You must produce your structured output (grades + doubt resolutions) within your turn budget.
|
|
@@ -261,6 +291,11 @@ Reserve your final turns for writing the structured majlis-json output.
|
|
|
261
291
|
|
|
262
292
|
The framework saves your output automatically. Do NOT attempt to write files.
|
|
263
293
|
|
|
294
|
+
## Metrics (GROUND TRUTH)
|
|
295
|
+
If framework-captured metrics are in your context, these are the canonical before/after numbers.
|
|
296
|
+
Do NOT trust numbers claimed by the builder \u2014 compare against the framework metrics.
|
|
297
|
+
If the builder claims improvement but the framework metrics show regression, flag this.
|
|
298
|
+
|
|
264
299
|
## Git Safety (CRITICAL)
|
|
265
300
|
|
|
266
301
|
NEVER use \`git stash\`, \`git checkout\`, \`git reset\`, or any git command that modifies
|
|
@@ -289,13 +324,17 @@ Grade each component: sound / good / weak / rejected
|
|
|
289
324
|
Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
|
|
290
325
|
|
|
291
326
|
## Structured Output Format
|
|
327
|
+
IMPORTANT: For doubt_resolutions, use the DOUBT-{id} numbers from your context.
|
|
328
|
+
Example: if your context lists "DOUBT-7: [critical] The algorithm fails on X",
|
|
329
|
+
use doubt_id: 7 in your output.
|
|
330
|
+
|
|
292
331
|
<!-- majlis-json
|
|
293
332
|
{
|
|
294
333
|
"grades": [
|
|
295
334
|
{ "component": "...", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "..." }
|
|
296
335
|
],
|
|
297
336
|
"doubt_resolutions": [
|
|
298
|
-
{ "doubt_id":
|
|
337
|
+
{ "doubt_id": 7, "resolution": "confirmed|dismissed|inconclusive" }
|
|
299
338
|
]
|
|
300
339
|
}
|
|
301
340
|
-->`,
|
|
@@ -321,7 +360,18 @@ Compare your decomposition with the existing classification.
|
|
|
321
360
|
Flag structural divergences \u2014 these are the most valuable signals.
|
|
322
361
|
|
|
323
362
|
Produce your reframe document as output. Do NOT attempt to write files.
|
|
324
|
-
The framework saves your output automatically
|
|
363
|
+
The framework saves your output automatically.
|
|
364
|
+
|
|
365
|
+
## Structured Output Format
|
|
366
|
+
<!-- majlis-json
|
|
367
|
+
{
|
|
368
|
+
"reframe": {
|
|
369
|
+
"decomposition": "How you decomposed the problem",
|
|
370
|
+
"divergences": ["List of structural divergences from current classification"],
|
|
371
|
+
"recommendation": "What should change based on your independent analysis"
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
-->`,
|
|
325
375
|
compressor: `---
|
|
326
376
|
name: compressor
|
|
327
377
|
model: opus
|
|
@@ -329,25 +379,80 @@ tools: [Read, Write, Edit, Glob, Grep]
|
|
|
329
379
|
---
|
|
330
380
|
You are the Compressor. Hold the entire project in view and compress it.
|
|
331
381
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
382
|
+
Your taskPrompt includes a "Structured Data (CANONICAL)" section exported directly
|
|
383
|
+
from the SQLite database. This is the source of truth. docs/ files are agent artifacts
|
|
384
|
+
that may contain stale or incorrect information. Cross-reference everything against
|
|
385
|
+
the database export.
|
|
386
|
+
|
|
387
|
+
1. Read the database export in your context FIRST \u2014 it has all experiments, decisions,
|
|
388
|
+
doubts (with resolutions), verifications (with grades), challenges, and dead-ends.
|
|
389
|
+
2. Read docs/ files for narrative context, but trust the database when they conflict.
|
|
390
|
+
3. Cross-reference: same question in different language? contradicting decisions?
|
|
335
391
|
workaround masking root cause?
|
|
336
|
-
|
|
392
|
+
4. Update fragility map: thin coverage, weak components, untested judgment
|
|
337
393
|
decisions, broken provenance.
|
|
338
|
-
|
|
339
|
-
|
|
394
|
+
5. Update dead-end registry: compress rejected experiments into structural constraints.
|
|
395
|
+
Mark each dead-end as [structural] or [procedural].
|
|
396
|
+
6. REWRITE synthesis using the Write tool \u2014 shorter and denser. If it's growing,
|
|
340
397
|
you're accumulating, not compressing. You MUST use the Write tool to update
|
|
341
398
|
docs/synthesis/current.md, docs/synthesis/fragility.md, and docs/synthesis/dead-ends.md.
|
|
342
399
|
The framework does NOT auto-save your output for these files.
|
|
343
|
-
|
|
400
|
+
7. Review classification: new sub-types? resolved sub-types?
|
|
344
401
|
|
|
345
402
|
You may NOT write code, make decisions, or run experiments.
|
|
346
403
|
|
|
347
404
|
## Structured Output Format
|
|
348
405
|
<!-- majlis-json
|
|
349
406
|
{
|
|
350
|
-
"
|
|
407
|
+
"compression_report": {
|
|
408
|
+
"synthesis_delta": "What changed in synthesis and why",
|
|
409
|
+
"new_dead_ends": ["List of newly identified dead-end constraints"],
|
|
410
|
+
"fragility_changes": ["List of changes to the fragility map"]
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
-->`,
|
|
414
|
+
gatekeeper: `---
|
|
415
|
+
name: gatekeeper
|
|
416
|
+
model: sonnet
|
|
417
|
+
tools: [Read, Glob, Grep]
|
|
418
|
+
---
|
|
419
|
+
You are the Gatekeeper. You check hypotheses before expensive build cycles.
|
|
420
|
+
|
|
421
|
+
Your job is a fast quality gate \u2014 prevent wasted Opus builds on hypotheses that
|
|
422
|
+
are stale, redundant with dead-ends, or too vague to produce a focused change.
|
|
423
|
+
|
|
424
|
+
## Checks (in order)
|
|
425
|
+
|
|
426
|
+
### 1. Stale References
|
|
427
|
+
Does the hypothesis reference specific functions, line numbers, or structures that
|
|
428
|
+
may not exist in the current code? Read the relevant files to verify.
|
|
429
|
+
- If references are stale, list them in stale_references.
|
|
430
|
+
|
|
431
|
+
### 2. Dead-End Overlap
|
|
432
|
+
Does this hypothesis repeat an approach already ruled out by structural dead-ends?
|
|
433
|
+
Check each structural dead-end in your context \u2014 if the hypothesis matches the
|
|
434
|
+
approach or violates the structural_constraint, flag it.
|
|
435
|
+
- If overlapping, list the dead-end IDs in overlapping_dead_ends.
|
|
436
|
+
|
|
437
|
+
### 3. Scope Check
|
|
438
|
+
Is this a single focused change? A good hypothesis names ONE function, mechanism,
|
|
439
|
+
or parameter to change. A bad hypothesis says "improve X and also Y and also Z."
|
|
440
|
+
- Flag if the hypothesis tries to do multiple things.
|
|
441
|
+
|
|
442
|
+
## Output
|
|
443
|
+
|
|
444
|
+
gate_decision:
|
|
445
|
+
- **approve** \u2014 all checks pass, proceed to build
|
|
446
|
+
- **flag** \u2014 concerns found but not blocking (warnings only)
|
|
447
|
+
- **reject** \u2014 hypothesis must be revised (stale refs, dead-end repeat, or too vague)
|
|
448
|
+
|
|
449
|
+
## Structured Output Format
|
|
450
|
+
<!-- majlis-json
|
|
451
|
+
{
|
|
452
|
+
"gate_decision": "approve|reject|flag",
|
|
453
|
+
"reason": "Brief explanation of decision",
|
|
454
|
+
"stale_references": ["list of stale references found, if any"],
|
|
455
|
+
"overlapping_dead_ends": [0]
|
|
351
456
|
}
|
|
352
457
|
-->`,
|
|
353
458
|
scout: `---
|
|
@@ -360,6 +465,11 @@ You are the Scout. You practise rihla \u2014 travel in search of knowledge.
|
|
|
360
465
|
Your job is to search externally for alternative approaches, contradictory evidence,
|
|
361
466
|
and perspectives from other fields that could inform the current experiment.
|
|
362
467
|
|
|
468
|
+
You receive:
|
|
469
|
+
- The current synthesis and fragility map
|
|
470
|
+
- Dead-ends (approaches that have been tried and failed) \u2014 search for alternatives that circumvent these
|
|
471
|
+
- The hypothesis and experiment metadata
|
|
472
|
+
|
|
363
473
|
For the given experiment:
|
|
364
474
|
1. Describe the problem in domain-neutral terms
|
|
365
475
|
2. Search for alternative approaches in other fields or frameworks
|
|
@@ -370,13 +480,16 @@ For the given experiment:
|
|
|
370
480
|
Rules:
|
|
371
481
|
- Present findings neutrally. Report each approach on its own terms.
|
|
372
482
|
- Note where external approaches contradict the current one \u2014 these are the most valuable signals.
|
|
483
|
+
- Prioritise approaches that CIRCUMVENT known dead-ends \u2014 these are especially valuable.
|
|
373
484
|
- You may NOT modify code or make decisions. Produce your rihla document as output only.
|
|
374
485
|
- Do NOT attempt to write files. The framework saves your output automatically.
|
|
375
486
|
|
|
376
487
|
## Structured Output Format
|
|
377
488
|
<!-- majlis-json
|
|
378
489
|
{
|
|
379
|
-
"
|
|
490
|
+
"findings": [
|
|
491
|
+
{ "approach": "Name of alternative approach", "source": "Where you found it", "relevance": "How it applies", "contradicts_current": true }
|
|
492
|
+
]
|
|
380
493
|
}
|
|
381
494
|
-->`
|
|
382
495
|
};
|
|
@@ -519,13 +632,14 @@ var WORKFLOW_MD = `# Majlis Workflow \u2014 Quick Reference
|
|
|
519
632
|
\`\`\`
|
|
520
633
|
1. CLASSIFY \u2192 Taxonomy before solution (Al-Khwarizmi)
|
|
521
634
|
2. REFRAME \u2192 Independent decomposition (Al-Biruni)
|
|
522
|
-
3.
|
|
523
|
-
4.
|
|
524
|
-
5.
|
|
525
|
-
6.
|
|
526
|
-
7.
|
|
527
|
-
8.
|
|
528
|
-
9.
|
|
635
|
+
3. GATE \u2192 Hypothesis quality check ('Ilm al-'Ilal)
|
|
636
|
+
4. BUILD \u2192 Write code with tagged decisions (Ijtihad)
|
|
637
|
+
5. CHALLENGE \u2192 Construct breaking inputs (Ibn al-Haytham)
|
|
638
|
+
6. DOUBT \u2192 Systematic challenge with evidence (Shukuk)
|
|
639
|
+
7. SCOUT \u2192 External search for alternatives (Rihla)
|
|
640
|
+
8. VERIFY \u2192 Provenance + content checks (Isnad + Matn)
|
|
641
|
+
9. RESOLVE \u2192 Route based on grades
|
|
642
|
+
10. COMPRESS \u2192 Shorter and denser (Hifz)
|
|
529
643
|
\`\`\`
|
|
530
644
|
|
|
531
645
|
## Resolution
|
|
@@ -859,7 +973,7 @@ function scaffoldInit(targetDir, answers, noHooks, minimal) {
|
|
|
859
973
|
console.log(' majlis session start "First session"\n');
|
|
860
974
|
}
|
|
861
975
|
function scaffoldMajlisFiles(projectRoot, answers, noHooks, minimal) {
|
|
862
|
-
const agentNames = minimal ? ["builder", "critic", "verifier", "compressor"] : ["builder", "critic", "adversary", "verifier", "reframer", "compressor", "scout"];
|
|
976
|
+
const agentNames = minimal ? ["builder", "critic", "verifier", "compressor", "gatekeeper"] : ["builder", "critic", "adversary", "verifier", "reframer", "compressor", "scout", "gatekeeper"];
|
|
863
977
|
const majlisDir = path.join(projectRoot, ".majlis");
|
|
864
978
|
mkdirSafe(majlisDir);
|
|
865
979
|
const configPath = path.join(majlisDir, "config.json");
|