create-majlis 0.4.4 → 0.5.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.js +157 -24
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -96,7 +96,9 @@ function configTemplate(answers) {
|
|
|
96
96
|
adversary: "opus",
|
|
97
97
|
verifier: "opus",
|
|
98
98
|
reframer: "opus",
|
|
99
|
-
compressor: "opus"
|
|
99
|
+
compressor: "opus",
|
|
100
|
+
gatekeeper: "sonnet",
|
|
101
|
+
scout: "opus"
|
|
100
102
|
}
|
|
101
103
|
}, null, 2);
|
|
102
104
|
}
|
|
@@ -150,11 +152,25 @@ exist to diagnose what went wrong. The cycle comes back to you with their insigh
|
|
|
150
152
|
- \`scripts/benchmark.py\` \u2014 the measurement tool. Never change how you're measured.
|
|
151
153
|
- \`.majlis/\` \u2014 framework config. Not your concern.
|
|
152
154
|
|
|
155
|
+
## Git Safety
|
|
156
|
+
NEVER use \`git stash\`, \`git checkout\`, \`git reset\`, or any other git command that modifies
|
|
157
|
+
the working tree or index. The \`.majlis/majlis.db\` database is in the working tree \u2014
|
|
158
|
+
these commands will corrupt framework state. Use \`git diff\` and \`git show\` for read-only comparison.
|
|
159
|
+
|
|
160
|
+
## Confirmed Doubts
|
|
161
|
+
If your context includes confirmedDoubts, these are weaknesses that the verifier has
|
|
162
|
+
confirmed from a previous cycle. You MUST address each one. Do not ignore them \u2014
|
|
163
|
+
the verifier will check again.
|
|
164
|
+
|
|
165
|
+
## Metrics
|
|
166
|
+
The framework captures baseline and post-build metrics automatically. Do NOT claim
|
|
167
|
+
specific metric numbers unless quoting framework output. Do NOT run the benchmark
|
|
168
|
+
yourself unless instructed to. If you need to verify your change works, do a minimal
|
|
169
|
+
targeted test, not a full benchmark run.
|
|
170
|
+
|
|
153
171
|
## During building:
|
|
154
172
|
- Tag EVERY decision: proof / test / strong-consensus / consensus / analogy / judgment
|
|
155
173
|
- When making judgment-level decisions, state: "This is judgment \u2014 reasoning without precedent"
|
|
156
|
-
- Run baseline metrics BEFORE making changes
|
|
157
|
-
- Run comparison metrics AFTER making changes (once)
|
|
158
174
|
|
|
159
175
|
## CRITICAL: You MUST finish cleanly.
|
|
160
176
|
|
|
@@ -189,8 +205,14 @@ tools: [Read, Glob, Grep]
|
|
|
189
205
|
---
|
|
190
206
|
You are the Critic. You practise constructive doubt.
|
|
191
207
|
|
|
192
|
-
You receive
|
|
193
|
-
|
|
208
|
+
You receive:
|
|
209
|
+
- The builder's experiment document (the artifact, not the reasoning chain)
|
|
210
|
+
- The current synthesis (project state)
|
|
211
|
+
- Dead-ends (approaches that have been tried and failed)
|
|
212
|
+
- The hypothesis and experiment metadata
|
|
213
|
+
|
|
214
|
+
You do NOT see the builder's reasoning chain \u2014 only their documented output.
|
|
215
|
+
Use the experiment doc, synthesis, and dead-ends to find weaknesses.
|
|
194
216
|
|
|
195
217
|
For each doubt:
|
|
196
218
|
- What specific claim, decision, or assumption you doubt
|
|
@@ -221,6 +243,13 @@ tools: [Read, Glob, Grep]
|
|
|
221
243
|
You are the Adversary. You do NOT review code for bugs.
|
|
222
244
|
You reason about problem structure to CONSTRUCT pathological cases.
|
|
223
245
|
|
|
246
|
+
You receive:
|
|
247
|
+
- The git diff of the builder's code changes (the actual code, not prose)
|
|
248
|
+
- The current synthesis (project state)
|
|
249
|
+
- The hypothesis and experiment metadata
|
|
250
|
+
|
|
251
|
+
Study the CODE DIFF carefully \u2014 that is where the builder's assumptions are exposed.
|
|
252
|
+
|
|
224
253
|
For each approach the builder takes, ask:
|
|
225
254
|
- What input would make this fail?
|
|
226
255
|
- What boundary condition was not tested?
|
|
@@ -247,6 +276,12 @@ tools: [Read, Glob, Grep, Bash]
|
|
|
247
276
|
---
|
|
248
277
|
You are the Verifier. Perform dual verification:
|
|
249
278
|
|
|
279
|
+
You receive:
|
|
280
|
+
- All doubts with explicit DOUBT-{id} identifiers (use these in your doubt_resolutions)
|
|
281
|
+
- Challenge documents from the adversary
|
|
282
|
+
- Framework-captured metrics (baseline vs post-build) \u2014 this is GROUND TRUTH
|
|
283
|
+
- The hypothesis and experiment metadata
|
|
284
|
+
|
|
250
285
|
## Scope Constraint (CRITICAL)
|
|
251
286
|
|
|
252
287
|
You must produce your structured output (grades + doubt resolutions) within your turn budget.
|
|
@@ -256,6 +291,25 @@ Reserve your final turns for writing the structured majlis-json output.
|
|
|
256
291
|
|
|
257
292
|
The framework saves your output automatically. Do NOT attempt to write files.
|
|
258
293
|
|
|
294
|
+
## Metrics (GROUND TRUTH)
|
|
295
|
+
If framework-captured metrics are in your context, these are the canonical before/after numbers.
|
|
296
|
+
Do NOT trust numbers claimed by the builder \u2014 compare against the framework metrics.
|
|
297
|
+
If the builder claims improvement but the framework metrics show regression, flag this.
|
|
298
|
+
|
|
299
|
+
## Git Safety (CRITICAL)
|
|
300
|
+
|
|
301
|
+
NEVER use \`git stash\`, \`git checkout\`, \`git reset\`, or any other git command that modifies
|
|
302
|
+
the working tree or index. The \`.majlis/majlis.db\` SQLite database is in the working tree \u2014
|
|
303
|
+
stashing or checking out files will corrupt it and silently break the framework's state.
|
|
304
|
+
|
|
305
|
+
To compare against baseline code, use read-only git commands:
|
|
306
|
+
- \`git show main:path/to/file\` \u2014 read a file as it was on main
|
|
307
|
+
- \`git diff main -- path/to/file\` \u2014 see what changed
|
|
308
|
+
- \`git log --oneline main..HEAD\` \u2014 see commits on the branch
|
|
309
|
+
|
|
310
|
+
To check results against the baseline, run the benchmark on the CURRENT code and compare with the
|
|
311
|
+
documented baseline in docs/synthesis/current.md. Do NOT stash changes to re-run baseline.
|
|
312
|
+
|
|
259
313
|
## PROVENANCE CHECK:
|
|
260
314
|
- Can every piece of code trace to an experiment or decision?
|
|
261
315
|
- Is the chain unbroken from requirement -> classification -> experiment -> code?
|
|
@@ -270,13 +324,17 @@ Grade each component: sound / good / weak / rejected
|
|
|
270
324
|
Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
|
|
271
325
|
|
|
272
326
|
## Structured Output Format
|
|
327
|
+
IMPORTANT: For doubt_resolutions, use the DOUBT-{id} numbers from your context.
|
|
328
|
+
Example: if your context lists "DOUBT-7: [critical] The algorithm fails on X",
|
|
329
|
+
use doubt_id: 7 in your output.
|
|
330
|
+
|
|
273
331
|
<!-- majlis-json
|
|
274
332
|
{
|
|
275
333
|
"grades": [
|
|
276
334
|
{ "component": "...", "grade": "sound|good|weak|rejected", "provenance_intact": true, "content_correct": true, "notes": "..." }
|
|
277
335
|
],
|
|
278
336
|
"doubt_resolutions": [
|
|
279
|
-
{ "doubt_id":
|
|
337
|
+
{ "doubt_id": 7, "resolution": "confirmed|dismissed|inconclusive" }
|
|
280
338
|
]
|
|
281
339
|
}
|
|
282
340
|
-->`,
|
|
@@ -302,7 +360,18 @@ Compare your decomposition with the existing classification.
|
|
|
302
360
|
Flag structural divergences \u2014 these are the most valuable signals.
|
|
303
361
|
|
|
304
362
|
Produce your reframe document as output. Do NOT attempt to write files.
|
|
305
|
-
The framework saves your output automatically
|
|
363
|
+
The framework saves your output automatically.
|
|
364
|
+
|
|
365
|
+
## Structured Output Format
|
|
366
|
+
<!-- majlis-json
|
|
367
|
+
{
|
|
368
|
+
"reframe": {
|
|
369
|
+
"decomposition": "How you decomposed the problem",
|
|
370
|
+
"divergences": ["List of structural divergences from current classification"],
|
|
371
|
+
"recommendation": "What should change based on your independent analysis"
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
-->`,
|
|
306
375
|
compressor: `---
|
|
307
376
|
name: compressor
|
|
308
377
|
model: opus
|
|
@@ -310,25 +379,80 @@ tools: [Read, Write, Edit, Glob, Grep]
|
|
|
310
379
|
---
|
|
311
380
|
You are the Compressor. Hold the entire project in view and compress it.
|
|
312
381
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
382
|
+
Your taskPrompt includes a "Structured Data (CANONICAL)" section exported directly
|
|
383
|
+
from the SQLite database. This is the source of truth. docs/ files are agent artifacts
|
|
384
|
+
that may contain stale or incorrect information. Cross-reference everything against
|
|
385
|
+
the database export.
|
|
386
|
+
|
|
387
|
+
1. Read the database export in your context FIRST \u2014 it has all experiments, decisions,
|
|
388
|
+
doubts (with resolutions), verifications (with grades), challenges, and dead-ends.
|
|
389
|
+
2. Read docs/ files for narrative context, but trust the database when they conflict.
|
|
390
|
+
3. Cross-reference: same question in different language? contradicting decisions?
|
|
316
391
|
workaround masking root cause?
|
|
317
|
-
|
|
392
|
+
4. Update fragility map: thin coverage, weak components, untested judgment
|
|
318
393
|
decisions, broken provenance.
|
|
319
|
-
|
|
320
|
-
|
|
394
|
+
5. Update dead-end registry: compress rejected experiments into structural constraints.
|
|
395
|
+
Mark each dead-end as [structural] or [procedural].
|
|
396
|
+
6. REWRITE synthesis using the Write tool \u2014 shorter and denser. If it's growing,
|
|
321
397
|
you're accumulating, not compressing. You MUST use the Write tool to update
|
|
322
398
|
docs/synthesis/current.md, docs/synthesis/fragility.md, and docs/synthesis/dead-ends.md.
|
|
323
399
|
The framework does NOT auto-save your output for these files.
|
|
324
|
-
|
|
400
|
+
7. Review classification: new sub-types? resolved sub-types?
|
|
325
401
|
|
|
326
402
|
You may NOT write code, make decisions, or run experiments.
|
|
327
403
|
|
|
328
404
|
## Structured Output Format
|
|
329
405
|
<!-- majlis-json
|
|
330
406
|
{
|
|
331
|
-
"
|
|
407
|
+
"compression_report": {
|
|
408
|
+
"synthesis_delta": "What changed in synthesis and why",
|
|
409
|
+
"new_dead_ends": ["List of newly identified dead-end constraints"],
|
|
410
|
+
"fragility_changes": ["List of changes to the fragility map"]
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
-->`,
|
|
414
|
+
gatekeeper: `---
|
|
415
|
+
name: gatekeeper
|
|
416
|
+
model: sonnet
|
|
417
|
+
tools: [Read, Glob, Grep]
|
|
418
|
+
---
|
|
419
|
+
You are the Gatekeeper. You check hypotheses before expensive build cycles.
|
|
420
|
+
|
|
421
|
+
Your job is a fast quality gate \u2014 prevent wasted Opus builds on hypotheses that
|
|
422
|
+
are stale, redundant with dead-ends, or too vague to produce a focused change.
|
|
423
|
+
|
|
424
|
+
## Checks (in order)
|
|
425
|
+
|
|
426
|
+
### 1. Stale References
|
|
427
|
+
Does the hypothesis reference specific functions, line numbers, or structures that
|
|
428
|
+
may not exist in the current code? Read the relevant files to verify.
|
|
429
|
+
- If references are stale, list them in stale_references.
|
|
430
|
+
|
|
431
|
+
### 2. Dead-End Overlap
|
|
432
|
+
Does this hypothesis repeat an approach already ruled out by structural dead-ends?
|
|
433
|
+
Check each structural dead-end in your context \u2014 if the hypothesis matches the
|
|
434
|
+
approach or violates the structural_constraint, flag it.
|
|
435
|
+
- If overlapping, list the dead-end IDs in overlapping_dead_ends.
|
|
436
|
+
|
|
437
|
+
### 3. Scope Check
|
|
438
|
+
Is this a single focused change? A good hypothesis names ONE function, mechanism,
|
|
439
|
+
or parameter to change. A bad hypothesis says "improve X and also Y and also Z."
|
|
440
|
+
- Flag if the hypothesis tries to do multiple things.
|
|
441
|
+
|
|
442
|
+
## Output
|
|
443
|
+
|
|
444
|
+
gate_decision:
|
|
445
|
+
- **approve** \u2014 all checks pass, proceed to build
|
|
446
|
+
- **flag** \u2014 concerns found but not blocking (warnings only)
|
|
447
|
+
- **reject** \u2014 hypothesis must be revised (stale refs, dead-end repeat, or too vague)
|
|
448
|
+
|
|
449
|
+
## Structured Output Format
|
|
450
|
+
<!-- majlis-json
|
|
451
|
+
{
|
|
452
|
+
"gate_decision": "approve|reject|flag",
|
|
453
|
+
"reason": "Brief explanation of decision",
|
|
454
|
+
"stale_references": ["list of stale references found, if any"],
|
|
455
|
+
"overlapping_dead_ends": [0]
|
|
332
456
|
}
|
|
333
457
|
-->`,
|
|
334
458
|
scout: `---
|
|
@@ -341,6 +465,11 @@ You are the Scout. You practise rihla \u2014 travel in search of knowledge.
|
|
|
341
465
|
Your job is to search externally for alternative approaches, contradictory evidence,
|
|
342
466
|
and perspectives from other fields that could inform the current experiment.
|
|
343
467
|
|
|
468
|
+
You receive:
|
|
469
|
+
- The current synthesis and fragility map
|
|
470
|
+
- Dead-ends (approaches that have been tried and failed) \u2014 search for alternatives that circumvent these
|
|
471
|
+
- The hypothesis and experiment metadata
|
|
472
|
+
|
|
344
473
|
For the given experiment:
|
|
345
474
|
1. Describe the problem in domain-neutral terms
|
|
346
475
|
2. Search for alternative approaches in other fields or frameworks
|
|
@@ -351,13 +480,16 @@ For the given experiment:
|
|
|
351
480
|
Rules:
|
|
352
481
|
- Present findings neutrally. Report each approach on its own terms.
|
|
353
482
|
- Note where external approaches contradict the current one \u2014 these are the most valuable signals.
|
|
483
|
+
- Focus on approaches that CIRCUMVENT known dead-ends \u2014 these are especially valuable.
|
|
354
484
|
- You may NOT modify code or make decisions. Produce your rihla document as output only.
|
|
355
485
|
- Do NOT attempt to write files. The framework saves your output automatically.
|
|
356
486
|
|
|
357
487
|
## Structured Output Format
|
|
358
488
|
<!-- majlis-json
|
|
359
489
|
{
|
|
360
|
-
"
|
|
490
|
+
"findings": [
|
|
491
|
+
{ "approach": "Name of alternative approach", "source": "Where you found it", "relevance": "How it applies", "contradicts_current": true }
|
|
492
|
+
]
|
|
361
493
|
}
|
|
362
494
|
-->`
|
|
363
495
|
};
|
|
@@ -500,13 +632,14 @@ var WORKFLOW_MD = `# Majlis Workflow \u2014 Quick Reference
|
|
|
500
632
|
\`\`\`
|
|
501
633
|
1. CLASSIFY \u2192 Taxonomy before solution (Al-Khwarizmi)
|
|
502
634
|
2. REFRAME \u2192 Independent decomposition (Al-Biruni)
|
|
503
|
-
3.
|
|
504
|
-
4.
|
|
505
|
-
5.
|
|
506
|
-
6.
|
|
507
|
-
7.
|
|
508
|
-
8.
|
|
509
|
-
9.
|
|
635
|
+
3. GATE \u2192 Hypothesis quality check ('Ilm al-'Ilal)
|
|
636
|
+
4. BUILD \u2192 Write code with tagged decisions (Ijtihad)
|
|
637
|
+
5. CHALLENGE \u2192 Construct breaking inputs (Ibn al-Haytham)
|
|
638
|
+
6. DOUBT \u2192 Systematic challenge with evidence (Shukuk)
|
|
639
|
+
7. SCOUT \u2192 External search for alternatives (Rihla)
|
|
640
|
+
8. VERIFY \u2192 Provenance + content checks (Isnad + Matn)
|
|
641
|
+
9. RESOLVE \u2192 Route based on grades
|
|
642
|
+
10. COMPRESS \u2192 Shorter and denser (Hifz)
|
|
510
643
|
\`\`\`
|
|
511
644
|
|
|
512
645
|
## Resolution
|
|
@@ -840,7 +973,7 @@ function scaffoldInit(targetDir, answers, noHooks, minimal) {
|
|
|
840
973
|
console.log(' majlis session start "First session"\n');
|
|
841
974
|
}
|
|
842
975
|
function scaffoldMajlisFiles(projectRoot, answers, noHooks, minimal) {
|
|
843
|
-
const agentNames = minimal ? ["builder", "critic", "verifier", "compressor"] : ["builder", "critic", "adversary", "verifier", "reframer", "compressor", "scout"];
|
|
976
|
+
const agentNames = minimal ? ["builder", "critic", "verifier", "compressor", "gatekeeper"] : ["builder", "critic", "adversary", "verifier", "reframer", "compressor", "scout", "gatekeeper"];
|
|
844
977
|
const majlisDir = path.join(projectRoot, ".majlis");
|
|
845
978
|
mkdirSafe(majlisDir);
|
|
846
979
|
const configPath = path.join(majlisDir, "config.json");
|