majlis 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +185 -39
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -529,8 +529,9 @@ You get ONE attempt per cycle. Your job is:
|
|
|
529
529
|
2. Form ONE hypothesis about what to fix
|
|
530
530
|
3. Implement ONE focused change (not a multi-step debug session)
|
|
531
531
|
4. Run the benchmark ONCE to see the result
|
|
532
|
-
5.
|
|
533
|
-
6.
|
|
532
|
+
5. Update the experiment doc in docs/experiments/ \u2014 fill in Approach, Results, and Metrics sections. This is NOT optional.
|
|
533
|
+
6. Output the structured majlis-json block with your decisions
|
|
534
|
+
7. STOP
|
|
534
535
|
|
|
535
536
|
Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
|
|
536
537
|
If your change doesn't work, document why and let the cycle continue \u2014
|
|
@@ -625,21 +626,28 @@ tools: [Read, Glob, Grep, Bash]
|
|
|
625
626
|
---
|
|
626
627
|
You are the Verifier. Perform dual verification:
|
|
627
628
|
|
|
628
|
-
|
|
629
|
+
## Scope Constraint (CRITICAL)
|
|
630
|
+
|
|
631
|
+
You must produce your structured output (grades + doubt resolutions) within your turn budget.
|
|
632
|
+
Do NOT exhaustively test every doubt and challenge \u2014 prioritize the critical ones.
|
|
633
|
+
For each doubt/challenge: one targeted check is enough. Confirm, dismiss, or mark inconclusive.
|
|
634
|
+
Reserve your final turns for writing the structured majlis-json output.
|
|
635
|
+
|
|
636
|
+
The framework saves your output automatically. Do NOT attempt to write files.
|
|
637
|
+
|
|
638
|
+
## PROVENANCE CHECK:
|
|
629
639
|
- Can every piece of code trace to an experiment or decision?
|
|
630
640
|
- Is the chain unbroken from requirement -> classification -> experiment -> code?
|
|
631
641
|
- Flag any broken chains.
|
|
632
642
|
|
|
633
|
-
CONTENT CHECK:
|
|
643
|
+
## CONTENT CHECK:
|
|
634
644
|
- Does the code do what the experiment log says?
|
|
635
|
-
-
|
|
636
|
-
-
|
|
645
|
+
- Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
|
|
646
|
+
- Do NOT run exhaustive diagnostics on every claim.
|
|
637
647
|
|
|
638
648
|
Grade each component: sound / good / weak / rejected
|
|
639
649
|
Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
|
|
640
650
|
|
|
641
|
-
Produce your verification report as output. The framework saves it automatically.
|
|
642
|
-
|
|
643
651
|
## Structured Output Format
|
|
644
652
|
<!-- majlis-json
|
|
645
653
|
{
|
|
@@ -3169,63 +3177,201 @@ async function run(args) {
|
|
|
3169
3177
|
throw new Error('Usage: majlis run "goal description"');
|
|
3170
3178
|
}
|
|
3171
3179
|
const db = getDb(root);
|
|
3172
|
-
const
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3180
|
+
const config = loadConfig7(root);
|
|
3181
|
+
const MAX_EXPERIMENTS = 10;
|
|
3182
|
+
const MAX_STEPS = 200;
|
|
3183
|
+
let experimentCount = 0;
|
|
3184
|
+
let stepCount = 0;
|
|
3185
|
+
header(`Autonomous Mode \u2014 ${goal}`);
|
|
3186
|
+
while (stepCount < MAX_STEPS && experimentCount < MAX_EXPERIMENTS) {
|
|
3187
|
+
stepCount++;
|
|
3188
|
+
let exp = getLatestExperiment(db);
|
|
3178
3189
|
if (!exp) {
|
|
3179
|
-
|
|
3180
|
-
|
|
3190
|
+
experimentCount++;
|
|
3191
|
+
if (experimentCount > MAX_EXPERIMENTS) {
|
|
3192
|
+
warn(`Reached max experiments (${MAX_EXPERIMENTS}). Stopping.`);
|
|
3193
|
+
break;
|
|
3194
|
+
}
|
|
3195
|
+
const sessionsSinceCompression = getSessionsSinceCompression(db);
|
|
3196
|
+
if (sessionsSinceCompression >= config.cycle.compression_interval) {
|
|
3197
|
+
info("Compressing before next experiment...");
|
|
3198
|
+
await cycle("compress", []);
|
|
3199
|
+
}
|
|
3200
|
+
info(`[Experiment ${experimentCount}/${MAX_EXPERIMENTS}] Planning next experiment...`);
|
|
3201
|
+
const hypothesis = await deriveNextHypothesis(goal, root, db);
|
|
3202
|
+
if (!hypothesis) {
|
|
3203
|
+
success("Planner says the goal has been met. Stopping.");
|
|
3204
|
+
break;
|
|
3205
|
+
}
|
|
3206
|
+
info(`Next hypothesis: ${hypothesis}`);
|
|
3207
|
+
exp = createNewExperiment(db, root, hypothesis);
|
|
3208
|
+
success(`Created experiment #${exp.id}: ${exp.slug}`);
|
|
3181
3209
|
}
|
|
3182
3210
|
if (isTerminal(exp.status)) {
|
|
3183
3211
|
if (exp.status === "merged") {
|
|
3184
|
-
success(`Experiment ${exp.slug} merged
|
|
3212
|
+
success(`Experiment ${exp.slug} merged.`);
|
|
3185
3213
|
} else if (exp.status === "dead_end") {
|
|
3186
|
-
info(`Experiment ${exp.slug}
|
|
3187
|
-
}
|
|
3188
|
-
const active = listActiveExperiments(db);
|
|
3189
|
-
if (active.length === 0) {
|
|
3190
|
-
info("No more active experiments. Goal assessment:");
|
|
3191
|
-
info(`Original goal: ${goal}`);
|
|
3192
|
-
warn("Review synthesis and metrics to determine if the goal has been met.");
|
|
3193
|
-
break;
|
|
3214
|
+
info(`Experiment ${exp.slug} dead-ended.`);
|
|
3194
3215
|
}
|
|
3195
|
-
info(`Continuing with experiment: ${active[0].slug}`);
|
|
3196
|
-
}
|
|
3197
|
-
info(`[Cycle ${cycleCount}/${MAX_CYCLES}] Processing ${exp.slug} (${exp.status})`);
|
|
3198
|
-
await next([exp.slug], false);
|
|
3199
|
-
const updated = getLatestExperiment(db);
|
|
3200
|
-
if (updated && isTerminal(updated.status)) {
|
|
3201
3216
|
continue;
|
|
3202
3217
|
}
|
|
3203
|
-
|
|
3204
|
-
|
|
3205
|
-
warn("Compression due during autonomous run \u2014 consider compressing.");
|
|
3206
|
-
}
|
|
3218
|
+
info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
|
|
3219
|
+
await next([exp.slug], false);
|
|
3207
3220
|
}
|
|
3208
|
-
if (
|
|
3209
|
-
warn(`Reached
|
|
3221
|
+
if (stepCount >= MAX_STEPS) {
|
|
3222
|
+
warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
|
|
3210
3223
|
}
|
|
3211
3224
|
header("Autonomous Mode Complete");
|
|
3212
3225
|
info(`Goal: ${goal}`);
|
|
3213
|
-
info(`
|
|
3226
|
+
info(`Experiments: ${experimentCount}, Steps: ${stepCount}`);
|
|
3214
3227
|
info("Run `majlis status` to see final state.");
|
|
3215
3228
|
}
|
|
3229
|
+
/**
 * Ask the planner agent whether the goal has been met and, if not, which
 * single hypothesis to test next.
 *
 * The prompt is assembled from the synthesis documents under
 * `docs/synthesis/`, the dead ends recorded in the database, and (when
 * `config.metrics.command` is set) the output of the metrics command.
 *
 * @param {string} goal - Goal description passed to `majlis run`.
 * @param {string} root - Project root; used as cwd for the metrics command.
 * @param {object} db - Database handle passed through to `listAllDeadEnds`.
 * @returns {Promise<string|null>} `null` when the planner reports the goal as
 *   met; otherwise the next hypothesis. Falls back to `goal` itself when no
 *   hypothesis can be extracted from the planner output.
 */
async function deriveNextHypothesis(goal, root, db) {
  const synthesis = readFileOrEmpty(path13.join(root, "docs", "synthesis", "current.md"));
  const fragility = readFileOrEmpty(path13.join(root, "docs", "synthesis", "fragility.md"));
  const deadEndsDoc = readFileOrEmpty(path13.join(root, "docs", "synthesis", "dead-ends.md"));
  const deadEnds = listAllDeadEnds(db);
  const config = loadConfig7(root);

  let metricsOutput = "";
  if (config.metrics?.command) {
    try {
      metricsOutput = import_node_child_process4.execSync(config.metrics.command, {
        cwd: root,
        encoding: "utf-8",
        timeout: 6e4,
        stdio: ["pipe", "pipe", "pipe"]
      }).trim();
    } catch {
      // A failing metrics command must not abort planning; tell the planner instead.
      metricsOutput = "(metrics command failed)";
    }
  }

  const result = await spawnSynthesiser({
    taskPrompt: `You are the Planner for an autonomous Majlis run.

## Goal
${goal}

## Current Metrics
${metricsOutput || "(no metrics configured)"}

## Synthesis (what we know so far)
${synthesis || "(empty \u2014 first experiment)"}

## Fragility Map (known weak areas)
${fragility || "(none)"}

## Dead-End Registry
${deadEndsDoc || "(none)"}

## Dead Ends (from DB \u2014 ${deadEnds.length} total)
${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}

## Your Task
1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
2. If YES \u2014 output: <!-- majlis-json {"goal_met": true, "hypothesis": null} -->
3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
- It must NOT repeat a dead-ended approach
- It should attack the weakest point revealed by synthesis/fragility
- It should be specific and actionable (not vague)
- Output: <!-- majlis-json {"goal_met": false, "hypothesis": "your hypothesis here"} -->

IMPORTANT: You MUST output the <!-- majlis-json --> block. This is how the framework reads your decision.`
  }, root);

  // Preferred path: the framework already parsed the majlis-json block.
  const structured = result.structured;
  if (structured?.goal_met === true) {
    return null;
  }
  // Only accept a non-empty string; anything else would break slug generation downstream.
  const structuredHypothesis = typeof structured?.hypothesis === "string" ? structured.hypothesis.trim() : "";
  if (structuredHypothesis) {
    return structuredHypothesis;
  }

  // Fallback path: scrape the raw text when no structured block was parsed.
  const rawOutput = typeof result.output === "string" ? result.output : "";
  // Honour an unparsed "goal met" answer instead of scraping `"hypothesis": null`
  // out of it and treating the word "null" as the next hypothesis.
  if (/"goal_met"\s*:\s*true/i.test(rawOutput)) {
    return null;
  }
  const match = rawOutput.match(/hypothesis["\s:]+([^"}\n]+)/i);
  const scraped = match ? match[1].trim() : "";
  if (scraped && scraped.toLowerCase() !== "null") {
    return scraped;
  }

  warn("Planner did not return a structured hypothesis. Using goal as fallback.");
  return goal;
}
|
|
3292
|
+
/**
 * Register a new experiment for `hypothesis`: pick a unique slug, try to
 * create a git branch for it, insert the experiment row, and (when
 * `docs/experiments/_TEMPLATE.md` exists) write an experiment log from the
 * template.
 *
 * @param {object} db - Database handle (queried directly and via helpers).
 * @param {string} root - Project root; cwd for git and base of `docs/experiments`.
 * @param {string} hypothesis - Hypothesis text; also used as the log title.
 * @returns {object} The experiment record returned by `createExperiment`.
 */
function createNewExperiment(db, root, hypothesis) {
  // A hypothesis with no ASCII letters/digits slugifies to "", which would
  // yield a dangling "exp/001-" branch and "001-.md" log; use a generic slug.
  const slug = slugify2(hypothesis) || "experiment";

  // Append -1, -2, ... until the slug is not already taken.
  let finalSlug = slug;
  let attempt = 0;
  while (getExperimentBySlug(db, finalSlug)) {
    attempt++;
    finalSlug = `${slug}-${attempt}`;
  }

  // Number the experiment from the current row count (1-based, zero-padded to 3 digits).
  const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
  const paddedNum = String(allExps.count + 1).padStart(3, "0");
  const branch = `exp/${paddedNum}-${finalSlug}`;

  try {
    // `branch` is built only from digits and the [a-z0-9-] slug, so it is safe
    // to interpolate into the shell command.
    import_node_child_process4.execSync(`git checkout -b ${branch}`, {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    info(`Created branch: ${branch}`);
  } catch {
    // Best effort: the experiment is still recorded even without a branch.
    warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
  }

  const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);

  const docsDir = path13.join(root, "docs", "experiments");
  const templatePath = path13.join(docsDir, "_TEMPLATE.md");
  if (fs13.existsSync(templatePath)) {
    const template = fs13.readFileSync(templatePath, "utf-8");
    const fields = {
      title: hypothesis,
      hypothesis,
      branch,
      status: "classified",
      sub_type: "unclassified",
      date: new Date().toISOString().split("T")[0]
    };
    // Use a replacer function so "$&", "$'", "$$" etc. inside the hypothesis
    // are inserted literally rather than interpreted as replacement patterns.
    // A single pass also keeps placeholders inside inserted text from being
    // expanded a second time.
    const logContent = template.replace(
      /\{\{(title|hypothesis|branch|status|sub_type|date)\}\}/g,
      (_placeholder, key) => fields[key]
    );
    const logPath = path13.join(docsDir, `${paddedNum}-${finalSlug}.md`);
    fs13.writeFileSync(logPath, logContent);
    info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
  }
  return exp;
}
|
|
3326
|
+
/**
 * Read a file as UTF-8 text, treating any read failure as "no content".
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} The file contents, or "" when reading throws.
 */
function readFileOrEmpty(filePath) {
  let contents = "";
  try {
    contents = fs13.readFileSync(filePath, "utf-8");
  } catch {
    // Best-effort read: an unreadable or missing file simply yields "".
  }
  return contents;
}
|
|
3333
|
+
/**
 * Turn free text into a lowercase, hyphen-separated slug of at most 50
 * characters containing only `a-z`, `0-9` and `-`.
 *
 * @param {string} text - Text to slugify.
 * @returns {string} The slug; "" when `text` has no ASCII letters or digits.
 */
function slugify2(text) {
  return text
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 50)
    // Truncation can cut right after a separator; drop the hyphen it leaves behind.
    .replace(/-$/, "");
}
|
|
3336
|
+
/**
 * Load `.majlis/config.json` from the project root.
 *
 * Built-in defaults are returned when the file is missing and are also used
 * to fill in any `project` / `cycle` keys the file omits, so callers can read
 * e.g. `config.cycle.compression_interval` without checking for a partial
 * config. Extra top-level keys from the file (such as `metrics`) are kept.
 *
 * @param {string} projectRoot - Directory containing the `.majlis` folder.
 * @returns {object} The effective configuration.
 * @throws {SyntaxError} When the config file exists but is not valid JSON.
 */
function loadConfig7(projectRoot) {
  const defaults = {
    project: { name: "", description: "", objective: "" },
    cycle: {
      compression_interval: 5,
      circuit_breaker_threshold: 3,
      require_doubt_before_verify: true,
      require_challenge_before_verify: false,
      auto_baseline_on_new_experiment: true
    }
  };
  const configPath = path13.join(projectRoot, ".majlis", "config.json");
  if (!fs13.existsSync(configPath)) {
    return defaults;
  }
  const parsed = JSON.parse(fs13.readFileSync(configPath, "utf-8"));
  // Anything other than a plain JSON object cannot carry settings; treat it as empty.
  const overrides = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
  return {
    ...defaults,
    ...overrides,
    project: { ...defaults.project, ...overrides.project },
    cycle: { ...defaults.cycle, ...overrides.cycle }
  };
}
|
|
3352
|
+
var fs13, path13, import_node_child_process4;
|
|
3216
3353
|
var init_run = __esm({
|
|
3217
3354
|
"src/commands/run.ts"() {
|
|
3218
3355
|
"use strict";
|
|
3356
|
+
fs13 = __toESM(require("fs"));
|
|
3357
|
+
path13 = __toESM(require("path"));
|
|
3358
|
+
import_node_child_process4 = require("child_process");
|
|
3219
3359
|
init_connection();
|
|
3220
3360
|
init_queries();
|
|
3221
3361
|
init_machine();
|
|
3222
3362
|
init_next();
|
|
3363
|
+
init_cycle();
|
|
3364
|
+
init_spawn();
|
|
3223
3365
|
init_format();
|
|
3224
3366
|
}
|
|
3225
3367
|
});
|
|
3226
3368
|
|
|
3227
3369
|
// src/cli.ts
|
|
3228
|
-
var
|
|
3370
|
+
var fs14 = __toESM(require("fs"));
|
|
3371
|
+
var path14 = __toESM(require("path"));
|
|
3372
|
+
var VERSION = JSON.parse(
|
|
3373
|
+
fs14.readFileSync(path14.join(__dirname, "..", "package.json"), "utf-8")
|
|
3374
|
+
).version;
|
|
3229
3375
|
async function main() {
|
|
3230
3376
|
const args = process.argv.slice(2);
|
|
3231
3377
|
if (args.includes("--version") || args.includes("-v")) {
|