majlis 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/cli.js +185 -39
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -529,8 +529,9 @@ You get ONE attempt per cycle. Your job is:
529
529
  2. Form ONE hypothesis about what to fix
530
530
  3. Implement ONE focused change (not a multi-step debug session)
531
531
  4. Run the benchmark ONCE to see the result
532
- 5. Document what you did, what happened, and what you think the next step should be
533
- 6. STOP
532
+ 5. Update the experiment doc in docs/experiments/ \u2014 fill in Approach, Results, and Metrics sections. This is NOT optional.
533
+ 6. Output the structured majlis-json block with your decisions
534
+ 7. STOP
534
535
 
535
536
  Do NOT iterate. Do NOT try multiple approaches. Do NOT debug your own fix.
536
537
  If your change doesn't work, document why and let the cycle continue \u2014
@@ -625,21 +626,28 @@ tools: [Read, Glob, Grep, Bash]
625
626
  ---
626
627
  You are the Verifier. Perform dual verification:
627
628
 
628
- PROVENANCE CHECK:
629
+ ## Scope Constraint (CRITICAL)
630
+
631
+ You must produce your structured output (grades + doubt resolutions) within your turn budget.
632
+ Do NOT exhaustively test every doubt and challenge \u2014 prioritize the critical ones.
633
+ For each doubt/challenge: one targeted check is enough. Confirm, dismiss, or mark inconclusive.
634
+ Reserve your final turns for writing the structured majlis-json output.
635
+
636
+ The framework saves your output automatically. Do NOT attempt to write files.
637
+
638
+ ## PROVENANCE CHECK:
629
639
  - Can every piece of code trace to an experiment or decision?
630
640
  - Is the chain unbroken from requirement -> classification -> experiment -> code?
631
641
  - Flag any broken chains.
632
642
 
633
- CONTENT CHECK:
643
+ ## CONTENT CHECK:
634
644
  - Does the code do what the experiment log says?
635
- - Do tests demonstrate the hypothesis?
636
- - Write and run targeted tests against the critic's doubts AND the adversary's cases.
645
+ - Run at most 3-5 targeted diagnostic scripts, focused on the critical doubts/challenges.
646
+ - Do NOT run exhaustive diagnostics on every claim.
637
647
 
638
648
  Grade each component: sound / good / weak / rejected
639
649
  Grade each doubt/challenge: confirmed / dismissed (with evidence) / inconclusive
640
650
 
641
- Produce your verification report as output. The framework saves it automatically.
642
-
643
651
  ## Structured Output Format
644
652
  <!-- majlis-json
645
653
  {
@@ -3169,63 +3177,201 @@ async function run(args) {
3169
3177
  throw new Error('Usage: majlis run "goal description"');
3170
3178
  }
3171
3179
  const db = getDb(root);
3172
- const MAX_CYCLES = 50;
3173
- header(`Autonomous Mode \u2014 Goal: ${goal}`);
3174
- let cycleCount = 0;
3175
- while (cycleCount < MAX_CYCLES) {
3176
- cycleCount++;
3177
- const exp = getLatestExperiment(db);
3180
+ const config = loadConfig7(root);
3181
+ const MAX_EXPERIMENTS = 10;
3182
+ const MAX_STEPS = 200;
3183
+ let experimentCount = 0;
3184
+ let stepCount = 0;
3185
+ header(`Autonomous Mode \u2014 ${goal}`);
3186
+ while (stepCount < MAX_STEPS && experimentCount < MAX_EXPERIMENTS) {
3187
+ stepCount++;
3188
+ let exp = getLatestExperiment(db);
3178
3189
  if (!exp) {
3179
- warn('No active experiments. Create one with `majlis new "hypothesis"` and re-run.');
3180
- break;
3190
+ experimentCount++;
3191
+ if (experimentCount > MAX_EXPERIMENTS) {
3192
+ warn(`Reached max experiments (${MAX_EXPERIMENTS}). Stopping.`);
3193
+ break;
3194
+ }
3195
+ const sessionsSinceCompression = getSessionsSinceCompression(db);
3196
+ if (sessionsSinceCompression >= config.cycle.compression_interval) {
3197
+ info("Compressing before next experiment...");
3198
+ await cycle("compress", []);
3199
+ }
3200
+ info(`[Experiment ${experimentCount}/${MAX_EXPERIMENTS}] Planning next experiment...`);
3201
+ const hypothesis = await deriveNextHypothesis(goal, root, db);
3202
+ if (!hypothesis) {
3203
+ success("Planner says the goal has been met. Stopping.");
3204
+ break;
3205
+ }
3206
+ info(`Next hypothesis: ${hypothesis}`);
3207
+ exp = createNewExperiment(db, root, hypothesis);
3208
+ success(`Created experiment #${exp.id}: ${exp.slug}`);
3181
3209
  }
3182
3210
  if (isTerminal(exp.status)) {
3183
3211
  if (exp.status === "merged") {
3184
- success(`Experiment ${exp.slug} merged successfully.`);
3212
+ success(`Experiment ${exp.slug} merged.`);
3185
3213
  } else if (exp.status === "dead_end") {
3186
- info(`Experiment ${exp.slug} ended as dead-end.`);
3187
- }
3188
- const active = listActiveExperiments(db);
3189
- if (active.length === 0) {
3190
- info("No more active experiments. Goal assessment:");
3191
- info(`Original goal: ${goal}`);
3192
- warn("Review synthesis and metrics to determine if the goal has been met.");
3193
- break;
3214
+ info(`Experiment ${exp.slug} dead-ended.`);
3194
3215
  }
3195
- info(`Continuing with experiment: ${active[0].slug}`);
3196
- }
3197
- info(`[Cycle ${cycleCount}/${MAX_CYCLES}] Processing ${exp.slug} (${exp.status})`);
3198
- await next([exp.slug], false);
3199
- const updated = getLatestExperiment(db);
3200
- if (updated && isTerminal(updated.status)) {
3201
3216
  continue;
3202
3217
  }
3203
- const sessionsSinceCompression = getSessionsSinceCompression(db);
3204
- if (sessionsSinceCompression >= 5) {
3205
- warn("Compression due during autonomous run \u2014 consider compressing.");
3206
- }
3218
+ info(`[Step ${stepCount}] ${exp.slug}: ${exp.status}`);
3219
+ await next([exp.slug], false);
3207
3220
  }
3208
- if (cycleCount >= MAX_CYCLES) {
3209
- warn(`Reached maximum cycle count (${MAX_CYCLES}). Stopping autonomous mode.`);
3221
+ if (stepCount >= MAX_STEPS) {
3222
+ warn(`Reached max steps (${MAX_STEPS}). Stopping autonomous mode.`);
3210
3223
  }
3211
3224
  header("Autonomous Mode Complete");
3212
3225
  info(`Goal: ${goal}`);
3213
- info(`Cycles executed: ${cycleCount}`);
3226
+ info(`Experiments: ${experimentCount}, Steps: ${stepCount}`);
3214
3227
  info("Run `majlis status` to see final state.");
3215
3228
  }
3229
/**
 * Ask the planner agent what the next experiment should be.
 *
 * Gathers the synthesis, fragility and dead-end documents under
 * `docs/synthesis/`, the dead ends recorded in the database and (when
 * `config.metrics.command` is set) the output of the metrics command, then
 * hands them to the synthesiser together with the goal.
 *
 * @param {string} goal - Goal description for the autonomous run.
 * @param {string} root - Project root directory.
 * @param {object} db - Database handle passed through to `listAllDeadEnds`.
 * @returns {Promise<string|null>} `null` when the planner reports the goal as
 *   met; otherwise the proposed hypothesis, or `goal` itself when no usable
 *   hypothesis could be extracted from the planner output.
 */
async function deriveNextHypothesis(goal, root, db) {
  const synthesis = readFileOrEmpty(path13.join(root, "docs", "synthesis", "current.md"));
  const fragility = readFileOrEmpty(path13.join(root, "docs", "synthesis", "fragility.md"));
  const deadEndsDoc = readFileOrEmpty(path13.join(root, "docs", "synthesis", "dead-ends.md"));
  const deadEnds = listAllDeadEnds(db);
  const config = loadConfig7(root);
  let metricsOutput = "";
  if (config.metrics?.command) {
    try {
      metricsOutput = (0, import_node_child_process4.execSync)(config.metrics.command, {
        cwd: root,
        encoding: "utf-8",
        timeout: 6e4,
        stdio: ["pipe", "pipe", "pipe"]
      }).trim();
    } catch {
      // Best effort: a broken metrics command must not abort planning, so the
      // planner is told explicitly that metrics are unavailable.
      metricsOutput = "(metrics command failed)";
    }
  }
  const result = await spawnSynthesiser({
    taskPrompt: `You are the Planner for an autonomous Majlis run.

## Goal
${goal}

## Current Metrics
${metricsOutput || "(no metrics configured)"}

## Synthesis (what we know so far)
${synthesis || "(empty \u2014 first experiment)"}

## Fragility Map (known weak areas)
${fragility || "(none)"}

## Dead-End Registry
${deadEndsDoc || "(none)"}

## Dead Ends (from DB \u2014 ${deadEnds.length} total)
${deadEnds.map((d) => `- ${d.approach}: ${d.why_failed} [constraint: ${d.structural_constraint}]`).join("\n") || "(none)"}

## Your Task
1. Assess: based on the metrics and synthesis, has the goal been met? Be specific.
2. If YES \u2014 output: <!-- majlis-json {"goal_met": true, "hypothesis": null} -->
3. If NO \u2014 propose the SINGLE most promising next experiment hypothesis.
- It must NOT repeat a dead-ended approach
- It should attack the weakest point revealed by synthesis/fragility
- It should be specific and actionable (not vague)
- Output: <!-- majlis-json {"goal_met": false, "hypothesis": "your hypothesis here"} -->

IMPORTANT: You MUST output the <!-- majlis-json --> block. This is how the framework reads your decision.`
  }, root);
  const structured = result?.structured;
  if (structured?.goal_met === true) {
    return null;
  }
  // Only accept a real, non-blank string: the caller slugifies the value, so
  // an object or number here would crash further down the line.
  if (typeof structured?.hypothesis === "string" && structured.hypothesis.trim() !== "") {
    return structured.hypothesis;
  }
  // Fallback: scrape the raw text when the structured block could not be parsed.
  const rawOutput = typeof result?.output === "string" ? result.output : "";
  const match = rawOutput.match(/hypothesis["\s:]+([^"}\n]+)/i);
  const candidate = match ? match[1].trim() : "";
  // `"hypothesis": null` is captured as the literal text "null"; that is the
  // absence of a hypothesis, not a hypothesis called "null".
  if (candidate !== "" && candidate.toLowerCase() !== "null") {
    return candidate;
  }
  warn("Planner did not return a structured hypothesis. Using goal as fallback.");
  return goal;
}
3292
/**
 * Register a new experiment for `hypothesis`: pick an unused slug, try to
 * create a matching git branch, insert the experiment row and, when
 * `docs/experiments/_TEMPLATE.md` exists, write an experiment log from it.
 *
 * @param {object} db - Database handle (must expose `prepare(sql).get()`).
 * @param {string} root - Project root directory.
 * @param {string} hypothesis - Hypothesis text for the experiment.
 * @returns {object} The value returned by `createExperiment`.
 */
function createNewExperiment(db, root, hypothesis) {
  const slug = slugify2(hypothesis);
  let finalSlug = slug;
  let attempt = 0;
  // Append -1, -2, ... until the slug is not already taken.
  while (getExperimentBySlug(db, finalSlug)) {
    attempt++;
    finalSlug = `${slug}-${attempt}`;
  }
  const allExps = db.prepare("SELECT COUNT(*) as count FROM experiments").get();
  const num = allExps.count + 1;
  const paddedNum = String(num).padStart(3, "0");
  const branch = `exp/${paddedNum}-${finalSlug}`;
  try {
    (0, import_node_child_process4.execSync)(`git checkout -b ${branch}`, {
      cwd: root,
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
    info(`Created branch: ${branch}`);
  } catch {
    // Best effort: the experiment is still recorded when git is unavailable
    // or the branch cannot be created.
    warn(`Could not create branch ${branch} \u2014 continuing without git branch.`);
  }
  const exp = createExperiment(db, finalSlug, branch, hypothesis, null, null);
  const docsDir = path13.join(root, "docs", "experiments");
  const templatePath = path13.join(docsDir, "_TEMPLATE.md");
  if (fs13.existsSync(templatePath)) {
    const template = fs13.readFileSync(templatePath, "utf-8");
    const today = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
    // Replacer functions are used instead of replacement strings so that
    // "$&", "$$", "$`" or "$'" inside the hypothesis are inserted literally
    // rather than being interpreted as replacement patterns.
    const logContent = template
      .replace(/\{\{title\}\}/g, () => hypothesis)
      .replace(/\{\{hypothesis\}\}/g, () => hypothesis)
      .replace(/\{\{branch\}\}/g, () => branch)
      .replace(/\{\{status\}\}/g, () => "classified")
      .replace(/\{\{sub_type\}\}/g, () => "unclassified")
      .replace(/\{\{date\}\}/g, () => today);
    const logPath = path13.join(docsDir, `${paddedNum}-${finalSlug}.md`);
    fs13.writeFileSync(logPath, logContent);
    info(`Created experiment log: docs/experiments/${paddedNum}-${finalSlug}.md`);
  }
  return exp;
}
3326
/**
 * Read a UTF-8 text file, treating any read failure as "no content".
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} The file contents, or "" when the read throws.
 */
function readFileOrEmpty(filePath) {
  let contents = "";
  try {
    contents = fs13.readFileSync(filePath, "utf-8");
  } catch {
    // Deliberately ignored: an unreadable file simply yields the empty string.
  }
  return contents;
}
3333
/**
 * Turn free text into a lowercase, hyphen-separated slug of at most 50
 * characters that never starts or ends with a hyphen.
 *
 * @param {string} text - Text to slugify.
 * @returns {string} The slug; "" when `text` has no ASCII letters or digits.
 */
function slugify2(text) {
  return text
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 50)
    // Truncation can cut right after a separator; drop the hyphen it leaves.
    .replace(/-$/, "");
}
3336
/**
 * Load `.majlis/config.json` from the project root.
 *
 * Values from the file are layered over the built-in defaults, so a config
 * file that omits `project`, `cycle` or individual keys inside them still
 * yields a complete object (callers read e.g. `cycle.compression_interval`
 * without checking that `cycle` exists).
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {object} The effective configuration.
 * @throws {SyntaxError} When the config file exists but is not valid JSON.
 */
function loadConfig7(projectRoot) {
  const defaults = {
    project: { name: "", description: "", objective: "" },
    cycle: {
      compression_interval: 5,
      circuit_breaker_threshold: 3,
      require_doubt_before_verify: true,
      require_challenge_before_verify: false,
      auto_baseline_on_new_experiment: true
    }
  };
  const configPath = path13.join(projectRoot, ".majlis", "config.json");
  if (!fs13.existsSync(configPath)) {
    return defaults;
  }
  const parsed = JSON.parse(fs13.readFileSync(configPath, "utf-8"));
  // Shallow-merge the top level, then merge the two nested sections key by
  // key so a partial section keeps the defaults for the keys it leaves out.
  return {
    ...defaults,
    ...parsed,
    project: { ...defaults.project, ...parsed?.project },
    cycle: { ...defaults.cycle, ...parsed?.cycle }
  };
}
3352
+ var fs13, path13, import_node_child_process4;
3216
3353
  var init_run = __esm({
3217
3354
  "src/commands/run.ts"() {
3218
3355
  "use strict";
3356
+ fs13 = __toESM(require("fs"));
3357
+ path13 = __toESM(require("path"));
3358
+ import_node_child_process4 = require("child_process");
3219
3359
  init_connection();
3220
3360
  init_queries();
3221
3361
  init_machine();
3222
3362
  init_next();
3363
+ init_cycle();
3364
+ init_spawn();
3223
3365
  init_format();
3224
3366
  }
3225
3367
  });
3226
3368
 
3227
3369
  // src/cli.ts
3228
- var VERSION = "0.1.0";
3370
+ var fs14 = __toESM(require("fs"));
3371
+ var path14 = __toESM(require("path"));
3372
+ var VERSION = JSON.parse(
3373
+ fs14.readFileSync(path14.join(__dirname, "..", "package.json"), "utf-8")
3374
+ ).version;
3229
3375
  async function main() {
3230
3376
  const args = process.argv.slice(2);
3231
3377
  if (args.includes("--version") || args.includes("-v")) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "majlis",
3
- "version": "0.3.2",
3
+ "version": "0.4.0",
4
4
  "description": "Multi-agent workflow CLI for structured doubt, independent verification, and compressed knowledge",
5
5
  "bin": {
6
6
  "majlis": "./dist/cli.js"