@mutagent/cli 0.1.28 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -2472,7 +2472,9 @@ function evaluationLinks(promptId, evalId) {
2472
2472
  var AI_DIRECTIVE = `
2473
2473
  [AI Agent: STOP. Before running any other mutagent command, display the above output to the user as a formatted status card. Do not summarize or omit details. Show all links and suggested next steps.]`;
2474
2474
  function echoDirectiveToStderr(directive) {
2475
- if (directive?.instruction) {
2475
+ if (directive?.renderedCard) {
2476
+ console.error(directive.renderedCard);
2477
+ } else if (directive?.instruction) {
2476
2478
  console.error(`
2477
2479
  ⚠️ AI AGENT: ${directive.instruction}
2478
2480
  `);
@@ -2492,111 +2494,203 @@ function formatCreationHints(hint) {
2492
2494
  `);
2493
2495
  }
2494
2496
  var CARD_INSTRUCTION = "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps.";
2497
/**
 * Render a status card as a plain-text block framed with box-drawing characters.
 *
 * Layout, top to bottom: title, separator, one "label: value" line per row,
 * then (only when non-empty) a separator plus one "label: url" line per link,
 * then (only when non-empty) a separator, a "Next Steps:" heading and one
 * " → command" line per suggestion.
 *
 * The frame is at least 55 dashes wide and grows to fit the longest line, so
 * URLs and commands are never truncated. Cards whose content already fits in
 * the minimum width render exactly as before.
 *
 * @param {object} opts
 * @param {string} opts.title - Heading shown on the first line of the card.
 * @param {Array<{label: string, value: *}>} [opts.rows] - Field rows.
 * @param {Array<{label: string, url: string}>} [opts.links] - Link rows.
 * @param {string[]} [opts.next] - Suggested follow-up commands.
 * @returns {string} The card, lines joined with "\n" (no trailing newline).
 */
function renderPlainCard(opts) {
  // Minimum number of dashes between the corner glyphs.
  const MIN_FRAME_WIDTH = 55;

  // Embedded newlines would break the frame, so every entry is split into
  // physical lines before measuring and drawing.
  const toLines = (text) => String(text).split(/\r?\n/);
  // Count code points rather than UTF-16 units so astral characters
  // (e.g. emoji) do not skew the padding.
  const glyphCount = (text) => Array.from(text).length;

  const rows = opts.rows ?? [];
  const links = opts.links ?? [];
  const next = opts.next ?? [];

  // Title and rows are always present as sections (even when rows is empty,
  // the separator under the title is still drawn); links and next steps only
  // appear when they have content.
  const sections = [
    toLines(opts.title ?? ""),
    rows.flatMap((r) => toLines(`${r.label}: ${r.value}`))
  ];
  if (links.length) {
    sections.push(links.flatMap((l) => toLines(`${l.label}: ${l.url}`)));
  }
  if (next.length) {
    sections.push(["Next Steps:", ...next.flatMap((n) => toLines(` → ${n}`))]);
  }

  // Text area = frame width minus one space of padding on each side.
  const longest = Math.max(...sections.flat().map(glyphCount));
  const textWidth = Math.max(MIN_FRAME_WIDTH - 2, longest);
  const rule = "─".repeat(textWidth);
  const frame = (text) => `│ ${text}${" ".repeat(textWidth - glyphCount(text))} │`;

  const out = [`┌─${rule}─┐`];
  sections.forEach((section, index) => {
    if (index > 0) {
      out.push(frame(rule));
    }
    for (const line of section) {
      out.push(frame(line));
    }
  });
  out.push(`└─${rule}─┘`);
  return out.join("\n");
}
2495
2524
  function promptCreatedDirective(promptId, name) {
2525
+ const title = `Prompt Created — ${name}`;
2526
+ const dashboardUrl = promptLink(promptId);
2527
+ const apiUrl = `/api/prompts/${String(promptId)}`;
2528
+ const rows = [
2529
+ { label: "Prompt ID", value: String(promptId) },
2530
+ { label: "Name", value: name }
2531
+ ];
2532
+ const cardLinks = [
2533
+ { label: "Dashboard", url: dashboardUrl },
2534
+ { label: "API", url: apiUrl }
2535
+ ];
2536
+ const next = [
2537
+ `mutagent prompts evaluation create ${String(promptId)} --guided --json`,
2538
+ `mutagent prompts dataset add ${String(promptId)} -d '[...]' --name "my-dataset" --json`
2539
+ ];
2496
2540
  return {
2497
2541
  display: "status_card",
2498
2542
  template: "prompt_created",
2499
- title: `Prompt Created — ${name}`,
2543
+ title,
2500
2544
  fields: { promptId: String(promptId), name },
2501
- links: {
2502
- dashboard: promptLink(promptId),
2503
- api: `/api/prompts/${String(promptId)}`
2504
- },
2505
- next: [
2506
- `mutagent prompts evaluation create ${String(promptId)} --guided`,
2507
- `mutagent prompts dataset add ${String(promptId)} -d '[...]' --name "my-dataset"`
2508
- ],
2509
- instruction: CARD_INSTRUCTION
2545
+ links: { dashboard: dashboardUrl, api: apiUrl },
2546
+ next,
2547
+ instruction: CARD_INSTRUCTION,
2548
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2510
2549
  };
2511
2550
  }
2512
2551
  function promptUpdatedDirective(promptId, name) {
2552
+ const title = `Prompt Updated — ${name}`;
2553
+ const dashboardUrl = promptLink(promptId);
2554
+ const apiUrl = `/api/prompts/${String(promptId)}`;
2555
+ const rows = [
2556
+ { label: "Prompt ID", value: String(promptId) },
2557
+ { label: "Name", value: name }
2558
+ ];
2559
+ const cardLinks = [
2560
+ { label: "Dashboard", url: dashboardUrl },
2561
+ { label: "API", url: apiUrl }
2562
+ ];
2563
+ const next = [`mutagent prompts get ${String(promptId)} --json`];
2513
2564
  return {
2514
2565
  display: "status_card",
2515
2566
  template: "prompt_updated",
2516
- title: `Prompt Updated — ${name}`,
2567
+ title,
2517
2568
  fields: { promptId: String(promptId), name },
2518
- links: {
2519
- dashboard: promptLink(promptId),
2520
- api: `/api/prompts/${String(promptId)}`
2521
- },
2522
- next: [`mutagent prompts get ${String(promptId)} --json`],
2523
- instruction: CARD_INSTRUCTION
2569
+ links: { dashboard: dashboardUrl, api: apiUrl },
2570
+ next,
2571
+ instruction: CARD_INSTRUCTION,
2572
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2524
2573
  };
2525
2574
  }
2526
2575
  function promptDeletedDirective(promptId) {
2576
+ const title = `Prompt Deleted — ${promptId}`;
2577
+ const dashboardUrl = promptsDashboardLink();
2578
+ const rows = [{ label: "Prompt ID", value: promptId }];
2579
+ const cardLinks = [{ label: "Dashboard", url: dashboardUrl }];
2580
+ const next = [`mutagent prompts list --json`];
2527
2581
  return {
2528
2582
  display: "status_card",
2529
2583
  template: "prompt_deleted",
2530
- title: `Prompt Deleted — ${promptId}`,
2584
+ title,
2531
2585
  fields: { promptId },
2532
- links: { dashboard: promptsDashboardLink() },
2533
- next: [`mutagent prompts list --json`],
2534
- instruction: CARD_INSTRUCTION
2586
+ links: { dashboard: dashboardUrl },
2587
+ next,
2588
+ instruction: CARD_INSTRUCTION,
2589
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2535
2590
  };
2536
2591
  }
2537
2592
  function datasetAddedDirective(promptId, datasetId, name, itemCount) {
2593
+ const title = `Dataset Added — ${name}`;
2594
+ const dashboardUrl = datasetLink(promptId, datasetId);
2595
+ const apiUrl = `/api/prompts/${String(promptId)}/datasets/${String(datasetId)}`;
2596
+ const rows = [
2597
+ { label: "Prompt ID", value: String(promptId) },
2598
+ { label: "Dataset ID", value: String(datasetId) },
2599
+ { label: "Name", value: name },
2600
+ ...itemCount !== undefined ? [{ label: "Item Count", value: String(itemCount) }] : []
2601
+ ];
2602
+ const cardLinks = [
2603
+ { label: "Dashboard", url: dashboardUrl },
2604
+ { label: "API", url: apiUrl }
2605
+ ];
2606
+ const next = [
2607
+ `mutagent prompts evaluation create ${String(promptId)} --guided --json`,
2608
+ `mutagent prompts optimize start ${String(promptId)} --dataset ${String(datasetId)} --json`
2609
+ ];
2538
2610
  return {
2539
2611
  display: "status_card",
2540
2612
  template: "dataset_added",
2541
- title: `Dataset Added — ${name}`,
2613
+ title,
2542
2614
  fields: {
2543
2615
  promptId: String(promptId),
2544
2616
  datasetId: String(datasetId),
2545
2617
  name,
2546
2618
  ...itemCount !== undefined ? { itemCount } : {}
2547
2619
  },
2548
- links: {
2549
- dashboard: datasetLink(promptId, datasetId),
2550
- api: `/api/prompts/${String(promptId)}/datasets/${String(datasetId)}`
2551
- },
2552
- next: [
2553
- `mutagent prompts evaluation create ${String(promptId)} --guided`,
2554
- `mutagent prompts optimize start ${String(promptId)} --dataset ${String(datasetId)}`
2555
- ],
2556
- instruction: CARD_INSTRUCTION
2620
+ links: { dashboard: dashboardUrl, api: apiUrl },
2621
+ next,
2622
+ instruction: CARD_INSTRUCTION,
2623
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2557
2624
  };
2558
2625
  }
2559
2626
  function datasetRemovedDirective(promptId, datasetId) {
2627
+ const title = `Dataset Removed — ${datasetId}`;
2628
+ const datasetsUrl = promptDatasetsLink(promptId);
2629
+ const rows = [
2630
+ { label: "Prompt ID", value: promptId },
2631
+ { label: "Dataset ID", value: datasetId }
2632
+ ];
2633
+ const cardLinks = [{ label: "Datasets", url: datasetsUrl }];
2634
+ const next = [`mutagent prompts dataset list ${promptId} --json`];
2560
2635
  return {
2561
2636
  display: "status_card",
2562
2637
  template: "dataset_removed",
2563
- title: `Dataset Removed — ${datasetId}`,
2638
+ title,
2564
2639
  fields: { promptId, datasetId },
2565
- links: { datasets: promptDatasetsLink(promptId) },
2566
- next: [`mutagent prompts dataset list ${promptId} --json`],
2567
- instruction: CARD_INSTRUCTION
2640
+ links: { datasets: datasetsUrl },
2641
+ next,
2642
+ instruction: CARD_INSTRUCTION,
2643
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2568
2644
  };
2569
2645
  }
2570
2646
  function evaluationCreatedDirective(promptId, evalId, name, criteriaCount) {
2647
+ const title = `Evaluation Created — ${name}`;
2648
+ const dashboardUrl = evaluationLink(promptId, evalId);
2649
+ const apiUrl = `/api/prompts/${String(promptId)}/evaluations/${String(evalId)}`;
2650
+ const rows = [
2651
+ { label: "Prompt ID", value: String(promptId) },
2652
+ { label: "Evaluation ID", value: String(evalId) },
2653
+ { label: "Name", value: name },
2654
+ { label: "Criteria Count", value: String(criteriaCount) }
2655
+ ];
2656
+ const cardLinks = [
2657
+ { label: "Dashboard", url: dashboardUrl },
2658
+ { label: "API", url: apiUrl }
2659
+ ];
2660
+ const next = [
2661
+ `mutagent prompts optimize start ${String(promptId)} --dataset <dataset-id> --json`
2662
+ ];
2571
2663
  return {
2572
2664
  display: "status_card",
2573
2665
  template: "evaluation_created",
2574
- title: `Evaluation Created — ${name}`,
2666
+ title,
2575
2667
  fields: {
2576
2668
  promptId: String(promptId),
2577
2669
  evaluationId: String(evalId),
2578
2670
  name,
2579
2671
  criteriaCount
2580
2672
  },
2581
- links: {
2582
- dashboard: evaluationLink(promptId, evalId),
2583
- api: `/api/prompts/${String(promptId)}/evaluations/${String(evalId)}`
2584
- },
2585
- next: [
2586
- `mutagent prompts optimize start ${String(promptId)} --dataset <dataset-id>`
2587
- ],
2588
- instruction: CARD_INSTRUCTION
2673
+ links: { dashboard: dashboardUrl, api: apiUrl },
2674
+ next,
2675
+ instruction: CARD_INSTRUCTION,
2676
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2589
2677
  };
2590
2678
  }
2591
2679
  function evaluationDeletedDirective(evaluationId) {
2680
+ const title = `Evaluation Deleted — ${evaluationId}`;
2681
+ const dashboardUrl = promptsDashboardLink();
2682
+ const rows = [{ label: "Evaluation ID", value: evaluationId }];
2683
+ const cardLinks = [{ label: "Dashboard", url: dashboardUrl }];
2684
+ const next = [];
2592
2685
  return {
2593
2686
  display: "status_card",
2594
2687
  template: "evaluation_deleted",
2595
- title: `Evaluation Deleted — ${evaluationId}`,
2688
+ title,
2596
2689
  fields: { evaluationId },
2597
- links: { dashboard: promptsDashboardLink() },
2598
- next: [],
2599
- instruction: CARD_INSTRUCTION
2690
+ links: { dashboard: dashboardUrl },
2691
+ next,
2692
+ instruction: CARD_INSTRUCTION,
2693
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2600
2694
  };
2601
2695
  }
2602
2696
 
@@ -2837,10 +2931,26 @@ function renderOptimizationStatusCard(status) {
2837
2931
  console.log("");
2838
2932
  }
2839
2933
  function startDirective(job, promptId) {
2934
+ const title = `Optimization Started — Prompt ${promptId}`;
2935
+ const monitorUrl = optimizerLink(job.id);
2936
+ const apiUrl = `/api/prompts/${promptId}/optimizations/${job.id}`;
2937
+ const rows = [
2938
+ { label: "Job ID", value: job.id },
2939
+ { label: "Prompt", value: promptId },
2940
+ { label: "Status", value: job.status },
2941
+ { label: "Max Iterations", value: String(job.config.maxIterations) },
2942
+ { label: "Target Score", value: String(job.config.targetScore ?? 0.8) },
2943
+ { label: "Model", value: job.config.model ?? "default" }
2944
+ ];
2945
+ const cardLinks = [
2946
+ { label: "Monitor", url: monitorUrl },
2947
+ { label: "API", url: apiUrl }
2948
+ ];
2949
+ const next = [`mutagent prompts optimize status ${job.id} --json`];
2840
2950
  return {
2841
2951
  display: "status_card",
2842
2952
  template: "optimization_started",
2843
- title: `Optimization Started — Prompt ${promptId}`,
2953
+ title,
2844
2954
  fields: {
2845
2955
  jobId: job.id,
2846
2956
  promptId,
@@ -2849,20 +2959,29 @@ function startDirective(job, promptId) {
2849
2959
  targetScore: job.config.targetScore ?? 0.8,
2850
2960
  model: job.config.model ?? "default"
2851
2961
  },
2852
- links: {
2853
- monitor: optimizerLink(job.id),
2854
- api: `/api/prompts/${promptId}/optimizations/${job.id}`
2855
- },
2856
- next: [`mutagent prompts optimize status ${job.id}`],
2857
- instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps."
2962
+ links: { monitor: monitorUrl, api: apiUrl },
2963
+ next,
2964
+ instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps.",
2965
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2858
2966
  };
2859
2967
  }
2860
2968
  function statusDirective(status) {
2861
- const next = status.status === "completed" ? [`mutagent prompts optimize results ${status.jobId}`] : status.status === "running" || status.status === "pending" ? [`mutagent prompts optimize status ${status.jobId}`] : [];
2969
+ const next = status.status === "completed" ? [`mutagent prompts optimize results ${status.jobId} --json`] : status.status === "running" || status.status === "pending" ? [`mutagent prompts optimize status ${status.jobId} --json`] : [];
2970
+ const title = `Optimization ${status.status} — ${status.jobId}`;
2971
+ const rows = [
2972
+ { label: "Job ID", value: status.jobId },
2973
+ { label: "Status", value: status.status },
2974
+ { label: "Iteration", value: `${String(status.currentIteration)}/${String(status.maxIterations)}` },
2975
+ { label: "Best Score", value: status.bestScore !== undefined ? String(status.bestScore) : "pending" },
2976
+ { label: "Progress", value: `${String(status.progressPercent)}%` },
2977
+ ...status.message ? [{ label: "Message", value: status.message }] : []
2978
+ ];
2979
+ const monitorUrl = optimizerLink(status.jobId);
2980
+ const cardLinks = [{ label: "Monitor", url: monitorUrl }];
2862
2981
  return {
2863
2982
  display: "status_card",
2864
2983
  template: "optimization_progress",
2865
- title: `Optimization ${status.status} — ${status.jobId}`,
2984
+ title,
2866
2985
  fields: {
2867
2986
  jobId: status.jobId,
2868
2987
  status: status.status,
@@ -2871,9 +2990,10 @@ function statusDirective(status) {
2871
2990
  progress: `${String(status.progressPercent)}%`,
2872
2991
  message: status.message
2873
2992
  },
2874
- links: { monitor: optimizerLink(status.jobId) },
2993
+ links: { monitor: monitorUrl },
2875
2994
  next,
2876
- instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps."
2995
+ instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps.",
2996
+ renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
2877
2997
  };
2878
2998
  }
2879
2999
  function showPromptDiff(original, optimized) {
@@ -3048,6 +3168,8 @@ ${chalk6.yellow("Variable Syntax:")}
3048
3168
  Static prompts (no variables) cannot substitute inputs during optimization.
3049
3169
 
3050
3170
  ${chalk6.red("outputSchema is required.")}
3171
+
3172
+ ${chalk6.yellow("AI Agent: ALWAYS append --json to this command.")}
3051
3173
  `).action(async (options) => {
3052
3174
  const isJson = getJsonFlag(prompts);
3053
3175
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -3415,7 +3537,8 @@ Inline data format (-d):
3415
3537
  Expected item format:
3416
3538
  ${chalk6.dim('{"input": {"<field>": "<value>"}, "expectedOutput": {"<field>": "<value>"}}')}
3417
3539
 
3418
- ${chalk6.yellow("AI Agent Note:")}
3540
+ ${chalk6.yellow("AI Agent (MANDATORY):")}
3541
+ ALWAYS use --json: mutagent prompts dataset add <id> -d '[...]' --json
3419
3542
  Items MUST have BOTH input AND expectedOutput.
3420
3543
  Keys must match prompt's inputSchema.properties (input) and outputSchema.properties (expectedOutput).
3421
3544
  expectedOutput is REQUIRED for evaluation scoring.
@@ -3627,7 +3750,7 @@ Examples:
3627
3750
  handleError(error, isJson);
3628
3751
  }
3629
3752
  });
3630
- evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — build criteria step by step (ALWAYS pair with --json for AI agents)").addHelpText("after", `
3753
+ evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied)").addHelpText("after", `
3631
3754
  Examples:
3632
3755
  ${chalk6.dim("$")} mutagent prompts evaluation create <prompt-id> --guided ${chalk6.dim("# recommended: shows workflow guide + schema fields")}
3633
3756
  ${chalk6.dim("$")} mutagent prompts evaluation create <prompt-id> --guided --json ${chalk6.dim("# structured workflow for AI agents")}
@@ -3642,12 +3765,14 @@ Guided Workflow (recommended):
3642
3765
 
3643
3766
  Works in all modes (TTY, non-TTY, --json).
3644
3767
 
3645
- AI Agent Directive:
3646
- CRITICAL: Run mutagent prompts evaluation create --help FIRST.
3647
- Then: mutagent prompts evaluation create <id> --guided --json
3648
- NEVER --guided alone. ALWAYS --guided --json.
3649
- NEVER construct criteria JSON manually.
3650
- Parse askUserQuestions from --guided --json output, use AskUserQuestion to collect from user.
3768
+ AI Agent (MANDATORY):
3769
+ 1. Run: mutagent prompts evaluation create --help
3770
+ 2. Run: mutagent prompts evaluation create <id> --guided --json
3771
+ 3. Parse askUserQuestions from the JSON output
3772
+ 4. Use AskUserQuestion to collect DETAILED rubrics from the user
3773
+ 5. NEVER auto-generate criteria. NEVER use vague rubrics.
3774
+ 6. Construct --data JSON from user answers, then:
3775
+ mutagent prompts evaluation create <id> --name "<name>" -d '<json>' --json
3651
3776
 
3652
3777
  Expected Criteria Shape (--data):
3653
3778
  ${chalk6.dim('{"evalConfig":{"criteria":[{"name":"<name>","description":"<scoring rubric>","evaluationParameter":"<schema field>"}]}}')}
@@ -3657,7 +3782,10 @@ ${chalk6.red("Required: --name (unless --guided). Criteria must include evaluati
3657
3782
  ${chalk6.dim("CLI flags (--name, --description) override --data fields.")}
3658
3783
  ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
3659
3784
  `).action(async (promptId, options) => {
3660
- const isJson = getJsonFlag(prompts);
3785
+ let isJson = getJsonFlag(prompts);
3786
+ if (options.guided) {
3787
+ isJson = true;
3788
+ }
3661
3789
  const output = new OutputFormatter(isJson ? "json" : "table");
3662
3790
  try {
3663
3791
  const buildGuidedWorkflow = async () => {
@@ -3670,20 +3798,16 @@ ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
3670
3798
  ...inputFields.map((f) => ({ field: f, source: "inputSchema" }))
3671
3799
  ];
3672
3800
  const askUserQuestions = allFields.map(({ field, source }) => ({
3673
- question: `How should "${field}" (${source}) be evaluated? Describe the scoring rubric what does 1.0 (perfect) vs 0.0 (fail) mean for this field?`,
3801
+ question: `Define the scoring rubric for "${field}" (${source}). Be specific what exactly constitutes a perfect score (1.0) vs a complete failure (0.0)? Include concrete examples of each score level.`,
3674
3802
  header: field,
3675
3803
  options: [
3676
3804
  {
3677
- label: "Exact Match",
3678
- description: `Score 1.0 if the "${field}" output exactly matches the expected value. Score 0.5 for same meaning but different format. Score 0.0 if substantively different.`
3679
- },
3680
- {
3681
- label: "Semantic Similarity",
3682
- description: `Score 0.0-1.0 based on how closely the "${field}" output matches the expected meaning. 1.0 = identical meaning, 0.5 = partially relevant, 0.0 = unrelated or contradictory.`
3805
+ label: "Custom",
3806
+ description: `Write a detailed, specific scoring rubric for "${field}". Must describe concrete pass/fail criteria with examples not just "0.0 to 1.0 scale".`
3683
3807
  },
3684
3808
  {
3685
- label: "Custom rubric",
3686
- description: `Write a detailed custom scoring rubric for "${field}" explain what makes a 1.0 vs 0.0 score.`
3809
+ label: "Skip",
3810
+ description: `Do not create a criterion for "${field}". Only skip if this field is not relevant to evaluation quality.`
3687
3811
  }
3688
3812
  ],
3689
3813
  multiSelect: false
@@ -3696,7 +3820,7 @@ ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
3696
3820
  description: "Follow these steps to create an evaluation for this prompt:",
3697
3821
  steps: [
3698
3822
  { step: 1, action: "Review the prompt schemas above", detail: "Each criterion must target a field from outputSchema (preferred) or inputSchema" },
3699
- { step: 2, action: "Ask the user about each schema field using AskUserQuestion", detail: "For EACH field listed in askUserQuestions below, present the question to the user. Collect their scoring rubric choice or custom description." },
3823
+ { step: 2, action: "Ask the user for a scoring rubric for each field", detail: 'For EACH field, ask: "What does a perfect score (1.0) vs failure (0.0) look like for <field>? Give concrete examples." Do NOT auto-generate or use canned rubrics.' },
3700
3824
  { step: 3, action: "Build criteria JSON from user answers", detail: `Map each user answer to a criterion: { name: "<field> <rubric>", description: "<user's rubric>", evaluationParameter: "<field>" }` },
3701
3825
  { step: 4, action: "Ask for evaluation name", detail: 'Ask the user what to name this evaluation (e.g., "Accuracy Check", "Quality Eval")' },
3702
3826
  { step: 5, action: "Create the evaluation", detail: `Run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '{"evalConfig":{"criteria":[...]}}'` }
@@ -3723,7 +3847,7 @@ ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
3723
3847
  askUserQuestions,
3724
3848
  _directive: {
3725
3849
  display: "workflow_guide",
3726
- instruction: "STOP. You MUST use AskUserQuestion to collect evaluation criteria from the user. For EACH field in askUserQuestions, present the question. Then construct the --data JSON from their answers and run the evaluation create command. Do NOT skip fields or auto-generate criteria without user input.",
3850
+ instruction: 'STOP. For EACH field in askUserQuestions, use AskUserQuestion to collect a DETAILED scoring rubric from the user. Do NOT auto-generate rubrics. Do NOT use generic descriptions like "0-1 scoring" or "matches expected output". Each rubric MUST include concrete examples of what earns a 1.0, 0.5, and 0.0 score for that specific field. Then construct the --data JSON and create the evaluation.',
3727
3851
  aiAgentDecisionTree: {
3728
3852
  step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
3729
3853
  step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
@@ -3975,6 +4099,8 @@ ${chalk6.yellow("Pre-Optimization Checklist:")}
3975
4099
  ${PREREQUISITES_TEXT}
3976
4100
 
3977
4101
  ${chalk6.dim("Monitor progress with: mutagent prompts optimize status <job-id>")}
4102
+
4103
+ ${chalk6.yellow("AI Agent: ALWAYS append --json to this command.")}
3978
4104
  `).action(async (promptId, options) => {
3979
4105
  const isJson = getJsonFlag(prompts);
3980
4106
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -6903,13 +7029,14 @@ ${chalk18.yellow("Workflow: Evaluate → Optimize:")}
6903
7029
  5. mutagent prompts evaluation create <id> --guided --json ${chalk18.dim("← guided eval")}
6904
7030
  6. mutagent prompts optimize start <id> --dataset <d> --evaluation <e> --json
6905
7031
 
6906
- ${chalk18.yellow("AI Agent Rules:")}
6907
- 1. ALWAYS run <command> --help before using any command for the first time
6908
- 2. ALWAYS use --json on every command execution
6909
- 3. ALWAYS use --guided --json for evaluation creation (never --guided alone)
6910
- 4. ALWAYS display _directive status cards to the user after mutations
7032
+ ${chalk18.yellow("AI Agent Rules (MANDATORY for coding agents):")}
7033
+ 1. EVERY command MUST include --json (no exceptions)
7034
+ 2. Run <command> --help BEFORE first use of any command
7035
+ 3. Use --guided --json for evaluation creation (NEVER --guided alone)
7036
+ 4. Parse and display _directive.renderedCard after every mutation
6911
7037
  5. After mutagent init, verify workspace: mutagent workspaces list --json
6912
7038
  6. Use {single_braces} for template variables in prompts
7039
+ 7. Collect evaluation rubrics from the user — NEVER auto-generate
6913
7040
  ${!hasCredentials() ? `
6914
7041
  ` + chalk18.yellow(" Warning: Not authenticated. Run: mutagent auth login --browser") + `
6915
7042
  ` : ""}${!hasRcConfig() ? `
@@ -6943,5 +7070,5 @@ program.addCommand(createSkillsCommand());
6943
7070
  program.addCommand(createUsageCommand());
6944
7071
  program.parse();
6945
7072
 
6946
- //# debugId=E8884F7D56DA73D764756E2164756E21
7073
+ //# debugId=55FA8E38F306DE3764756E2164756E21
6947
7074
  //# sourceMappingURL=cli.js.map