@mutagent/cli 0.1.28 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cli.js +213 -86
- package/dist/bin/cli.js.map +6 -6
- package/package.json +2 -3
package/dist/bin/cli.js
CHANGED
|
@@ -2472,7 +2472,9 @@ function evaluationLinks(promptId, evalId) {
|
|
|
2472
2472
|
var AI_DIRECTIVE = `
|
|
2473
2473
|
[AI Agent: STOP. Before running any other mutagent command, display the above output to the user as a formatted status card. Do not summarize or omit details. Show all links and suggested next steps.]`;
|
|
2474
2474
|
function echoDirectiveToStderr(directive) {
|
|
2475
|
-
if (directive?.instruction) {
|
|
2475
|
+
if (directive?.renderedCard) {
|
|
2476
|
+
console.error(directive.renderedCard);
|
|
2477
|
+
} else if (directive?.instruction) {
|
|
2476
2478
|
console.error(`
|
|
2477
2479
|
⚠️ AI AGENT: ${directive.instruction}
|
|
2478
2480
|
`);
|
|
@@ -2492,111 +2494,203 @@ function formatCreationHints(hint) {
|
|
|
2492
2494
|
`);
|
|
2493
2495
|
}
|
|
2494
2496
|
var CARD_INSTRUCTION = "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps.";
|
|
2497
|
+
/**
 * Render a status card as a plain-text box drawn with Unicode box characters.
 *
 * Layout: title, separator, one "label: value" line per row, then (when
 * present) a separated block of "label: url" links and a separated
 * "Next Steps:" block with one arrow-prefixed entry per command.
 *
 * The box is at least 53 characters wide inside its padding. If any line is
 * longer, the whole box widens to fit it instead of cutting the text off, so
 * URLs and suggested commands stay complete and copy-pasteable.
 *
 * @param {object} opts
 * @param {string} opts.title - Heading shown on the first line of the card.
 * @param {Array<{label: string, value: *}>} [opts.rows] - Field rows.
 * @param {Array<{label: string, url: string}>} [opts.links] - Link rows.
 * @param {string[]} [opts.next] - Suggested follow-up commands.
 * @returns {string} The card, lines joined with "\n" (no trailing newline).
 */
function renderPlainCard(opts) {
  const MIN_INNER_WIDTH = 53;
  const SEPARATOR = Symbol("separator");

  // Missing sections are treated as empty rather than throwing.
  const rows = opts.rows ?? [];
  const links = opts.links ?? [];
  const next = opts.next ?? [];

  // First pass: collect the logical content so the width can be derived
  // from the longest line before anything is padded.
  const content = [String(opts.title), SEPARATOR];
  for (const row of rows) {
    content.push(`${row.label}: ${row.value}`);
  }
  if (links.length) {
    content.push(SEPARATOR);
    for (const link of links) {
      content.push(`${link.label}: ${link.url}`);
    }
  }
  if (next.length) {
    content.push(SEPARATOR, "Next Steps:");
    for (const step of next) {
      content.push(` → ${step}`);
    }
  }

  const innerWidth = content.reduce(
    (widest, entry) => (typeof entry === "string" ? Math.max(widest, entry.length) : widest),
    MIN_INNER_WIDTH
  );

  // Second pass: draw the frame. Borders span the inner width plus the
  // one-character padding on each side.
  const rule = "─".repeat(innerWidth);
  const rendered = content.map((entry) =>
    entry === SEPARATOR ? `│ ${rule} │` : `│ ${entry.padEnd(innerWidth)} │`
  );

  return [`┌─${rule}─┐`, ...rendered, `└─${rule}─┘`].join("\n");
}
|
|
2495
2524
|
function promptCreatedDirective(promptId, name) {
|
|
2525
|
+
const title = `Prompt Created — ${name}`;
|
|
2526
|
+
const dashboardUrl = promptLink(promptId);
|
|
2527
|
+
const apiUrl = `/api/prompts/${String(promptId)}`;
|
|
2528
|
+
const rows = [
|
|
2529
|
+
{ label: "Prompt ID", value: String(promptId) },
|
|
2530
|
+
{ label: "Name", value: name }
|
|
2531
|
+
];
|
|
2532
|
+
const cardLinks = [
|
|
2533
|
+
{ label: "Dashboard", url: dashboardUrl },
|
|
2534
|
+
{ label: "API", url: apiUrl }
|
|
2535
|
+
];
|
|
2536
|
+
const next = [
|
|
2537
|
+
`mutagent prompts evaluation create ${String(promptId)} --guided --json`,
|
|
2538
|
+
`mutagent prompts dataset add ${String(promptId)} -d '[...]' --name "my-dataset" --json`
|
|
2539
|
+
];
|
|
2496
2540
|
return {
|
|
2497
2541
|
display: "status_card",
|
|
2498
2542
|
template: "prompt_created",
|
|
2499
|
-
title
|
|
2543
|
+
title,
|
|
2500
2544
|
fields: { promptId: String(promptId), name },
|
|
2501
|
-
links: {
|
|
2502
|
-
|
|
2503
|
-
|
|
2504
|
-
}
|
|
2505
|
-
next: [
|
|
2506
|
-
`mutagent prompts evaluation create ${String(promptId)} --guided`,
|
|
2507
|
-
`mutagent prompts dataset add ${String(promptId)} -d '[...]' --name "my-dataset"`
|
|
2508
|
-
],
|
|
2509
|
-
instruction: CARD_INSTRUCTION
|
|
2545
|
+
links: { dashboard: dashboardUrl, api: apiUrl },
|
|
2546
|
+
next,
|
|
2547
|
+
instruction: CARD_INSTRUCTION,
|
|
2548
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2510
2549
|
};
|
|
2511
2550
|
}
|
|
2512
2551
|
function promptUpdatedDirective(promptId, name) {
|
|
2552
|
+
const title = `Prompt Updated — ${name}`;
|
|
2553
|
+
const dashboardUrl = promptLink(promptId);
|
|
2554
|
+
const apiUrl = `/api/prompts/${String(promptId)}`;
|
|
2555
|
+
const rows = [
|
|
2556
|
+
{ label: "Prompt ID", value: String(promptId) },
|
|
2557
|
+
{ label: "Name", value: name }
|
|
2558
|
+
];
|
|
2559
|
+
const cardLinks = [
|
|
2560
|
+
{ label: "Dashboard", url: dashboardUrl },
|
|
2561
|
+
{ label: "API", url: apiUrl }
|
|
2562
|
+
];
|
|
2563
|
+
const next = [`mutagent prompts get ${String(promptId)} --json`];
|
|
2513
2564
|
return {
|
|
2514
2565
|
display: "status_card",
|
|
2515
2566
|
template: "prompt_updated",
|
|
2516
|
-
title
|
|
2567
|
+
title,
|
|
2517
2568
|
fields: { promptId: String(promptId), name },
|
|
2518
|
-
links: {
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
}
|
|
2522
|
-
next: [`mutagent prompts get ${String(promptId)} --json`],
|
|
2523
|
-
instruction: CARD_INSTRUCTION
|
|
2569
|
+
links: { dashboard: dashboardUrl, api: apiUrl },
|
|
2570
|
+
next,
|
|
2571
|
+
instruction: CARD_INSTRUCTION,
|
|
2572
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2524
2573
|
};
|
|
2525
2574
|
}
|
|
2526
2575
|
function promptDeletedDirective(promptId) {
|
|
2576
|
+
const title = `Prompt Deleted — ${promptId}`;
|
|
2577
|
+
const dashboardUrl = promptsDashboardLink();
|
|
2578
|
+
const rows = [{ label: "Prompt ID", value: promptId }];
|
|
2579
|
+
const cardLinks = [{ label: "Dashboard", url: dashboardUrl }];
|
|
2580
|
+
const next = [`mutagent prompts list --json`];
|
|
2527
2581
|
return {
|
|
2528
2582
|
display: "status_card",
|
|
2529
2583
|
template: "prompt_deleted",
|
|
2530
|
-
title
|
|
2584
|
+
title,
|
|
2531
2585
|
fields: { promptId },
|
|
2532
|
-
links: { dashboard:
|
|
2533
|
-
next
|
|
2534
|
-
instruction: CARD_INSTRUCTION
|
|
2586
|
+
links: { dashboard: dashboardUrl },
|
|
2587
|
+
next,
|
|
2588
|
+
instruction: CARD_INSTRUCTION,
|
|
2589
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2535
2590
|
};
|
|
2536
2591
|
}
|
|
2537
2592
|
function datasetAddedDirective(promptId, datasetId, name, itemCount) {
|
|
2593
|
+
const title = `Dataset Added — ${name}`;
|
|
2594
|
+
const dashboardUrl = datasetLink(promptId, datasetId);
|
|
2595
|
+
const apiUrl = `/api/prompts/${String(promptId)}/datasets/${String(datasetId)}`;
|
|
2596
|
+
const rows = [
|
|
2597
|
+
{ label: "Prompt ID", value: String(promptId) },
|
|
2598
|
+
{ label: "Dataset ID", value: String(datasetId) },
|
|
2599
|
+
{ label: "Name", value: name },
|
|
2600
|
+
...itemCount !== undefined ? [{ label: "Item Count", value: String(itemCount) }] : []
|
|
2601
|
+
];
|
|
2602
|
+
const cardLinks = [
|
|
2603
|
+
{ label: "Dashboard", url: dashboardUrl },
|
|
2604
|
+
{ label: "API", url: apiUrl }
|
|
2605
|
+
];
|
|
2606
|
+
const next = [
|
|
2607
|
+
`mutagent prompts evaluation create ${String(promptId)} --guided --json`,
|
|
2608
|
+
`mutagent prompts optimize start ${String(promptId)} --dataset ${String(datasetId)} --json`
|
|
2609
|
+
];
|
|
2538
2610
|
return {
|
|
2539
2611
|
display: "status_card",
|
|
2540
2612
|
template: "dataset_added",
|
|
2541
|
-
title
|
|
2613
|
+
title,
|
|
2542
2614
|
fields: {
|
|
2543
2615
|
promptId: String(promptId),
|
|
2544
2616
|
datasetId: String(datasetId),
|
|
2545
2617
|
name,
|
|
2546
2618
|
...itemCount !== undefined ? { itemCount } : {}
|
|
2547
2619
|
},
|
|
2548
|
-
links: {
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
}
|
|
2552
|
-
next: [
|
|
2553
|
-
`mutagent prompts evaluation create ${String(promptId)} --guided`,
|
|
2554
|
-
`mutagent prompts optimize start ${String(promptId)} --dataset ${String(datasetId)}`
|
|
2555
|
-
],
|
|
2556
|
-
instruction: CARD_INSTRUCTION
|
|
2620
|
+
links: { dashboard: dashboardUrl, api: apiUrl },
|
|
2621
|
+
next,
|
|
2622
|
+
instruction: CARD_INSTRUCTION,
|
|
2623
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2557
2624
|
};
|
|
2558
2625
|
}
|
|
2559
2626
|
function datasetRemovedDirective(promptId, datasetId) {
|
|
2627
|
+
const title = `Dataset Removed — ${datasetId}`;
|
|
2628
|
+
const datasetsUrl = promptDatasetsLink(promptId);
|
|
2629
|
+
const rows = [
|
|
2630
|
+
{ label: "Prompt ID", value: promptId },
|
|
2631
|
+
{ label: "Dataset ID", value: datasetId }
|
|
2632
|
+
];
|
|
2633
|
+
const cardLinks = [{ label: "Datasets", url: datasetsUrl }];
|
|
2634
|
+
const next = [`mutagent prompts dataset list ${promptId} --json`];
|
|
2560
2635
|
return {
|
|
2561
2636
|
display: "status_card",
|
|
2562
2637
|
template: "dataset_removed",
|
|
2563
|
-
title
|
|
2638
|
+
title,
|
|
2564
2639
|
fields: { promptId, datasetId },
|
|
2565
|
-
links: { datasets:
|
|
2566
|
-
next
|
|
2567
|
-
instruction: CARD_INSTRUCTION
|
|
2640
|
+
links: { datasets: datasetsUrl },
|
|
2641
|
+
next,
|
|
2642
|
+
instruction: CARD_INSTRUCTION,
|
|
2643
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2568
2644
|
};
|
|
2569
2645
|
}
|
|
2570
2646
|
function evaluationCreatedDirective(promptId, evalId, name, criteriaCount) {
|
|
2647
|
+
const title = `Evaluation Created — ${name}`;
|
|
2648
|
+
const dashboardUrl = evaluationLink(promptId, evalId);
|
|
2649
|
+
const apiUrl = `/api/prompts/${String(promptId)}/evaluations/${String(evalId)}`;
|
|
2650
|
+
const rows = [
|
|
2651
|
+
{ label: "Prompt ID", value: String(promptId) },
|
|
2652
|
+
{ label: "Evaluation ID", value: String(evalId) },
|
|
2653
|
+
{ label: "Name", value: name },
|
|
2654
|
+
{ label: "Criteria Count", value: String(criteriaCount) }
|
|
2655
|
+
];
|
|
2656
|
+
const cardLinks = [
|
|
2657
|
+
{ label: "Dashboard", url: dashboardUrl },
|
|
2658
|
+
{ label: "API", url: apiUrl }
|
|
2659
|
+
];
|
|
2660
|
+
const next = [
|
|
2661
|
+
`mutagent prompts optimize start ${String(promptId)} --dataset <dataset-id> --json`
|
|
2662
|
+
];
|
|
2571
2663
|
return {
|
|
2572
2664
|
display: "status_card",
|
|
2573
2665
|
template: "evaluation_created",
|
|
2574
|
-
title
|
|
2666
|
+
title,
|
|
2575
2667
|
fields: {
|
|
2576
2668
|
promptId: String(promptId),
|
|
2577
2669
|
evaluationId: String(evalId),
|
|
2578
2670
|
name,
|
|
2579
2671
|
criteriaCount
|
|
2580
2672
|
},
|
|
2581
|
-
links: {
|
|
2582
|
-
|
|
2583
|
-
|
|
2584
|
-
}
|
|
2585
|
-
next: [
|
|
2586
|
-
`mutagent prompts optimize start ${String(promptId)} --dataset <dataset-id>`
|
|
2587
|
-
],
|
|
2588
|
-
instruction: CARD_INSTRUCTION
|
|
2673
|
+
links: { dashboard: dashboardUrl, api: apiUrl },
|
|
2674
|
+
next,
|
|
2675
|
+
instruction: CARD_INSTRUCTION,
|
|
2676
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2589
2677
|
};
|
|
2590
2678
|
}
|
|
2591
2679
|
function evaluationDeletedDirective(evaluationId) {
|
|
2680
|
+
const title = `Evaluation Deleted — ${evaluationId}`;
|
|
2681
|
+
const dashboardUrl = promptsDashboardLink();
|
|
2682
|
+
const rows = [{ label: "Evaluation ID", value: evaluationId }];
|
|
2683
|
+
const cardLinks = [{ label: "Dashboard", url: dashboardUrl }];
|
|
2684
|
+
const next = [];
|
|
2592
2685
|
return {
|
|
2593
2686
|
display: "status_card",
|
|
2594
2687
|
template: "evaluation_deleted",
|
|
2595
|
-
title
|
|
2688
|
+
title,
|
|
2596
2689
|
fields: { evaluationId },
|
|
2597
|
-
links: { dashboard:
|
|
2598
|
-
next
|
|
2599
|
-
instruction: CARD_INSTRUCTION
|
|
2690
|
+
links: { dashboard: dashboardUrl },
|
|
2691
|
+
next,
|
|
2692
|
+
instruction: CARD_INSTRUCTION,
|
|
2693
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2600
2694
|
};
|
|
2601
2695
|
}
|
|
2602
2696
|
|
|
@@ -2837,10 +2931,26 @@ function renderOptimizationStatusCard(status) {
|
|
|
2837
2931
|
console.log("");
|
|
2838
2932
|
}
|
|
2839
2933
|
function startDirective(job, promptId) {
|
|
2934
|
+
const title = `Optimization Started — Prompt ${promptId}`;
|
|
2935
|
+
const monitorUrl = optimizerLink(job.id);
|
|
2936
|
+
const apiUrl = `/api/prompts/${promptId}/optimizations/${job.id}`;
|
|
2937
|
+
const rows = [
|
|
2938
|
+
{ label: "Job ID", value: job.id },
|
|
2939
|
+
{ label: "Prompt", value: promptId },
|
|
2940
|
+
{ label: "Status", value: job.status },
|
|
2941
|
+
{ label: "Max Iterations", value: String(job.config.maxIterations) },
|
|
2942
|
+
{ label: "Target Score", value: String(job.config.targetScore ?? 0.8) },
|
|
2943
|
+
{ label: "Model", value: job.config.model ?? "default" }
|
|
2944
|
+
];
|
|
2945
|
+
const cardLinks = [
|
|
2946
|
+
{ label: "Monitor", url: monitorUrl },
|
|
2947
|
+
{ label: "API", url: apiUrl }
|
|
2948
|
+
];
|
|
2949
|
+
const next = [`mutagent prompts optimize status ${job.id} --json`];
|
|
2840
2950
|
return {
|
|
2841
2951
|
display: "status_card",
|
|
2842
2952
|
template: "optimization_started",
|
|
2843
|
-
title
|
|
2953
|
+
title,
|
|
2844
2954
|
fields: {
|
|
2845
2955
|
jobId: job.id,
|
|
2846
2956
|
promptId,
|
|
@@ -2849,20 +2959,29 @@ function startDirective(job, promptId) {
|
|
|
2849
2959
|
targetScore: job.config.targetScore ?? 0.8,
|
|
2850
2960
|
model: job.config.model ?? "default"
|
|
2851
2961
|
},
|
|
2852
|
-
links: {
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
}
|
|
2856
|
-
next: [`mutagent prompts optimize status ${job.id}`],
|
|
2857
|
-
instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps."
|
|
2962
|
+
links: { monitor: monitorUrl, api: apiUrl },
|
|
2963
|
+
next,
|
|
2964
|
+
instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps.",
|
|
2965
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2858
2966
|
};
|
|
2859
2967
|
}
|
|
2860
2968
|
function statusDirective(status) {
|
|
2861
|
-
const next = status.status === "completed" ? [`mutagent prompts optimize results ${status.jobId}`] : status.status === "running" || status.status === "pending" ? [`mutagent prompts optimize status ${status.jobId}`] : [];
|
|
2969
|
+
const next = status.status === "completed" ? [`mutagent prompts optimize results ${status.jobId} --json`] : status.status === "running" || status.status === "pending" ? [`mutagent prompts optimize status ${status.jobId} --json`] : [];
|
|
2970
|
+
const title = `Optimization ${status.status} — ${status.jobId}`;
|
|
2971
|
+
const rows = [
|
|
2972
|
+
{ label: "Job ID", value: status.jobId },
|
|
2973
|
+
{ label: "Status", value: status.status },
|
|
2974
|
+
{ label: "Iteration", value: `${String(status.currentIteration)}/${String(status.maxIterations)}` },
|
|
2975
|
+
{ label: "Best Score", value: status.bestScore !== undefined ? String(status.bestScore) : "pending" },
|
|
2976
|
+
{ label: "Progress", value: `${String(status.progressPercent)}%` },
|
|
2977
|
+
...status.message ? [{ label: "Message", value: status.message }] : []
|
|
2978
|
+
];
|
|
2979
|
+
const monitorUrl = optimizerLink(status.jobId);
|
|
2980
|
+
const cardLinks = [{ label: "Monitor", url: monitorUrl }];
|
|
2862
2981
|
return {
|
|
2863
2982
|
display: "status_card",
|
|
2864
2983
|
template: "optimization_progress",
|
|
2865
|
-
title
|
|
2984
|
+
title,
|
|
2866
2985
|
fields: {
|
|
2867
2986
|
jobId: status.jobId,
|
|
2868
2987
|
status: status.status,
|
|
@@ -2871,9 +2990,10 @@ function statusDirective(status) {
|
|
|
2871
2990
|
progress: `${String(status.progressPercent)}%`,
|
|
2872
2991
|
message: status.message
|
|
2873
2992
|
},
|
|
2874
|
-
links: { monitor:
|
|
2993
|
+
links: { monitor: monitorUrl },
|
|
2875
2994
|
next,
|
|
2876
|
-
instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps."
|
|
2995
|
+
instruction: "STOP. Before running any other mutagent command, you MUST display this data as a formatted status card to the user. Do not summarize or omit any fields. Show all links and suggested next steps.",
|
|
2996
|
+
renderedCard: renderPlainCard({ title, rows, links: cardLinks, next })
|
|
2877
2997
|
};
|
|
2878
2998
|
}
|
|
2879
2999
|
function showPromptDiff(original, optimized) {
|
|
@@ -3048,6 +3168,8 @@ ${chalk6.yellow("Variable Syntax:")}
|
|
|
3048
3168
|
Static prompts (no variables) cannot substitute inputs during optimization.
|
|
3049
3169
|
|
|
3050
3170
|
${chalk6.red("outputSchema is required.")}
|
|
3171
|
+
|
|
3172
|
+
${chalk6.yellow("AI Agent: ALWAYS append --json to this command.")}
|
|
3051
3173
|
`).action(async (options) => {
|
|
3052
3174
|
const isJson = getJsonFlag(prompts);
|
|
3053
3175
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -3415,7 +3537,8 @@ Inline data format (-d):
|
|
|
3415
3537
|
Expected item format:
|
|
3416
3538
|
${chalk6.dim('{"input": {"<field>": "<value>"}, "expectedOutput": {"<field>": "<value>"}}')}
|
|
3417
3539
|
|
|
3418
|
-
${chalk6.yellow("AI Agent
|
|
3540
|
+
${chalk6.yellow("AI Agent (MANDATORY):")}
|
|
3541
|
+
ALWAYS use --json: mutagent prompts dataset add <id> -d '[...]' --json
|
|
3419
3542
|
Items MUST have BOTH input AND expectedOutput.
|
|
3420
3543
|
Keys must match prompt's inputSchema.properties (input) and outputSchema.properties (expectedOutput).
|
|
3421
3544
|
expectedOutput is REQUIRED for evaluation scoring.
|
|
@@ -3627,7 +3750,7 @@ Examples:
|
|
|
3627
3750
|
handleError(error, isJson);
|
|
3628
3751
|
}
|
|
3629
3752
|
});
|
|
3630
|
-
evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode —
|
|
3753
|
+
evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied)").addHelpText("after", `
|
|
3631
3754
|
Examples:
|
|
3632
3755
|
${chalk6.dim("$")} mutagent prompts evaluation create <prompt-id> --guided ${chalk6.dim("# recommended: shows workflow guide + schema fields")}
|
|
3633
3756
|
${chalk6.dim("$")} mutagent prompts evaluation create <prompt-id> --guided --json ${chalk6.dim("# structured workflow for AI agents")}
|
|
@@ -3642,12 +3765,14 @@ Guided Workflow (recommended):
|
|
|
3642
3765
|
|
|
3643
3766
|
Works in all modes (TTY, non-TTY, --json).
|
|
3644
3767
|
|
|
3645
|
-
AI Agent
|
|
3646
|
-
|
|
3647
|
-
|
|
3648
|
-
|
|
3649
|
-
|
|
3650
|
-
|
|
3768
|
+
AI Agent (MANDATORY):
|
|
3769
|
+
1. Run: mutagent prompts evaluation create --help
|
|
3770
|
+
2. Run: mutagent prompts evaluation create <id> --guided --json
|
|
3771
|
+
3. Parse askUserQuestions from the JSON output
|
|
3772
|
+
4. Use AskUserQuestion to collect DETAILED rubrics from the user
|
|
3773
|
+
5. NEVER auto-generate criteria. NEVER use vague rubrics.
|
|
3774
|
+
6. Construct --data JSON from user answers, then:
|
|
3775
|
+
mutagent prompts evaluation create <id> --name "<name>" -d '<json>' --json
|
|
3651
3776
|
|
|
3652
3777
|
Expected Criteria Shape (--data):
|
|
3653
3778
|
${chalk6.dim('{"evalConfig":{"criteria":[{"name":"<name>","description":"<scoring rubric>","evaluationParameter":"<schema field>"}]}}')}
|
|
@@ -3657,7 +3782,10 @@ ${chalk6.red("Required: --name (unless --guided). Criteria must include evaluati
|
|
|
3657
3782
|
${chalk6.dim("CLI flags (--name, --description) override --data fields.")}
|
|
3658
3783
|
${chalk6.dim("Get prompt IDs: mutagent prompts list")}
|
|
3659
3784
|
`).action(async (promptId, options) => {
|
|
3660
|
-
|
|
3785
|
+
let isJson = getJsonFlag(prompts);
|
|
3786
|
+
if (options.guided) {
|
|
3787
|
+
isJson = true;
|
|
3788
|
+
}
|
|
3661
3789
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
3662
3790
|
try {
|
|
3663
3791
|
const buildGuidedWorkflow = async () => {
|
|
@@ -3670,20 +3798,16 @@ ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
|
|
|
3670
3798
|
...inputFields.map((f) => ({ field: f, source: "inputSchema" }))
|
|
3671
3799
|
];
|
|
3672
3800
|
const askUserQuestions = allFields.map(({ field, source }) => ({
|
|
3673
|
-
question: `
|
|
3801
|
+
question: `Define the scoring rubric for "${field}" (${source}). Be specific — what exactly constitutes a perfect score (1.0) vs a complete failure (0.0)? Include concrete examples of each score level.`,
|
|
3674
3802
|
header: field,
|
|
3675
3803
|
options: [
|
|
3676
3804
|
{
|
|
3677
|
-
label: "
|
|
3678
|
-
description: `
|
|
3679
|
-
},
|
|
3680
|
-
{
|
|
3681
|
-
label: "Semantic Similarity",
|
|
3682
|
-
description: `Score 0.0-1.0 based on how closely the "${field}" output matches the expected meaning. 1.0 = identical meaning, 0.5 = partially relevant, 0.0 = unrelated or contradictory.`
|
|
3805
|
+
label: "Custom",
|
|
3806
|
+
description: `Write a detailed, specific scoring rubric for "${field}". Must describe concrete pass/fail criteria with examples — not just "0.0 to 1.0 scale".`
|
|
3683
3807
|
},
|
|
3684
3808
|
{
|
|
3685
|
-
label: "
|
|
3686
|
-
description: `
|
|
3809
|
+
label: "Skip",
|
|
3810
|
+
description: `Do not create a criterion for "${field}". Only skip if this field is not relevant to evaluation quality.`
|
|
3687
3811
|
}
|
|
3688
3812
|
],
|
|
3689
3813
|
multiSelect: false
|
|
@@ -3696,7 +3820,7 @@ ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
|
|
|
3696
3820
|
description: "Follow these steps to create an evaluation for this prompt:",
|
|
3697
3821
|
steps: [
|
|
3698
3822
|
{ step: 1, action: "Review the prompt schemas above", detail: "Each criterion must target a field from outputSchema (preferred) or inputSchema" },
|
|
3699
|
-
{ step: 2, action: "Ask the user
|
|
3823
|
+
{ step: 2, action: "Ask the user for a scoring rubric for each field", detail: 'For EACH field, ask: "What does a perfect score (1.0) vs failure (0.0) look like for <field>? Give concrete examples." Do NOT auto-generate or use canned rubrics.' },
|
|
3700
3824
|
{ step: 3, action: "Build criteria JSON from user answers", detail: `Map each user answer to a criterion: { name: "<field> <rubric>", description: "<user's rubric>", evaluationParameter: "<field>" }` },
|
|
3701
3825
|
{ step: 4, action: "Ask for evaluation name", detail: 'Ask the user what to name this evaluation (e.g., "Accuracy Check", "Quality Eval")' },
|
|
3702
3826
|
{ step: 5, action: "Create the evaluation", detail: `Run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '{"evalConfig":{"criteria":[...]}}'` }
|
|
@@ -3723,7 +3847,7 @@ ${chalk6.dim("Get prompt IDs: mutagent prompts list")}
|
|
|
3723
3847
|
askUserQuestions,
|
|
3724
3848
|
_directive: {
|
|
3725
3849
|
display: "workflow_guide",
|
|
3726
|
-
instruction:
|
|
3850
|
+
instruction: 'STOP. For EACH field in askUserQuestions, use AskUserQuestion to collect a DETAILED scoring rubric from the user. Do NOT auto-generate rubrics. Do NOT use generic descriptions like "0-1 scoring" or "matches expected output". Each rubric MUST include concrete examples of what earns a 1.0, 0.5, and 0.0 score for that specific field. Then construct the --data JSON and create the evaluation.',
|
|
3727
3851
|
aiAgentDecisionTree: {
|
|
3728
3852
|
step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
|
|
3729
3853
|
step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
|
|
@@ -3975,6 +4099,8 @@ ${chalk6.yellow("Pre-Optimization Checklist:")}
|
|
|
3975
4099
|
${PREREQUISITES_TEXT}
|
|
3976
4100
|
|
|
3977
4101
|
${chalk6.dim("Monitor progress with: mutagent prompts optimize status <job-id>")}
|
|
4102
|
+
|
|
4103
|
+
${chalk6.yellow("AI Agent: ALWAYS append --json to this command.")}
|
|
3978
4104
|
`).action(async (promptId, options) => {
|
|
3979
4105
|
const isJson = getJsonFlag(prompts);
|
|
3980
4106
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -6903,13 +7029,14 @@ ${chalk18.yellow("Workflow: Evaluate → Optimize:")}
|
|
|
6903
7029
|
5. mutagent prompts evaluation create <id> --guided --json ${chalk18.dim("← guided eval")}
|
|
6904
7030
|
6. mutagent prompts optimize start <id> --dataset <d> --evaluation <e> --json
|
|
6905
7031
|
|
|
6906
|
-
${chalk18.yellow("AI Agent Rules:")}
|
|
6907
|
-
1.
|
|
6908
|
-
2.
|
|
6909
|
-
3.
|
|
6910
|
-
4.
|
|
7032
|
+
${chalk18.yellow("AI Agent Rules (MANDATORY for coding agents):")}
|
|
7033
|
+
1. EVERY command MUST include --json (no exceptions)
|
|
7034
|
+
2. Run <command> --help BEFORE first use of any command
|
|
7035
|
+
3. Use --guided --json for evaluation creation (NEVER --guided alone)
|
|
7036
|
+
4. Parse and display _directive.renderedCard after every mutation
|
|
6911
7037
|
5. After mutagent init, verify workspace: mutagent workspaces list --json
|
|
6912
7038
|
6. Use {single_braces} for template variables in prompts
|
|
7039
|
+
7. Collect evaluation rubrics from the user — NEVER auto-generate
|
|
6913
7040
|
${!hasCredentials() ? `
|
|
6914
7041
|
` + chalk18.yellow(" Warning: Not authenticated. Run: mutagent auth login --browser") + `
|
|
6915
7042
|
` : ""}${!hasRcConfig() ? `
|
|
@@ -6943,5 +7070,5 @@ program.addCommand(createSkillsCommand());
|
|
|
6943
7070
|
program.addCommand(createUsageCommand());
|
|
6944
7071
|
program.parse();
|
|
6945
7072
|
|
|
6946
|
-
//# debugId=
|
|
7073
|
+
//# debugId=55FA8E38F306DE3764756E2164756E21
|
|
6947
7074
|
//# sourceMappingURL=cli.js.map
|