@mutagent/cli 0.1.30 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cli.js +101 -35
- package/dist/bin/cli.js.map +7 -7
- package/dist/index.js +9 -2
- package/dist/index.js.map +3 -3
- package/package.json +1 -1
package/dist/bin/cli.js
CHANGED
|
@@ -644,6 +644,11 @@ class SDKClientWrapper {
|
|
|
644
644
|
const job = await this.request(`/api/optimization/${jobId}`);
|
|
645
645
|
const progress = await this.request(`/api/optimization/${jobId}/progress`);
|
|
646
646
|
const prompt = await this.getPrompt(String(job.promptId ?? ""));
|
|
647
|
+
const statesRes = await this.request(`/api/optimization/${jobId}/states`).catch(() => ({ states: [] }));
|
|
648
|
+
const latestState = statesRes.states[statesRes.states.length - 1];
|
|
649
|
+
const iterCtx = latestState?.state.iterationContext;
|
|
650
|
+
const mutatedPromptText = iterCtx?.currentPrompt?.prompt;
|
|
651
|
+
const originalPromptText = iterCtx?.basePrompt?.prompt;
|
|
647
652
|
return {
|
|
648
653
|
job: {
|
|
649
654
|
id: job.id ?? jobId,
|
|
@@ -654,7 +659,9 @@ class SDKClientWrapper {
|
|
|
654
659
|
prompt,
|
|
655
660
|
bestScore: job.bestScore,
|
|
656
661
|
iterationsCompleted: job.currentIteration,
|
|
657
|
-
scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined
|
|
662
|
+
scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined,
|
|
663
|
+
mutatedPromptText,
|
|
664
|
+
originalPromptText
|
|
658
665
|
};
|
|
659
666
|
} catch (error) {
|
|
660
667
|
this.handleError(error);
|
|
@@ -3352,8 +3359,8 @@ async function buildGuidedWorkflow(promptId) {
|
|
|
3352
3359
|
const inputFields = Object.keys(inputProperties);
|
|
3353
3360
|
const outputFields = Object.keys(outputProperties);
|
|
3354
3361
|
const allFields = [
|
|
3355
|
-
...
|
|
3356
|
-
...
|
|
3362
|
+
...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
|
|
3363
|
+
...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
|
|
3357
3364
|
];
|
|
3358
3365
|
let datasetExample = null;
|
|
3359
3366
|
try {
|
|
@@ -3369,27 +3376,36 @@ async function buildGuidedWorkflow(promptId) {
|
|
|
3369
3376
|
} catch {}
|
|
3370
3377
|
}
|
|
3371
3378
|
} catch {}
|
|
3372
|
-
const askUserQuestions = allFields.map(({ field, fieldSchema }) =>
|
|
3373
|
-
|
|
3374
|
-
|
|
3375
|
-
|
|
3376
|
-
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
3381
|
-
|
|
3382
|
-
|
|
3383
|
-
|
|
3384
|
-
|
|
3385
|
-
|
|
3386
|
-
|
|
3387
|
-
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
3392
|
-
|
|
3379
|
+
const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
|
|
3380
|
+
const isInput = source === "inputSchema";
|
|
3381
|
+
const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
|
|
3382
|
+
const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
|
|
3383
|
+
const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
|
|
3384
|
+
const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
|
|
3385
|
+
return {
|
|
3386
|
+
question,
|
|
3387
|
+
header,
|
|
3388
|
+
...hint != null ? { hint } : {},
|
|
3389
|
+
options: [
|
|
3390
|
+
{
|
|
3391
|
+
label: "Define rubric",
|
|
3392
|
+
description: defineDesc
|
|
3393
|
+
},
|
|
3394
|
+
{
|
|
3395
|
+
label: "See suggestion",
|
|
3396
|
+
description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
|
|
3397
|
+
}
|
|
3398
|
+
],
|
|
3399
|
+
multiSelect: false,
|
|
3400
|
+
context: {
|
|
3401
|
+
fieldType: fieldSchema?.type ?? "unknown",
|
|
3402
|
+
fieldDescription: fieldSchema?.description ?? null,
|
|
3403
|
+
fieldSource: source,
|
|
3404
|
+
promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
|
|
3405
|
+
exampleValue: datasetExample?.[field] ?? null
|
|
3406
|
+
}
|
|
3407
|
+
};
|
|
3408
|
+
});
|
|
3393
3409
|
return {
|
|
3394
3410
|
prompt: { id: promptId, name: prompt.name },
|
|
3395
3411
|
inputSchema: { fields: inputFields },
|
|
@@ -3398,7 +3414,7 @@ async function buildGuidedWorkflow(promptId) {
|
|
|
3398
3414
|
description: "Follow these steps to create an evaluation for this prompt:",
|
|
3399
3415
|
steps: [
|
|
3400
3416
|
{ step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
|
|
3401
|
-
{ step: 2, action: "Define correctness criteria for EVERY field", detail:
|
|
3417
|
+
{ step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
|
|
3402
3418
|
{ step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
|
|
3403
3419
|
{ step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
|
|
3404
3420
|
{ step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
|
|
@@ -3425,7 +3441,14 @@ async function buildGuidedWorkflow(promptId) {
|
|
|
3425
3441
|
askUserQuestions,
|
|
3426
3442
|
_directive: {
|
|
3427
3443
|
display: "workflow_guide",
|
|
3428
|
-
instruction:
|
|
3444
|
+
instruction: `MANDATORY ORDERS — READ CAREFULLY:
|
|
3445
|
+
` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
|
|
3446
|
+
` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
|
|
3447
|
+
` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
|
|
3448
|
+
` + ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
|
|
3449
|
+
` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
|
|
3450
|
+
` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
|
|
3451
|
+
` + "6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.",
|
|
3429
3452
|
aiAgentDecisionTree: {
|
|
3430
3453
|
step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
|
|
3431
3454
|
step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
|
|
@@ -3646,9 +3669,9 @@ ${chalk7.dim("Get prompt IDs: mutagent prompts list")}
|
|
|
3646
3669
|
let fieldsHint = "";
|
|
3647
3670
|
try {
|
|
3648
3671
|
const client2 = getSDKClient();
|
|
3649
|
-
const
|
|
3650
|
-
if (
|
|
3651
|
-
const props =
|
|
3672
|
+
const prompt2 = await client2.getPrompt(promptId);
|
|
3673
|
+
if (prompt2.outputSchema && typeof prompt2.outputSchema === "object") {
|
|
3674
|
+
const props = prompt2.outputSchema.properties;
|
|
3652
3675
|
if (props && typeof props === "object") {
|
|
3653
3676
|
const fields = Object.keys(props);
|
|
3654
3677
|
if (fields.length > 0) {
|
|
@@ -3699,9 +3722,9 @@ Example JSON (--data flag):
|
|
|
3699
3722
|
let availableFields = [];
|
|
3700
3723
|
try {
|
|
3701
3724
|
const client2 = getSDKClient();
|
|
3702
|
-
const
|
|
3703
|
-
if (
|
|
3704
|
-
const props =
|
|
3725
|
+
const prompt2 = await client2.getPrompt(promptId);
|
|
3726
|
+
if (prompt2.outputSchema && typeof prompt2.outputSchema === "object") {
|
|
3727
|
+
const props = prompt2.outputSchema.properties;
|
|
3705
3728
|
if (props && typeof props === "object") {
|
|
3706
3729
|
availableFields = Object.keys(props);
|
|
3707
3730
|
}
|
|
@@ -3716,6 +3739,44 @@ Available output fields: ${availableFields.join(", ")}` : "";
|
|
|
3716
3739
|
Example:
|
|
3717
3740
|
--data '{"evalConfig":{"criteria":[` + '{"name":"Accuracy","description":"...","evaluationParameter":"classification"},' + '{"name":"Confidence","description":"...","evaluationParameter":"confidence"}' + "]}}'");
|
|
3718
3741
|
}
|
|
3742
|
+
const schemaClient = getSDKClient();
|
|
3743
|
+
const prompt = await schemaClient.getPrompt(promptId);
|
|
3744
|
+
const requiredFields = [];
|
|
3745
|
+
if (prompt.inputSchema && typeof prompt.inputSchema === "object") {
|
|
3746
|
+
const props = prompt.inputSchema.properties;
|
|
3747
|
+
if (props && typeof props === "object") {
|
|
3748
|
+
for (const field of Object.keys(props)) {
|
|
3749
|
+
requiredFields.push({ field, source: "inputSchema" });
|
|
3750
|
+
}
|
|
3751
|
+
}
|
|
3752
|
+
}
|
|
3753
|
+
if (prompt.outputSchema && typeof prompt.outputSchema === "object") {
|
|
3754
|
+
const props = prompt.outputSchema.properties;
|
|
3755
|
+
if (props && typeof props === "object") {
|
|
3756
|
+
for (const field of Object.keys(props)) {
|
|
3757
|
+
requiredFields.push({ field, source: "outputSchema" });
|
|
3758
|
+
}
|
|
3759
|
+
}
|
|
3760
|
+
}
|
|
3761
|
+
if (requiredFields.length > 0) {
|
|
3762
|
+
const coveredParams = new Set(criteria.map((c) => c.evaluationParameter));
|
|
3763
|
+
const missing = requiredFields.filter((f) => !coveredParams.has(f.field));
|
|
3764
|
+
if (missing.length > 0) {
|
|
3765
|
+
const missingList = missing.map((m) => `${m.field} (${m.source})`).join(", ");
|
|
3766
|
+
throw new MutagentError("VALIDATION_ERROR", `Evaluation criteria are INCOMPLETE. Missing fields: ${missingList}`, `Each input and output schema field MUST have a corresponding criterion.
|
|
3767
|
+
` + `Total required: ${requiredFields.length}, provided: ${coveredParams.size}
|
|
3768
|
+
` + "Run: mutagent prompts evaluation create " + promptId + " --guided --json");
|
|
3769
|
+
}
|
|
3770
|
+
}
|
|
3771
|
+
if (requiredFields.length > 0) {
|
|
3772
|
+
const validFieldNames = new Set(requiredFields.map((f) => f.field));
|
|
3773
|
+
const invalidParams = criteria.filter((c) => c.evaluationParameter && !validFieldNames.has(c.evaluationParameter));
|
|
3774
|
+
if (invalidParams.length > 0) {
|
|
3775
|
+
const invalidList = invalidParams.map((c) => `"${c.evaluationParameter}" (criterion: ${c.name})`).join(", ");
|
|
3776
|
+
throw new MutagentError("VALIDATION_ERROR", `Invalid evaluationParameter(s): ${invalidList}. Must match a schema field.`, `Available fields: ${requiredFields.map((f) => f.field).join(", ")}
|
|
3777
|
+
` + "Run: mutagent prompts evaluation create " + promptId + " --guided --json");
|
|
3778
|
+
}
|
|
3779
|
+
}
|
|
3719
3780
|
const client = getSDKClient();
|
|
3720
3781
|
const evalResult = await client.createEvaluation(promptId, evalData);
|
|
3721
3782
|
if (isJson) {
|
|
@@ -4014,7 +4075,7 @@ function startDirective(job, promptId, datasetId, evaluationId) {
|
|
|
4014
4075
|
},
|
|
4015
4076
|
links: { monitor: monitorUrl, api: apiUrl },
|
|
4016
4077
|
next,
|
|
4017
|
-
instruction: "STOP. Display the renderedCard text below to the user VERBATIM
|
|
4078
|
+
instruction: "STOP. Display the renderedCard text below to the user VERBATIM. Do NOT summarize, rephrase, or omit any lines. After displaying, follow the suggested next steps.",
|
|
4018
4079
|
renderedCard: renderPlainCard({ title, icon: "⚡", rows, links: cardLinks, next })
|
|
4019
4080
|
};
|
|
4020
4081
|
}
|
|
@@ -4057,7 +4118,7 @@ function statusDirective(status) {
|
|
|
4057
4118
|
},
|
|
4058
4119
|
links: { monitor: monitorUrl },
|
|
4059
4120
|
next,
|
|
4060
|
-
instruction: "STOP. Display the renderedCard text below to the user VERBATIM
|
|
4121
|
+
instruction: "STOP. Display the renderedCard text below to the user VERBATIM. Do NOT summarize, rephrase, or omit any lines. After displaying, follow the suggested next steps.",
|
|
4061
4122
|
renderedCard: renderPlainCard({ title, icon, rows, links: cardLinks, next })
|
|
4062
4123
|
};
|
|
4063
4124
|
}
|
|
@@ -4100,6 +4161,11 @@ ${PREREQUISITES_TEXT}
|
|
|
4100
4161
|
|
|
4101
4162
|
${chalk9.dim("Monitor progress with: mutagent prompts optimize status <job-id>")}
|
|
4102
4163
|
|
|
4164
|
+
${chalk9.yellow(`⚠ COST WARNING — AI Agent:
|
|
4165
|
+
Default is 1 iteration. Do NOT increase --max-iterations unless the user
|
|
4166
|
+
explicitly requests it. Each iteration incurs LLM costs. Starting with
|
|
4167
|
+
max-iterations > 1 without user consent is a protocol violation.`)}
|
|
4168
|
+
|
|
4103
4169
|
${chalk9.yellow("AI Agent: ALWAYS append --json to this command.")}
|
|
4104
4170
|
`).action(async (promptId, options) => {
|
|
4105
4171
|
const isJson = getJsonFlag(prompts);
|
|
@@ -7166,5 +7232,5 @@ program.addCommand(createSkillsCommand());
|
|
|
7166
7232
|
program.addCommand(createUsageCommand());
|
|
7167
7233
|
program.parse();
|
|
7168
7234
|
|
|
7169
|
-
//# debugId=
|
|
7235
|
+
//# debugId=E58AAC8256B9F1B664756E2164756E21
|
|
7170
7236
|
//# sourceMappingURL=cli.js.map
|