npm - @mutagent/cli - Versions diffs - 0.1.30 → 0.1.31 - Mend

@mutagent/cli 0.1.30 → 0.1.31

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each package version as it appears in its respective public registry.

Files changed (5) hide show

package/dist/bin/cli.js CHANGED Viewed

@@ -644,6 +644,11 @@ class SDKClientWrapper {
       const job = await this.request(`/api/optimization/${jobId}`);
       const progress = await this.request(`/api/optimization/${jobId}/progress`);
       const prompt = await this.getPrompt(String(job.promptId ?? ""));
+      const statesRes = await this.request(`/api/optimization/${jobId}/states`).catch(() => ({ states: [] }));
+      const latestState = statesRes.states[statesRes.states.length - 1];
+      const iterCtx = latestState?.state.iterationContext;
+      const mutatedPromptText = iterCtx?.currentPrompt?.prompt;
+      const originalPromptText = iterCtx?.basePrompt?.prompt;
       return {
         job: {
           id: job.id ?? jobId,
@@ -654,7 +659,9 @@ class SDKClientWrapper {
         prompt,
         bestScore: job.bestScore,
         iterationsCompleted: job.currentIteration,
-        scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined
+        scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined,
+        mutatedPromptText,
+        originalPromptText
       };
     } catch (error) {
       this.handleError(error);
@@ -3352,8 +3359,8 @@ async function buildGuidedWorkflow(promptId) {
   const inputFields = Object.keys(inputProperties);
   const outputFields = Object.keys(outputProperties);
   const allFields = [
-    ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] })),
-    ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] }))
+    ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
+    ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
   ];
   let datasetExample = null;
   try {
@@ -3369,27 +3376,36 @@ async function buildGuidedWorkflow(promptId) {
       } catch {}
     }
   } catch {}
-  const askUserQuestions = allFields.map(({ field, fieldSchema }) => ({
-    question: `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`,
-    header: field,
-    options: [
-      {
-        label: "Define rubric",
-        description: `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`
-      },
-      {
-        label: "See suggestion",
-        description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
-      }
-    ],
-    multiSelect: false,
-    context: {
-      fieldType: fieldSchema?.type ?? "unknown",
-      fieldDescription: fieldSchema?.description ?? null,
-      promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
-      exampleValue: datasetExample?.[field] ?? null
-    }
-  }));
+  const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
+    const isInput = source === "inputSchema";
+    const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
+    const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
+    const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
+    const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
+    return {
+      question,
+      header,
+      ...hint != null ? { hint } : {},
+      options: [
+        {
+          label: "Define rubric",
+          description: defineDesc
+        },
+        {
+          label: "See suggestion",
+          description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
+        }
+      ],
+      multiSelect: false,
+      context: {
+        fieldType: fieldSchema?.type ?? "unknown",
+        fieldDescription: fieldSchema?.description ?? null,
+        fieldSource: source,
+        promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
+        exampleValue: datasetExample?.[field] ?? null
+      }
+    };
+  });
   return {
     prompt: { id: promptId, name: prompt.name },
     inputSchema: { fields: inputFields },
@@ -3398,7 +3414,7 @@ async function buildGuidedWorkflow(promptId) {
       description: "Follow these steps to create an evaluation for this prompt:",
       steps: [
         { step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
-        { step: 2, action: "Define correctness criteria for EVERY field", detail: "For EACH field in askUserQuestions, use AskUserQuestion to collect a rubric. Do NOT skip any field. Use the context hints to suggest rubrics when asked." },
+        { step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
         { step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
         { step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
         { step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
@@ -3425,7 +3441,14 @@ async function buildGuidedWorkflow(promptId) {
     askUserQuestions,
     _directive: {
       display: "workflow_guide",
-      instruction: 'STOP. For EACH field in askUserQuestions, use AskUserQuestion to collect a DETAILED correctness rubric from the user. Do NOT auto-generate rubrics. Do NOT use generic descriptions. Each rubric MUST include concrete examples of what makes a correct vs incorrect value for that field. Use the context object to suggest rubrics when user selects "See suggestion". Then construct the --data JSON and create the evaluation.',
+      instruction: `MANDATORY ORDERS — READ CAREFULLY:
+` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
+` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
+` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
+` + `   WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
+` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
+` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
+` + "6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.",
       aiAgentDecisionTree: {
         step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
         step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
@@ -3646,9 +3669,9 @@ ${chalk7.dim("Get prompt IDs: mutagent prompts list")}
         let fieldsHint = "";
         try {
           const client2 = getSDKClient();
-          const prompt = await client2.getPrompt(promptId);
-          if (prompt.outputSchema && typeof prompt.outputSchema === "object") {
-            const props = prompt.outputSchema.properties;
+          const prompt2 = await client2.getPrompt(promptId);
+          if (prompt2.outputSchema && typeof prompt2.outputSchema === "object") {
+            const props = prompt2.outputSchema.properties;
             if (props && typeof props === "object") {
               const fields = Object.keys(props);
               if (fields.length > 0) {
@@ -3699,9 +3722,9 @@ Example JSON (--data flag):
         let availableFields = [];
         try {
           const client2 = getSDKClient();
-          const prompt = await client2.getPrompt(promptId);
-          if (prompt.outputSchema && typeof prompt.outputSchema === "object") {
-            const props = prompt.outputSchema.properties;
+          const prompt2 = await client2.getPrompt(promptId);
+          if (prompt2.outputSchema && typeof prompt2.outputSchema === "object") {
+            const props = prompt2.outputSchema.properties;
             if (props && typeof props === "object") {
               availableFields = Object.keys(props);
             }
@@ -3716,6 +3739,44 @@ Available output fields: ${availableFields.join(", ")}` : "";
 Example:
   --data '{"evalConfig":{"criteria":[` + '{"name":"Accuracy","description":"...","evaluationParameter":"classification"},' + '{"name":"Confidence","description":"...","evaluationParameter":"confidence"}' + "]}}'");
       }
+      const schemaClient = getSDKClient();
+      const prompt = await schemaClient.getPrompt(promptId);
+      const requiredFields = [];
+      if (prompt.inputSchema && typeof prompt.inputSchema === "object") {
+        const props = prompt.inputSchema.properties;
+        if (props && typeof props === "object") {
+          for (const field of Object.keys(props)) {
+            requiredFields.push({ field, source: "inputSchema" });
+          }
+        }
+      }
+      if (prompt.outputSchema && typeof prompt.outputSchema === "object") {
+        const props = prompt.outputSchema.properties;
+        if (props && typeof props === "object") {
+          for (const field of Object.keys(props)) {
+            requiredFields.push({ field, source: "outputSchema" });
+          }
+        }
+      }
+      if (requiredFields.length > 0) {
+        const coveredParams = new Set(criteria.map((c) => c.evaluationParameter));
+        const missing = requiredFields.filter((f) => !coveredParams.has(f.field));
+        if (missing.length > 0) {
+          const missingList = missing.map((m) => `${m.field} (${m.source})`).join(", ");
+          throw new MutagentError("VALIDATION_ERROR", `Evaluation criteria are INCOMPLETE. Missing fields: ${missingList}`, `Each input and output schema field MUST have a corresponding criterion.
+` + `Total required: ${requiredFields.length}, provided: ${coveredParams.size}
+` + "Run: mutagent prompts evaluation create " + promptId + " --guided --json");
+        }
+      }
+      if (requiredFields.length > 0) {
+        const validFieldNames = new Set(requiredFields.map((f) => f.field));
+        const invalidParams = criteria.filter((c) => c.evaluationParameter && !validFieldNames.has(c.evaluationParameter));
+        if (invalidParams.length > 0) {
+          const invalidList = invalidParams.map((c) => `"${c.evaluationParameter}" (criterion: ${c.name})`).join(", ");
+          throw new MutagentError("VALIDATION_ERROR", `Invalid evaluationParameter(s): ${invalidList}. Must match a schema field.`, `Available fields: ${requiredFields.map((f) => f.field).join(", ")}
+` + "Run: mutagent prompts evaluation create " + promptId + " --guided --json");
+        }
+      }
       const client = getSDKClient();
       const evalResult = await client.createEvaluation(promptId, evalData);
       if (isJson) {
@@ -4014,7 +4075,7 @@ function startDirective(job, promptId, datasetId, evaluationId) {
     },
     links: { monitor: monitorUrl, api: apiUrl },
     next,
-    instruction: "STOP. Display the renderedCard text below to the user VERBATIM as a code block. Do NOT summarize, rephrase, or omit any lines. After displaying, follow the suggested next steps.",
+    instruction: "STOP. Display the renderedCard text below to the user VERBATIM. Do NOT summarize, rephrase, or omit any lines. After displaying, follow the suggested next steps.",
     renderedCard: renderPlainCard({ title, icon: "⚡", rows, links: cardLinks, next })
   };
 }
@@ -4057,7 +4118,7 @@ function statusDirective(status) {
     },
     links: { monitor: monitorUrl },
     next,
-    instruction: "STOP. Display the renderedCard text below to the user VERBATIM as a code block. Do NOT summarize, rephrase, or omit any lines. After displaying, follow the suggested next steps.",
+    instruction: "STOP. Display the renderedCard text below to the user VERBATIM. Do NOT summarize, rephrase, or omit any lines. After displaying, follow the suggested next steps.",
     renderedCard: renderPlainCard({ title, icon, rows, links: cardLinks, next })
   };
 }
@@ -4100,6 +4161,11 @@ ${PREREQUISITES_TEXT}
 ${chalk9.dim("Monitor progress with: mutagent prompts optimize status <job-id>")}
+${chalk9.yellow(`⚠ COST WARNING — AI Agent:
+  Default is 1 iteration. Do NOT increase --max-iterations unless the user
+  explicitly requests it. Each iteration incurs LLM costs. Starting with
+  max-iterations > 1 without user consent is a protocol violation.`)}
 ${chalk9.yellow("AI Agent: ALWAYS append --json to this command.")}
     `).action(async (promptId, options) => {
     const isJson = getJsonFlag(prompts);
@@ -7166,5 +7232,5 @@ program.addCommand(createSkillsCommand());
 program.addCommand(createUsageCommand());
 program.parse();
-//# debugId=88ECCF736E0A64AD64756E2164756E21
+//# debugId=E58AAC8256B9F1B664756E2164756E21
 //# sourceMappingURL=cli.js.map