@mutagent/cli 0.1.116 → 0.1.117

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -1241,9 +1241,9 @@ var init_sdk_client = __esm(() => {
1241
1241
 
1242
1242
  // src/bin/cli.ts
1243
1243
  import { Command as Command21 } from "commander";
1244
- import chalk38 from "chalk";
1244
+ import chalk39 from "chalk";
1245
1245
  import { readFileSync as readFileSync12 } from "fs";
1246
- import { join as join10, dirname as dirname2 } from "path";
1246
+ import { join as join10, dirname as dirname3 } from "path";
1247
1247
  import { fileURLToPath as fileURLToPath2 } from "url";
1248
1248
 
1249
1249
  // src/commands/auth.ts
@@ -1683,12 +1683,61 @@ import { resolve as resolve2 } from "path";
1683
1683
  // src/lib/explorer.ts
1684
1684
  import { readdirSync, readFileSync as readFileSync3, statSync } from "fs";
1685
1685
  import { join as join3, relative, extname, basename } from "path";
1686
// Template-variable detection: double-brace {{var}} and single-brace {var}
// delimiters. Fenced markdown code blocks are stripped before matching so
// code examples inside prompts are not counted as template variables.
var DOUBLE_VAR_PATTERN = /\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}/g;
var SINGLE_VAR_PATTERN = /(?<![\\{])\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}(?!\})/g;
var FENCED_CODE_PATTERN = /```[\s\S]*?```/g;
// Heuristics used elsewhere in the explorer to spot prompt-related
// declarations, schemas, and MutagenT markers in scanned source files.
var PROMPT_NAME_PATTERN = /(?:const|let|var|export)\s+\w*(?:prompt|system|agent|instruction|template)\w*\s*=/i;
var SCHEMA_PATTERN = /["']?(?:inputSchema|outputSchema|properties|required)["']?\s*[=:]/;
var ZOD_PROMPT_SCHEMA_PATTERN = /(?:const|let|var|export)\s+\w*(?:prompt|output|input|response|schema|message)\w*\s*=\s*z\.object\s*\(/i;
var PYDANTIC_PATTERN = /class\s+\w+\s*\(\s*(?:BaseModel|BaseSettings)\s*\)/;
var MARKER_START_PATTERN = /MutagenT:START\s+(\w+)(?:\s+id=(\S+))?/;
// High-confidence signals that a file wires up an LLM agent framework
// (LangChain, OpenAI Agents SDK, CrewAI, AutoGen, LangGraph).
var AGENT_HIGH_PATTERNS = [
  { pattern: /\bAgentExecutor\b/, reason: "langchain-agent-executor" },
  { pattern: /\bcreate(?:React|OpenAIFunctions|Tool(?:Calling)?|Structured(?:Chat)?)Agent\b/, reason: "langchain-create-agent" },
  { pattern: /\bnew\s+Agent\s*\(/, reason: "agent-constructor" },
  { pattern: /from\s+['"]openai\/agents['"]/, reason: "openai-agents-sdk" },
  { pattern: /from\s+['"]@openai\/agents['"]/, reason: "openai-agents-sdk" },
  { pattern: /from\s+['"]crewai['"]/, reason: "crewai-import" },
  { pattern: /from\s+['"]autogen(?:_agentchat)?['"]/, reason: "autogen-import" },
  { pattern: /from\s+['"]@?langgraph['"]/, reason: "langgraph-import" },
  { pattern: /from\s+['"]@langchain\/langgraph['"]/, reason: "langgraph-import" },
  { pattern: /\bStateGraph\s*\(/, reason: "langgraph-stategraph" }
];
// Weaker signals: tool-calling vocabulary that often, but not always,
// indicates agent behavior — reported with "medium" confidence.
var AGENT_MEDIUM_PATTERNS = [
  { pattern: /\btool_calls\b/, reason: "tool-calls" },
  { pattern: /\btoolCalls\b/, reason: "tool-calls" },
  { pattern: /^\s*tools\s*[:=]\s*\[/, reason: "tools-array" },
  { pattern: /@tool\b/, reason: "tool-decorator" },
  { pattern: /\bfunction_call\b/, reason: "function-call" },
  { pattern: /\btool_choice\b/, reason: "tool-choice" }
];
/**
 * Detect template variables in prompt text and decide which delimiter style
 * the content predominantly uses: "double" for {{var}}, "single" for {var}.
 * Fenced ``` code blocks are removed first so embedded examples don't count.
 * Returns { variables, delimiter } with variables deduplicated in
 * first-seen order. Ties (including zero matches on both sides) resolve
 * to "single".
 */
function inferPromptVariables(content) {
  const stripped = content.replace(FENCED_CODE_PATTERN, "");
  // Build a fresh RegExp per call so the shared /g patterns never leak
  // lastIndex state between invocations.
  const collect = (pattern) => {
    const names = [];
    for (const match of stripped.matchAll(new RegExp(pattern.source, "g"))) {
      if (match[1]) {
        names.push(match[1]);
      }
    }
    return names;
  };
  const doubleMatches = collect(DOUBLE_VAR_PATTERN);
  const singleMatches = collect(SINGLE_VAR_PATTERN);
  const delimiter = doubleMatches.length > singleMatches.length ? "double" : "single";
  const variables = [...new Set(delimiter === "double" ? doubleMatches : singleMatches)];
  return { variables, delimiter };
}
1692
1741
  function walkDir(dir, extensions, excludeDirs, maxDepth, currentDepth = 0) {
1693
1742
  if (currentDepth >= maxDepth)
1694
1743
  return [];
@@ -1728,20 +1777,23 @@ function scanForPrompts(filePath, relativePath) {
1728
1777
  } catch {
1729
1778
  return results;
1730
1779
  }
1780
+ const fileDelimiter = inferPromptVariables(content).delimiter;
1731
1781
  const lines = content.split(`
1732
1782
  `);
1733
1783
  for (let i = 0;i < lines.length; i++) {
1734
1784
  const line = lines[i];
1735
1785
  if (!line)
1736
1786
  continue;
1737
- if (TEMPLATE_VAR_PATTERN.test(line)) {
1787
+ const inferred = inferPromptVariables(line);
1788
+ if (inferred.variables.length > 0) {
1738
1789
  const preview = line.trim().substring(0, 80);
1739
1790
  results.push({
1740
1791
  file: relativePath,
1741
1792
  line: i + 1,
1742
1793
  preview,
1743
1794
  reason: "template-variable",
1744
- confidence: "high"
1795
+ confidence: "high",
1796
+ delimiter: fileDelimiter
1745
1797
  });
1746
1798
  }
1747
1799
  if (PROMPT_NAME_PATTERN.test(line)) {
@@ -1752,7 +1804,8 @@ function scanForPrompts(filePath, relativePath) {
1752
1804
  line: i + 1,
1753
1805
  preview,
1754
1806
  reason: "prompt-constant",
1755
- confidence: "medium"
1807
+ confidence: "medium",
1808
+ delimiter: fileDelimiter
1756
1809
  });
1757
1810
  }
1758
1811
  }
@@ -1764,7 +1817,8 @@ function scanForPrompts(filePath, relativePath) {
1764
1817
  line: i + 1,
1765
1818
  preview,
1766
1819
  reason: "zod-schema",
1767
- confidence: "low"
1820
+ confidence: "low",
1821
+ delimiter: fileDelimiter
1768
1822
  });
1769
1823
  }
1770
1824
  }
@@ -1776,8 +1830,55 @@ function scanForPrompts(filePath, relativePath) {
1776
1830
  line: i + 1,
1777
1831
  preview,
1778
1832
  reason: "pydantic-model",
1833
+ confidence: "medium",
1834
+ delimiter: fileDelimiter
1835
+ });
1836
+ }
1837
+ }
1838
+ }
1839
+ return results;
1840
+ }
1841
+ function scanForAgents(filePath, relativePath) {
1842
+ const results = [];
1843
+ let content;
1844
+ try {
1845
+ content = readFileSync3(filePath, "utf-8");
1846
+ } catch {
1847
+ return results;
1848
+ }
1849
+ const lines = content.split(`
1850
+ `);
1851
+ const seen = new Set;
1852
+ for (let i = 0;i < lines.length; i++) {
1853
+ const line = lines[i];
1854
+ if (!line)
1855
+ continue;
1856
+ for (const { pattern, reason } of AGENT_HIGH_PATTERNS) {
1857
+ if (pattern.test(line) && !seen.has(i)) {
1858
+ results.push({
1859
+ file: relativePath,
1860
+ line: i + 1,
1861
+ preview: line.trim().substring(0, 80),
1862
+ reason,
1863
+ confidence: "high"
1864
+ });
1865
+ seen.add(i);
1866
+ break;
1867
+ }
1868
+ }
1869
+ if (seen.has(i))
1870
+ continue;
1871
+ for (const { pattern, reason } of AGENT_MEDIUM_PATTERNS) {
1872
+ if (pattern.test(line)) {
1873
+ results.push({
1874
+ file: relativePath,
1875
+ line: i + 1,
1876
+ preview: line.trim().substring(0, 80),
1877
+ reason,
1779
1878
  confidence: "medium"
1780
1879
  });
1880
+ seen.add(i);
1881
+ break;
1781
1882
  }
1782
1883
  }
1783
1884
  }
@@ -1833,7 +1934,8 @@ function scanJsonForSchemas(filePath, relativePath) {
1833
1934
  line: 1,
1834
1935
  preview: preview.substring(0, 80),
1835
1936
  reason: "json-schema",
1836
- confidence: "medium"
1937
+ confidence: "medium",
1938
+ delimiter: "single"
1837
1939
  });
1838
1940
  return results;
1839
1941
  }
@@ -1890,12 +1992,14 @@ function exploreCodebase(options) {
1890
1992
  const prompts = [];
1891
1993
  const datasets = [];
1892
1994
  const markers = [];
1995
+ const agents = [];
1893
1996
  const sourceFiles = walkDir(rootPath, options.extensions, options.excludeDirs, options.depth);
1894
1997
  for (const filePath of sourceFiles) {
1895
1998
  const relativePath = relative(rootPath, filePath);
1896
1999
  markers.push(...scanForMarkers(filePath, relativePath));
1897
2000
  if (!options.markersOnly) {
1898
2001
  prompts.push(...scanForPrompts(filePath, relativePath));
2002
+ agents.push(...scanForAgents(filePath, relativePath));
1899
2003
  }
1900
2004
  }
1901
2005
  if (!options.markersOnly) {
@@ -1906,7 +2010,7 @@ function exploreCodebase(options) {
1906
2010
  }
1907
2011
  datasets.push(...scanForDatasets(rootPath, rootPath, options.excludeDirs, options.depth));
1908
2012
  }
1909
- return { prompts, datasets, markers };
2013
+ return { prompts, datasets, markers, agents };
1910
2014
  }
1911
2015
  function parseExtensions(includeGlob) {
1912
2016
  const braceMatch = /\{([^}]+)\}/.exec(includeGlob);
@@ -3539,6 +3643,317 @@ init_sdk_client();
3539
3643
  import { Command as Command3 } from "commander";
3540
3644
  import chalk8 from "chalk";
3541
3645
  init_errors();
3646
+
3647
+ // src/commands/prompts/guided-dataset.ts
3648
+ init_sdk_client();
3649
+
3650
+ // src/commands/prompts/guided-workflow.ts
3651
+ init_sdk_client();
3652
/**
 * Fallback guidance for `evaluation create --guided` when no prompt-id was
 * supplied: tells the coding agent to explore the codebase, upload the
 * target prompt, and re-run guided mode, and documents the expected
 * evalConfig criteria shape. Pure function — no I/O.
 */
function buildGenericEvalWorkflow() {
  const workflow = [
    "1. mutagent explore --json — scan codebase for prompts",
    "2. mutagent prompts create ... --json — upload the target prompt",
    "3. Re-run: mutagent prompts evaluation create <prompt-id> --guided --json"
  ];
  // Placeholder criterion showing the shape an agent must construct.
  const criterionTemplate = {
    name: "<criterion_name>",
    description: "<rubric: what correct vs incorrect looks like>",
    evaluationParameter: "<must match a field in inputSchema or outputSchema>"
  };
  return {
    mode: "generic",
    message: "No prompt-id provided. Follow this sequence to create an evaluation:",
    workflow,
    formatReference: {
      evalConfig: { criteria: [criterionTemplate] }
    },
    _directive: {
      instruction: "Ask the user which prompt they want to evaluate. If unclear, run mutagent explore --json first.",
      next: ["mutagent explore --json", "mutagent prompts list --json"]
    }
  };
}
3676
/**
 * Look for other prompts whose names overlap the selected one (substring
 * match in either direction, case-insensitive) so the agent can confirm the
 * user picked the right prompt. Returns a disambiguation payload containing
 * an AskUserQuestion, or null when nothing similar exists. Best-effort:
 * any SDK/list failure is swallowed and treated as "no similar prompts".
 */
async function checkPromptDisambiguation(promptId, promptName) {
  try {
    const client = await getSDKClient();
    const candidates = await client.listPrompts();
    const needle = promptName.toLowerCase();
    const overlaps = (p) => {
      const name = p.name.toLowerCase();
      return name.includes(needle) || needle.includes(name);
    };
    const similar = candidates.filter((p) => String(p.id) !== promptId && overlaps(p));
    if (similar.length === 0) {
      return null;
    }
    const options = [
      `${promptName} (ID: ${promptId}) — SELECTED`,
      ...similar.map((p) => `${p.name} (ID: ${String(p.id)})`)
    ];
    return {
      message: `Found ${String(similar.length)} prompt(s) with similar names. Confirm this is the right one:`,
      selected: { id: promptId, name: promptName },
      similar: similar.map((p) => ({ id: String(p.id), name: p.name })),
      askUserQuestions: [{
        question: `You selected "${promptName}" (ID: ${promptId}). Is this correct, or did you mean one of these?`,
        options
      }]
    };
  } catch {
    return null;
  }
}
3700
/**
 * Build the guided-evaluation workflow payload for one prompt.
 *
 * Fetches the prompt and its input/output schemas via the SDK client,
 * derives one AskUserQuestion payload per schema field (input fields ask
 * for minimum-viable-context rubrics, output fields for correctness
 * rubrics), and embeds step-by-step directives for the coding agent.
 * Throws if the SDK client or the prompt fetch fails; the dataset-example
 * lookup below is best-effort and silently skipped on error.
 */
async function buildGuidedWorkflow(promptId) {
  const client = await getSDKClient();
  const prompt = await client.getPrompt(promptId);
  const truncate = (s, max) => s.length > max ? s.slice(0, max) + "..." : s;
  // Schemas may be absent or non-objects; fall back to empty property maps.
  const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
  const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
  const inputFields = Object.keys(inputProperties);
  const outputFields = Object.keys(outputProperties);
  // Unified field list, tagged with which schema each field came from.
  const allFields = [
    ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
    ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
  ];
  // Best-effort: pull the first item of the first dataset so questions can
  // show a real example value; all failures here are intentionally ignored.
  let datasetExample = null;
  try {
    const datasets = await client.listDatasets(promptId);
    if (datasets.length > 0) {
      try {
        const dsId = datasets[0]?.id;
        const items = dsId != null ? await client.listDatasetItems(String(dsId)) : [];
        if (items.length > 0) {
          const firstItem = items[0];
          datasetExample = firstItem?.expectedOutput ?? firstItem?.input ?? null;
        }
      } catch {}
    }
  } catch {}
  // One AskUserQuestion per field: input fields collect minimum-viable-context
  // definitions, output fields collect correctness rubrics.
  const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
    const isInput = source === "inputSchema";
    const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
    const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
    const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
    const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
    return {
      question,
      header,
      // hint is only present for input fields; spread of {} adds nothing.
      ...hint != null ? { hint } : {},
      options: [
        {
          label: "Define rubric",
          description: defineDesc
        },
        {
          label: "See suggestion",
          description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
        }
      ],
      multiSelect: false,
      context: {
        fieldType: fieldSchema?.type ?? "unknown",
        fieldDescription: fieldSchema?.description ?? null,
        fieldSource: source,
        // First available prompt text, truncated to 200 chars for display.
        promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
        exampleValue: datasetExample?.[field] ?? null
      }
    };
  });
  const result = {
    prompt: { id: promptId, name: prompt.name },
    inputSchema: { fields: inputFields },
    outputSchema: { fields: outputFields },
    workflow: {
      description: "Follow these steps to create an evaluation for this prompt:",
      steps: [
        { step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
        { step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
        { step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
        { step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
        { step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
      ],
      criteriaTemplate: {
        evalConfig: {
          criteria: [{
            name: "<criterion_name>",
            description: "<correctness rubric - describe what correct vs incorrect looks like>",
            evaluationParameter: `<one of: ${allFields.length > 0 ? allFields.map((f) => f.field).join(", ") : "no fields detected - check prompt schemas"}>`
          }]
        }
      },
      // Fully-worked example using at most the first output field.
      exampleCommand: `mutagent prompts evaluation create ${promptId} --name "My Evaluation" -d '${JSON.stringify({
        evalConfig: {
          criteria: outputFields.slice(0, 1).map((f) => ({
            name: `${f} accuracy`,
            description: `Correct if ${f} matches expected structure and content; incorrect if missing, malformed, or off-topic`,
            evaluationParameter: f
          }))
        }
      })}'`
    },
    askUserQuestions,
    _directive: {
      display: "workflow_guide",
      instruction: `MANDATORY ORDERS — READ CAREFULLY:
` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
` + ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
` + `6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.
` + `
` + `7. EVAL CRITERIA FRAMING — INPUT vs OUTPUT scoping (mirror of SKILL.md → "Guided Eval Criteria"):
` + ` INPUT fields → Minimum Viable Context (MVC).
` + ` Ask: "Is the data required for the prompt to succeed actually present in this {variable}?"
` + ` Rubric uses a COMPLETENESS scale grounded in observable presence of required data:
` + ` 1.0 = all required context present, no ambiguity
` + ` 0.5 = some context present, enough to attempt but likely partial/hedged answer
` + ` 0.0 = critical context missing, prompt cannot succeed regardless of model
` + ` OUTPUT fields → Output Standards.
` + ` Ask: "Does the response meet content correctness + structural correctness?"
` + ` Rubric uses a CORRECTNESS scale with concrete pass/fail tiers, e.g.:
` + ` 1.0 = valid shape, all required fields, faithful content
` + ` 0.5 = valid shape, 1-2 fields hedged or partially correct
` + ` 0.0 = invalid shape OR fabricated facts
` + ` NEVER use vague rubrics like "0-1 scale" or "score based on accuracy".
` + ` Canonical source: .claude/skills/mutagent-cli/SKILL.md → "Guided Eval Criteria".
` + " Note: template variables use single-brace `{variable}` (platform canonical). `mutagent explore`\n" + " infers single vs double per-file via inferPromptVariables().",
      aiAgentDecisionTree: {
        step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
        step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
        step3: `After collecting all criteria, construct the --data JSON and run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '<json>'`
      }
    },
    _meta: { inputFields, outputFields, allFields }
  };
  // Attach a confirmation question when other prompts have similar names.
  const disambiguation = await checkPromptDisambiguation(promptId, prompt.name);
  if (disambiguation) {
    result.disambiguation = disambiguation;
  }
  return result;
}
3830
+
3831
+ // src/commands/prompts/guided-dataset.ts
3832
/**
 * Fallback guidance for `dataset add --guided` when no prompt-id was
 * supplied: directs the coding agent to explore the codebase, upload the
 * target prompt, and re-run guided mode, and documents the dataset item
 * shape plus suggested test-case categories. Pure function — no I/O.
 */
function buildGenericDatasetWorkflow() {
  const workflow = [
    "1. mutagent explore --json — scan codebase for prompts",
    "2. mutagent prompts create ... --json — upload the target prompt",
    "3. Re-run: mutagent prompts dataset add <prompt-id> --guided --json"
  ];
  // Placeholder item showing the schema-keyed input/expectedOutput shape.
  const itemTemplate = {
    input: { "<inputSchema_field>": "<value>" },
    expectedOutput: { "<outputSchema_field>": "<expected_value>" }
  };
  return {
    mode: "generic",
    message: "No prompt-id provided. Follow this sequence to create a dataset:",
    workflow,
    formatReference: { item: itemTemplate },
    suggestedCategories: [
      { name: "Edge Cases", description: "Boundary inputs, empty fields, malformed data", priority: "high" },
      { name: "Hard Cases", description: "Ambiguous inputs, adversarial examples", priority: "high" },
      { name: "Representative Cases", description: "Typical production inputs", priority: "medium" }
    ],
    _directive: {
      instruction: "Ask the user which prompt they want to create a dataset for. If unclear, run mutagent explore --json first.",
      next: ["mutagent explore --json", "mutagent prompts list --json"]
    }
  };
}
3858
/**
 * Build the guided dataset-creation payload for an already-fetched prompt.
 * Derives question lists and a template item from the prompt's input/output
 * schemas, and falls back to generic placeholders when no schema fields
 * exist. Pure function: no I/O, safe to call with any prompt-shaped object
 * exposing id, name, and optional inputSchema/outputSchema.
 */
function buildGuidedDatasetWorkflow(prompt) {
  const promptId = String(prompt.id);
  // Schemas may be absent or non-objects; fall back to empty property maps.
  const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
  const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
  const inputFields = Object.keys(inputProperties);
  const outputFields = Object.keys(outputProperties);
  // Placeholder item mirroring the schema, one entry per field.
  const templateInput = Object.fromEntries(inputFields.map((f) => [f, "<value>"]));
  const templateOutput = Object.fromEntries(outputFields.map((f) => [f, "<expected>"]));
  // Two general questions first, then one per schema field.
  const askUserQuestions = [
    {
      field: "_general",
      source: "inputSchema",
      question: "What are the hardest inputs for this prompt? Describe scenarios that typically cause failures or ambiguous outputs."
    },
    {
      field: "_edge_cases",
      source: "inputSchema",
      question: "What edge cases have caused failures in production? Include boundary inputs, empty fields, and malformed data."
    }
  ];
  for (const f of inputFields) {
    askUserQuestions.push({
      field: f,
      source: "inputSchema",
      question: `What values should "${f}" have in test cases? Include edge cases (empty, unicode, very long, malformed).`
    });
  }
  for (const f of outputFields) {
    askUserQuestions.push({
      field: f,
      source: "outputSchema",
      question: `What should correct "${f}" look like? Describe the format, tone, completeness criteria, and examples of incorrect outputs.`
    });
  }
  const suggestedCategories = [
    {
      name: "Edge Cases",
      description: "Boundary inputs, empty fields, malformed data",
      priority: "high"
    },
    {
      name: "Hard Cases",
      description: "Ambiguous inputs, adversarial examples, domain traps",
      priority: "high"
    },
    {
      name: "Representative Cases",
      description: "Typical production inputs",
      priority: "medium"
    }
  ];
  const hasSchema = inputFields.length + outputFields.length > 0;
  return {
    promptId,
    promptName: prompt.name,
    schemaFields: {
      input: inputFields,
      output: outputFields
    },
    suggestedCategories,
    templateItem: {
      input: hasSchema ? templateInput : { "<field>": "<value>" },
      expectedOutput: hasSchema ? templateOutput : { "<field>": "<expected>" }
    },
    askUserQuestions,
    guidance: {
      minItems: 5,
      priorityRule: "Hard cases that expose prompt weaknesses > easy cases that always pass",
      steps: [
        "Collect answers to each question from the user",
        "Generate 5-10 dataset items covering all categories",
        "Ensure at least 2 hard/edge cases per category",
        'Format as JSON array: [{"input": {...}, "expectedOutput": {...}}, ...]',
        `Run: mutagent prompts dataset add ${promptId} -d '<json>' --name '<name>' --json`
      ]
    },
    _directive: {
      instruction: "Collect domain-specific dataset items from the user. Ask about each schema field. Prioritize hard cases over easy cases. Then construct the dataset JSON and upload.",
      next: [
        `mutagent prompts dataset add ${promptId} -d '<constructed-json>' --name '<name>' --json`
      ]
    }
  };
}
3945
/**
 * Guided dataset workflow for a known prompt id: fetch the prompt via the
 * SDK, build the schema-aware guidance payload, and attach a disambiguation
 * question when other prompts have similar names. Throws if the client or
 * prompt fetch fails; disambiguation lookup is best-effort (returns null
 * internally on failure).
 */
async function fetchAndBuildGuidedDatasetWorkflow(promptId) {
  const client = await getSDKClient();
  const prompt = await client.getPrompt(promptId);
  const workflow = buildGuidedDatasetWorkflow(prompt);
  const disambiguation = await checkPromptDisambiguation(promptId, prompt.name);
  if (disambiguation) {
    workflow.disambiguation = disambiguation;
  }
  return workflow;
}
3955
+
3956
+ // src/commands/prompts/datasets.ts
3542
3957
  function registerDatasetCommands(prompts) {
3543
3958
  const dataset = new Command3("dataset").description("Manage datasets for prompts").addHelpText("after", `
3544
3959
  Examples:
@@ -3638,11 +4053,19 @@ Verify the dataset ID exists, or list datasets for a prompt to find valid IDs.`)
3638
4053
  }
3639
4054
  }
3640
4055
  });
3641
- dataset.command("add").description("Add dataset to a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Inline JSON array of dataset items").option("-n, --name <name>", "Dataset name").addHelpText("after", `
4056
+ dataset.command("add").description("Add dataset to a prompt").argument("[prompt-id]", "Prompt ID (from: mutagent prompts list) — optional when --guided is set").option("-d, --data <json>", "Inline JSON array of dataset items").option("-n, --name <name>", "Dataset name").option("--guided", "Guided mode — analyze prompt schema and suggest dataset categories. prompt-id is optional when --guided is set.").addHelpText("after", `
3642
4057
  Examples:
4058
+ ${chalk8.dim("$")} mutagent prompts dataset add --guided --json # no prompt-id: returns generic workflow + upload instructions
4059
+ ${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> --guided --json # recommended: schema-aware category suggestions
3643
4060
  ${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> -d '[{"input":{"text":"hello"},"expectedOutput":{"result":"world"}}]'
3644
4061
  ${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> -d '[{"input":{"text":"hello"},"expectedOutput":{"result":"world"}}]' --name "My Dataset"
3645
4062
 
4063
+ Guided mode (--guided):
4064
+ Fetches the prompt's inputSchema + outputSchema and returns structured JSON
4065
+ that guides a coding agent to collect domain-specific dataset items.
4066
+ The agent uses the guidance to ask the user questions, then constructs
4067
+ the dataset JSON and calls: mutagent prompts dataset add <id> -d '<json>' --name '<name>' --json
4068
+
3646
4069
  Inline data format (-d):
3647
4070
  JSON array of objects, e.g.:
3648
4071
  ${chalk8.dim('[{"input": {"text": "hello"}, "expectedOutput": {"result": "world"}}]')}
@@ -3657,11 +4080,26 @@ ${chalk8.yellow("AI Agent (MANDATORY):")}
3657
4080
  expectedOutput is REQUIRED for evaluation scoring.
3658
4081
  Check schemas: mutagent prompts get <prompt-id> --json
3659
4082
 
3660
- ${chalk8.red("Required: --data must be provided.")}
4083
+ ${chalk8.red("Required: --data or --guided must be provided.")}
3661
4084
  `).action(async (promptId, options) => {
3662
- const isJson = getJsonFlag(prompts);
4085
+ const isJson = options.guided ? true : getJsonFlag(prompts);
3663
4086
  const output = new OutputFormatter(isJson ? "json" : "table");
3664
4087
  try {
4088
+ if (options.guided && !promptId) {
4089
+ const result = buildGenericDatasetWorkflow();
4090
+ output.output(result);
4091
+ return;
4092
+ }
4093
+ if (options.guided) {
4094
+ const guidedResult = await fetchAndBuildGuidedDatasetWorkflow(promptId);
4095
+ output.output(guidedResult);
4096
+ return;
4097
+ }
4098
+ if (!promptId) {
4099
+ throw new MutagentError("MISSING_ARGUMENTS", "prompt-id is required", `Run: mutagent prompts dataset add --help
4100
+ ` + `Usage: mutagent prompts dataset add <prompt-id> [options]
4101
+ ` + "With --guided: mutagent prompts dataset add --guided --json (returns generic workflow)");
4102
+ }
3665
4103
  if (!options.data) {
3666
4104
  throw new MutagentError("MISSING_ARGUMENTS", "-d/--data is required", `Run: mutagent prompts dataset add --help
3667
4105
  ` + "Use -d '[{...}]' to provide inline JSON data");
@@ -3704,20 +4142,21 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
3704
4142
  if (!datasetName) {
3705
4143
  throw new MutagentError("MISSING_ARGUMENTS", "Dataset name is required", `Run: mutagent prompts dataset add --help
3706
4144
  ` + `[Agent: Ask the user for a dataset name via AskUserQuestion, then pass --name]
3707
- ` + `Use --name <name>, e.g., mutagent prompts dataset add ${promptId} --name "my-dataset" -d '[...]'`);
4145
+ ` + `Use --name <name>, e.g., mutagent prompts dataset add ${promptId ?? "<prompt-id>"} --name "my-dataset" -d '[...]'`);
3708
4146
  }
4147
+ const resolvedPromptId = promptId;
3709
4148
  const client = await getSDKClient();
3710
- const datasetResult = await client.addDataset(promptId, content, datasetName);
4149
+ const datasetResult = await client.addDataset(resolvedPromptId, content, datasetName);
3711
4150
  if (isJson) {
3712
4151
  let rsState;
3713
4152
  try {
3714
- const evals = await client.listEvaluations(promptId);
4153
+ const evals = await client.listEvaluations(resolvedPromptId);
3715
4154
  rsState = { evaluations: evals.length };
3716
4155
  } catch {}
3717
- const directive = datasetAddedDirective(promptId, datasetResult.id, datasetResult.name, datasetResult.itemCount, rsState);
4156
+ const directive = datasetAddedDirective(resolvedPromptId, datasetResult.id, datasetResult.name, datasetResult.itemCount, rsState);
3718
4157
  output.output({
3719
4158
  ...datasetResult,
3720
- _links: datasetLinks(promptId, datasetResult.id),
4159
+ _links: datasetLinks(resolvedPromptId, datasetResult.id),
3721
4160
  _directive: directive
3722
4161
  });
3723
4162
  echoDirectiveToStderr(directive);
@@ -3725,7 +4164,7 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
3725
4164
  for (const w of warnings) {
3726
4165
  output.warn(w);
3727
4166
  }
3728
- output.success(`Added dataset "${datasetResult.name}" to prompt: ${promptId} (id: ${String(datasetResult.id)})`);
4167
+ output.success(`Added dataset "${datasetResult.name}" to prompt: ${resolvedPromptId} (id: ${String(datasetResult.id)})`);
3729
4168
  if (datasetResult.itemCount !== undefined && datasetResult.itemCount > 0) {
3730
4169
  output.info(`Items uploaded: ${String(datasetResult.itemCount)}`);
3731
4170
  }
@@ -3736,14 +4175,14 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
3736
4175
  resourceType: "Dataset",
3737
4176
  id: datasetResult.id,
3738
4177
  name: datasetResult.name,
3739
- dashboardUrl: datasetLink(promptId, datasetResult.id),
3740
- apiPath: `/api/prompts/${promptId}/datasets/${String(datasetResult.id)}`
4178
+ dashboardUrl: datasetLink(resolvedPromptId, datasetResult.id),
4179
+ apiPath: `/api/prompts/${resolvedPromptId}/datasets/${String(datasetResult.id)}`
3741
4180
  });
3742
4181
  console.log(hints);
3743
4182
  }
3744
4183
  updateMutationContext((ctx) => {
3745
4184
  ctx.addDiscoveredDataset("inline-data", datasetResult.name, datasetResult.itemCount ?? 0);
3746
- ctx.markDatasetUploaded("inline-data", String(datasetResult.id), promptId);
4185
+ ctx.markDatasetUploaded("inline-data", String(datasetResult.id), resolvedPromptId);
3747
4186
  });
3748
4187
  } catch (error) {
3749
4188
  handleError(error, isJson);
@@ -3883,144 +4322,33 @@ async function resolveNumericPromptId(client, promptGroupId) {
3883
4322
  return sorted[0]?.id ?? null;
3884
4323
  }
3885
4324
 
3886
- // src/commands/prompts/guided-workflow.ts
3887
- init_sdk_client();
3888
- async function buildGuidedWorkflow(promptId) {
3889
- const client = await getSDKClient();
3890
- const prompt = await client.getPrompt(promptId);
3891
- const truncate = (s, max) => s.length > max ? s.slice(0, max) + "..." : s;
3892
- const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
3893
- const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
3894
- const inputFields = Object.keys(inputProperties);
3895
- const outputFields = Object.keys(outputProperties);
3896
- const allFields = [
3897
- ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
3898
- ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
3899
- ];
3900
- let datasetExample = null;
3901
- try {
3902
- const datasets = await client.listDatasets(promptId);
3903
- if (datasets.length > 0) {
3904
- try {
3905
- const dsId = datasets[0]?.id;
3906
- const items = dsId != null ? await client.listDatasetItems(String(dsId)) : [];
3907
- if (items.length > 0) {
3908
- const firstItem = items[0];
3909
- datasetExample = firstItem?.expectedOutput ?? firstItem?.input ?? null;
3910
- }
3911
- } catch {}
3912
- }
3913
- } catch {}
3914
- const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
3915
- const isInput = source === "inputSchema";
3916
- const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
3917
- const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
3918
- const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
3919
- const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
3920
- return {
3921
- question,
3922
- header,
3923
- ...hint != null ? { hint } : {},
3924
- options: [
3925
- {
3926
- label: "Define rubric",
3927
- description: defineDesc
3928
- },
3929
- {
3930
- label: "See suggestion",
3931
- description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
3932
- }
3933
- ],
3934
- multiSelect: false,
3935
- context: {
3936
- fieldType: fieldSchema?.type ?? "unknown",
3937
- fieldDescription: fieldSchema?.description ?? null,
3938
- fieldSource: source,
3939
- promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
3940
- exampleValue: datasetExample?.[field] ?? null
3941
- }
3942
- };
3943
- });
3944
- return {
3945
- prompt: { id: promptId, name: prompt.name },
3946
- inputSchema: { fields: inputFields },
3947
- outputSchema: { fields: outputFields },
3948
- workflow: {
3949
- description: "Follow these steps to create an evaluation for this prompt:",
3950
- steps: [
3951
- { step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
3952
- { step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
3953
- { step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
3954
- { step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
3955
- { step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
3956
- ],
3957
- criteriaTemplate: {
3958
- evalConfig: {
3959
- criteria: [{
3960
- name: "<criterion_name>",
3961
- description: "<correctness rubric - describe what correct vs incorrect looks like>",
3962
- evaluationParameter: `<one of: ${allFields.length > 0 ? allFields.map((f) => f.field).join(", ") : "no fields detected - check prompt schemas"}>`
3963
- }]
3964
- }
3965
- },
3966
- exampleCommand: `mutagent prompts evaluation create ${promptId} --name "My Evaluation" -d '${JSON.stringify({
3967
- evalConfig: {
3968
- criteria: outputFields.slice(0, 1).map((f) => ({
3969
- name: `${f} accuracy`,
3970
- description: `Correct if ${f} matches expected structure and content; incorrect if missing, malformed, or off-topic`,
3971
- evaluationParameter: f
3972
- }))
3973
- }
3974
- })}'`
3975
- },
3976
- askUserQuestions,
3977
- _directive: {
3978
- display: "workflow_guide",
3979
- instruction: `MANDATORY ORDERS — READ CAREFULLY:
3980
- ` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
3981
- ` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
3982
- ` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
3983
- ` + ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
3984
- ` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
3985
- ` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
3986
- ` + "6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.",
3987
- aiAgentDecisionTree: {
3988
- step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
3989
- step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
3990
- step3: `After collecting all criteria, construct the --data JSON and run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '<json>'`
3991
- }
3992
- },
3993
- _meta: { inputFields, outputFields, allFields }
3994
- };
3995
- }
3996
-
3997
- // src/lib/adapters/eval-criteria.ts
3998
- function cliCriterionToCanonical(c) {
3999
- return {
4000
- criteria: c.description,
4001
- evaluationParameter: c.evaluationParameter,
4002
- name: c.name
4003
- };
4004
- }
4005
- function canonicalCriterionToCli(c) {
4006
- return {
4007
- name: c.name ?? c.id,
4008
- description: c.criteria,
4009
- evaluationParameter: c.evaluationParameter
4010
- };
4011
- }
4012
- function cliCriteriaArrayToCanonical(arr) {
4013
- if (!Array.isArray(arr))
4014
- return [];
4015
- return arr.map(cliCriterionToCanonical);
4016
- }
4017
- function canonicalCriteriaArrayToCli(arr) {
4018
- if (!Array.isArray(arr))
4019
- return [];
4020
- return arr.map(canonicalCriterionToCli);
4021
- }
4022
-
4023
- // src/commands/prompts/evaluations-run.ts
4325
+ // src/lib/adapters/eval-criteria.ts
4326
+ function cliCriterionToCanonical(c) {
4327
+ return {
4328
+ criteria: c.description,
4329
+ evaluationParameter: c.evaluationParameter,
4330
+ name: c.name
4331
+ };
4332
+ }
4333
+ function canonicalCriterionToCli(c) {
4334
+ return {
4335
+ name: c.name ?? c.id,
4336
+ description: c.criteria,
4337
+ evaluationParameter: c.evaluationParameter
4338
+ };
4339
+ }
4340
+ function cliCriteriaArrayToCanonical(arr) {
4341
+ if (!Array.isArray(arr))
4342
+ return [];
4343
+ return arr.map(cliCriterionToCanonical);
4344
+ }
4345
+ function canonicalCriteriaArrayToCli(arr) {
4346
+ if (!Array.isArray(arr))
4347
+ return [];
4348
+ return arr.map(canonicalCriterionToCli);
4349
+ }
4350
+
4351
+ // src/commands/prompts/evaluations-run.ts
4024
4352
  init_sdk_client();
4025
4353
  import chalk9 from "chalk";
4026
4354
  init_errors();
@@ -4213,8 +4541,9 @@ Examples:
4213
4541
  handleError(error, isJson);
4214
4542
  }
4215
4543
  });
4216
- evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied)").addHelpText("after", `
4544
+ evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("[prompt-id]", "Prompt ID (from: mutagent prompts list) — optional when --guided is set").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied). prompt-id is optional when --guided is set.").addHelpText("after", `
4217
4545
  Examples:
4546
+ ${chalk10.dim("$")} mutagent prompts evaluation create --guided --json ${chalk10.dim("# no prompt-id: returns generic workflow + upload instructions")}
4218
4547
  ${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --guided ${chalk10.dim("# recommended: shows workflow guide + schema fields")}
4219
4548
  ${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --guided --json ${chalk10.dim("# structured workflow for AI agents")}
4220
4549
  ${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --name "Accuracy" -d '{"evalConfig":{"criteria":[...]}}' ${chalk10.dim("# power user")}
@@ -4251,6 +4580,16 @@ ${chalk10.dim("Get prompt IDs: mutagent prompts list")}
4251
4580
  }
4252
4581
  const output = new OutputFormatter(isJson ? "json" : "table");
4253
4582
  try {
4583
+ if (options.guided && !promptId) {
4584
+ const result = buildGenericEvalWorkflow();
4585
+ output.output(result);
4586
+ return;
4587
+ }
4588
+ if (!promptId) {
4589
+ throw new MutagentError("MISSING_ARGUMENTS", "prompt-id is required", `Run: mutagent prompts evaluation create --help
4590
+ ` + `Usage: mutagent prompts evaluation create <prompt-id> [options]
4591
+ ` + "With --guided: mutagent prompts evaluation create --guided --json (returns generic workflow)");
4592
+ }
4254
4593
  const renderGuidedWorkflow = (wf) => {
4255
4594
  const { inputFields, outputFields, allFields } = wf._meta;
4256
4595
  if (isJson) {
@@ -7238,18 +7577,32 @@ AI AGENT: Execute these steps using Bash for install, Write/Edit for code change
7238
7577
 
7239
7578
  // src/commands/agents/index.ts
7240
7579
  import { Command as Command10 } from "commander";
7241
- import chalk25 from "chalk";
7580
+ import chalk26 from "chalk";
7242
7581
 
7243
7582
  // src/commands/agents/agents-crud.ts
7244
7583
  init_sdk_client();
7245
- import chalk24 from "chalk";
7584
+ import chalk25 from "chalk";
7246
7585
  init_errors();
7586
+
7587
+ // src/lib/agents-wip-banner.ts
7588
+ import chalk24 from "chalk";
7589
+ var AGENTS_WIP_BANNER = `${chalk24.yellow("Agents: Early Access / WIP")}
7590
+ ${chalk24.bold("Agent optimization & evaluation are actively in development.")}
7591
+ ${chalk24.dim("The Prompt path (single-shot LLM calls with output schemas) is production-ready.")}
7592
+ ${chalk24.dim("For Agents (multi-turn / tool-calling), join early access:")}
7593
+ ${chalk24.cyan("https://www.mutagent.io/agents-partnership")}
7594
+ ${chalk24.dim("`mutagent agents ...` CRUD commands exist but optimization/eval are NOT ready.")}`;
7595
+ var AGENTS_WIP_BANNER_SHORT = `${chalk24.yellow("[Agents WIP]")} ${chalk24.dim("CRUD only — optimization & evaluation in development.")} ${chalk24.cyan("https://www.mutagent.io/agents-partnership")}`;
7596
+
7597
+ // src/commands/agents/agents-crud.ts
7247
7598
  function registerAgentsCrud(agents) {
7248
7599
  agents.command("list").description("List all agents").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").option("-n, --name <name>", "Filter by name").option("-s, --status <status>", "Filter by status (active, paused, archived)").addHelpText("after", `
7600
+ ${AGENTS_WIP_BANNER_SHORT}
7601
+
7249
7602
  Examples:
7250
- ${chalk24.dim("$")} mutagent agents list
7251
- ${chalk24.dim("$")} mutagent agents list --status active
7252
- ${chalk24.dim("$")} mutagent agents list --name "reviewer" --json
7603
+ ${chalk25.dim("$")} mutagent agents list
7604
+ ${chalk25.dim("$")} mutagent agents list --status active
7605
+ ${chalk25.dim("$")} mutagent agents list --name "reviewer" --json
7253
7606
  `).action(async (options) => {
7254
7607
  const isJson = getJsonFlag(agents);
7255
7608
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7298,9 +7651,11 @@ Examples:
7298
7651
  }
7299
7652
  });
7300
7653
  agents.command("get").description("Get agent details").argument("<id>", "Agent ID").addHelpText("after", `
7654
+ ${AGENTS_WIP_BANNER_SHORT}
7655
+
7301
7656
  Examples:
7302
- ${chalk24.dim("$")} mutagent agents get <agent-id>
7303
- ${chalk24.dim("$")} mutagent agents get <agent-id> --json
7657
+ ${chalk25.dim("$")} mutagent agents get <agent-id>
7658
+ ${chalk25.dim("$")} mutagent agents get <agent-id> --json
7304
7659
  `).action(async (id) => {
7305
7660
  const isJson = getJsonFlag(agents);
7306
7661
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7330,11 +7685,11 @@ Examples:
7330
7685
  };
7331
7686
  output.output(formatted);
7332
7687
  if (agent.systemPrompt) {
7333
- console.log(chalk24.bold(`
7688
+ console.log(chalk25.bold(`
7334
7689
  System Prompt:`));
7335
- console.log(chalk24.gray("─".repeat(60)));
7690
+ console.log(chalk25.gray("─".repeat(60)));
7336
7691
  console.log(agent.systemPrompt);
7337
- console.log(chalk24.gray("─".repeat(60)));
7692
+ console.log(chalk25.gray("─".repeat(60)));
7338
7693
  }
7339
7694
  }
7340
7695
  } catch (error) {
@@ -7342,18 +7697,20 @@ System Prompt:`));
7342
7697
  }
7343
7698
  });
7344
7699
  agents.command("create").description("Create a new agent").option("-d, --data <json>", "Agent as JSON string (recommended for CI/scripts/agents)").option("-n, --name <name>", "Agent name").option("-s, --slug <slug>", "Agent slug (URL-friendly identifier)").option("-p, --system-prompt <prompt>", "System prompt").option("-m, --model <model>", "Model (claude-sonnet-4-5, claude-opus-4-5, claude-haiku-4-5)").option("--description <desc>", "Agent description").addHelpText("after", `
7700
+ ${AGENTS_WIP_BANNER_SHORT}
7701
+
7345
7702
  Examples:
7346
- ${chalk24.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7347
- ${chalk24.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are a code reviewer..."}'
7703
+ ${chalk25.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7704
+ ${chalk25.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are a code reviewer..."}'
7348
7705
 
7349
7706
  Expected JSON (--data):
7350
- ${chalk24.dim('{"name":"<name>","slug":"<slug>","systemPrompt":"<system prompt>","model":"<model-id>","description":"<description>"}')}
7707
+ ${chalk25.dim('{"name":"<name>","slug":"<slug>","systemPrompt":"<system prompt>","model":"<model-id>","description":"<description>"}')}
7351
7708
 
7352
7709
  Input Methods (pick one, priority order):
7353
- --name/--slug/... Individual flags ${chalk24.green("(recommended)")}
7710
+ --name/--slug/... Individual flags ${chalk25.green("(recommended)")}
7354
7711
  -d, --data Inline JSON object (CI/scripts/agents)
7355
7712
 
7356
- ${chalk24.red("Required: name, slug, systemPrompt.")} ${chalk24.dim("CLI flags override --data fields.")}
7713
+ ${chalk25.red("Required: name, slug, systemPrompt.")} ${chalk25.dim("CLI flags override --data fields.")}
7357
7714
  `).action(async (options) => {
7358
7715
  const isJson = getJsonFlag(agents);
7359
7716
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7398,16 +7755,18 @@ ${chalk24.red("Required: name, slug, systemPrompt.")} ${chalk24.dim("CLI flags o
7398
7755
  }
7399
7756
  });
7400
7757
  agents.command("update").description("Update an agent").argument("<id>", "Agent ID").option("-d, --data <json>", "Agent updates as JSON string (CI/scripts/agents)").option("-n, --name <name>", "New name").option("-p, --system-prompt <prompt>", "New system prompt").option("-m, --model <model>", "New model").option("--description <desc>", "New description").option("-s, --status <status>", "New status (active, paused, archived)").addHelpText("after", `
7758
+ ${AGENTS_WIP_BANNER_SHORT}
7759
+
7401
7760
  Examples:
7402
- ${chalk24.dim("$")} mutagent agents update <id> --name "New Name"
7403
- ${chalk24.dim("$")} mutagent agents update <id> --system-prompt "Updated prompt" --status active
7404
- ${chalk24.dim("$")} mutagent agents update <id> -d '{"name":"New Name","systemPrompt":"Updated prompt"}'
7761
+ ${chalk25.dim("$")} mutagent agents update <id> --name "New Name"
7762
+ ${chalk25.dim("$")} mutagent agents update <id> --system-prompt "Updated prompt" --status active
7763
+ ${chalk25.dim("$")} mutagent agents update <id> -d '{"name":"New Name","systemPrompt":"Updated prompt"}'
7405
7764
 
7406
7765
  Input Methods (pick one, priority order):
7407
- --name/--system-prompt/... Individual flags ${chalk24.green("(recommended)")}
7766
+ --name/--system-prompt/... Individual flags ${chalk25.green("(recommended)")}
7408
7767
  -d, --data Inline JSON object (CI/scripts/agents)
7409
7768
 
7410
- ${chalk24.dim("CLI flags override --data fields.")}
7769
+ ${chalk25.dim("CLI flags override --data fields.")}
7411
7770
  `).action(async (id, options) => {
7412
7771
  const isJson = getJsonFlag(agents);
7413
7772
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7454,12 +7813,14 @@ ${chalk24.dim("CLI flags override --data fields.")}
7454
7813
  }
7455
7814
  });
7456
7815
  agents.command("delete").description("Delete an agent").argument("<id>", "Agent ID").option("--force", "Skip confirmation").addHelpText("after", `
7816
+ ${AGENTS_WIP_BANNER_SHORT}
7817
+
7457
7818
  Examples:
7458
- ${chalk24.dim("$")} mutagent agents delete <id>
7459
- ${chalk24.dim("$")} mutagent agents delete <id> --force
7460
- ${chalk24.dim("$")} mutagent agents delete <id> --force --json
7819
+ ${chalk25.dim("$")} mutagent agents delete <id>
7820
+ ${chalk25.dim("$")} mutagent agents delete <id> --force
7821
+ ${chalk25.dim("$")} mutagent agents delete <id> --force --json
7461
7822
 
7462
- ${chalk24.dim("Tip: Use --force to skip confirmation (required for non-interactive/CI usage).")}
7823
+ ${chalk25.dim("Tip: Use --force to skip confirmation (required for non-interactive/CI usage).")}
7463
7824
  `).action(async (id, options) => {
7464
7825
  const isJson = getJsonFlag(agents);
7465
7826
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7488,14 +7849,16 @@ ${chalk24.dim("Tip: Use --force to skip confirmation (required for non-interacti
7488
7849
 
7489
7850
  // src/commands/agents/index.ts
7490
7851
  function createAgentsCommand() {
7491
- const agents = new Command10("agents").description("Manage AI agents").addHelpText("after", `
7852
+ const agents = new Command10("agents").description("Manage AI agents (CRUD only — optimization & evaluation are WIP)").addHelpText("after", `
7853
+ ${AGENTS_WIP_BANNER}
7854
+
7492
7855
  Examples:
7493
- ${chalk25.dim("$")} mutagent agents list
7494
- ${chalk25.dim("$")} mutagent agents get <agent-id>
7495
- ${chalk25.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7496
- ${chalk25.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are..."}'
7497
- ${chalk25.dim("$")} mutagent agents update <agent-id> --name "Updated Name"
7498
- ${chalk25.dim("$")} mutagent agents delete <agent-id> --force
7856
+ ${chalk26.dim("$")} mutagent agents list
7857
+ ${chalk26.dim("$")} mutagent agents get <agent-id>
7858
+ ${chalk26.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7859
+ ${chalk26.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are..."}'
7860
+ ${chalk26.dim("$")} mutagent agents update <agent-id> --name "Updated Name"
7861
+ ${chalk26.dim("$")} mutagent agents delete <agent-id> --force
7499
7862
 
7500
7863
  Subcommands:
7501
7864
  list, get, create, update, delete
@@ -7507,22 +7870,22 @@ Subcommands:
7507
7870
  // src/commands/config.ts
7508
7871
  init_config();
7509
7872
  import { Command as Command11 } from "commander";
7510
- import chalk26 from "chalk";
7873
+ import chalk27 from "chalk";
7511
7874
  init_errors();
7512
7875
  init_sdk_client();
7513
7876
  var VALID_CONFIG_KEYS = ["apiKey", "endpoint", "format", "timeout", "defaultWorkspace", "defaultOrganization"];
7514
7877
  function createConfigCommand() {
7515
7878
  const config = new Command11("config").description("Manage CLI configuration").addHelpText("after", `
7516
7879
  Examples:
7517
- ${chalk26.dim("$")} mutagent config list
7518
- ${chalk26.dim("$")} mutagent config get endpoint
7519
- ${chalk26.dim("$")} mutagent config set workspace <workspace-id>
7520
- ${chalk26.dim("$")} mutagent config set org <org-id>
7880
+ ${chalk27.dim("$")} mutagent config list
7881
+ ${chalk27.dim("$")} mutagent config get endpoint
7882
+ ${chalk27.dim("$")} mutagent config set workspace <workspace-id>
7883
+ ${chalk27.dim("$")} mutagent config set org <org-id>
7521
7884
  `);
7522
7885
  config.command("list").description("List all configuration").addHelpText("after", `
7523
7886
  Examples:
7524
- ${chalk26.dim("$")} mutagent config list
7525
- ${chalk26.dim("$")} mutagent config list --json
7887
+ ${chalk27.dim("$")} mutagent config list
7888
+ ${chalk27.dim("$")} mutagent config list --json
7526
7889
  `).action(() => {
7527
7890
  const isJson = getJsonFlag(config);
7528
7891
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7535,11 +7898,11 @@ Examples:
7535
7898
  });
7536
7899
  config.command("get").description("Get configuration value").argument("<key>", "Configuration key (apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization)").addHelpText("after", `
7537
7900
  Examples:
7538
- ${chalk26.dim("$")} mutagent config get endpoint
7539
- ${chalk26.dim("$")} mutagent config get defaultWorkspace
7540
- ${chalk26.dim("$")} mutagent config get apiKey --json
7901
+ ${chalk27.dim("$")} mutagent config get endpoint
7902
+ ${chalk27.dim("$")} mutagent config get defaultWorkspace
7903
+ ${chalk27.dim("$")} mutagent config get apiKey --json
7541
7904
 
7542
- ${chalk26.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization")}
7905
+ ${chalk27.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization")}
7543
7906
  `).action((key) => {
7544
7907
  const isJson = getJsonFlag(config);
7545
7908
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7566,9 +7929,9 @@ ${chalk26.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaul
7566
7929
  });
7567
7930
  set.command("workspace").description("Set default workspace ID").argument("<id>", "Workspace ID to set as default").addHelpText("after", `
7568
7931
  Examples:
7569
- ${chalk26.dim("$")} mutagent config set workspace <workspace-id>
7932
+ ${chalk27.dim("$")} mutagent config set workspace <workspace-id>
7570
7933
 
7571
- ${chalk26.dim("Persists workspace ID so you don't need to pass headers on every request.")}
7934
+ ${chalk27.dim("Persists workspace ID so you don't need to pass headers on every request.")}
7572
7935
  `).action((id) => {
7573
7936
  const isJson = getJsonFlag(config);
7574
7937
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7582,9 +7945,9 @@ ${chalk26.dim("Persists workspace ID so you don't need to pass headers on every
7582
7945
  });
7583
7946
  set.command("org").description("Set default organization ID").argument("<id>", "Organization ID to set as default").addHelpText("after", `
7584
7947
  Examples:
7585
- ${chalk26.dim("$")} mutagent config set org <org-id>
7948
+ ${chalk27.dim("$")} mutagent config set org <org-id>
7586
7949
 
7587
- ${chalk26.dim("Persists organization ID for org-scoped API keys.")}
7950
+ ${chalk27.dim("Persists organization ID for org-scoped API keys.")}
7588
7951
  `).action((id) => {
7589
7952
  const isJson = getJsonFlag(config);
7590
7953
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7603,7 +7966,7 @@ ${chalk26.dim("Persists organization ID for org-scoped API keys.")}
7603
7966
  // src/commands/playground.ts
7604
7967
  init_sdk_client();
7605
7968
  import { Command as Command12 } from "commander";
7606
- import chalk27 from "chalk";
7969
+ import chalk28 from "chalk";
7607
7970
  init_errors();
7608
7971
  function parseSSELine(line3) {
7609
7972
  if (!line3 || line3.startsWith(":")) {
@@ -7630,11 +7993,11 @@ function parsePromptStreamEvent(data) {
7630
7993
  function createPlaygroundCommand() {
7631
7994
  const playground = new Command12("playground").description("Execute and test prompts interactively").addHelpText("after", `
7632
7995
  Examples:
7633
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
7634
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
7635
- ${chalk27.dim("$")} mutagent playground run <prompt-id> -i '{}' --model gpt-4-turbo
7636
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
7637
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --messages '[{"role":"user","content":"Hi"}]'
7996
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
7997
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
7998
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> -i '{}' --model gpt-4-turbo
7999
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
8000
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --messages '[{"role":"user","content":"Hi"}]'
7638
8001
 
7639
8002
  Input Format:
7640
8003
  The input must be a valid JSON object matching the prompt's input schema.
@@ -7650,16 +8013,16 @@ Streaming:
7650
8013
  `);
7651
8014
  playground.command("run").description("Execute a prompt with input variables").argument("<prompt-id>", "Prompt ID to execute (from: mutagent prompts list)").option("-i, --input <json>", "Input variables as JSON").option("-s, --stream", "Stream the response").option("-m, --model <model>", "Override model").option("--system <text>", "Set system prompt text").option("--human <text>", "Set human/user message text").option("--messages <json>", "Pass full messages array as JSON string").addHelpText("after", `
7652
8015
  Examples:
7653
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
7654
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
7655
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
7656
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{}' --model gpt-4-turbo --json
8016
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
8017
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
8018
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
8019
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --model gpt-4-turbo --json
7657
8020
 
7658
8021
  Input Methods (pick one, priority order):
7659
- --system/--human Quick system + user message ${chalk27.green("(recommended)")}
8022
+ --system/--human Quick system + user message ${chalk28.green("(recommended)")}
7660
8023
  --input '{"key":"value"}' Inline JSON variables
7661
8024
  --messages '[...]' Full messages array
7662
- ${chalk27.dim(`Hint: Test before evaluating: mutagent playground run <id> --input '{"key":"value"}'`)}
8025
+ ${chalk28.dim(`Hint: Test before evaluating: mutagent playground run <id> --input '{"key":"value"}'`)}
7663
8026
  `).action(async (promptId, options) => {
7664
8027
  const isJson = getJsonFlag(playground);
7665
8028
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7681,21 +8044,21 @@ ${chalk27.dim(`Hint: Test before evaluating: mutagent playground run <id> --inpu
7681
8044
  }
7682
8045
  });
7683
8046
  } else {
7684
- console.log(chalk27.bold(`
8047
+ console.log(chalk28.bold(`
7685
8048
  Execution Result:`));
7686
- console.log(chalk27.gray("─".repeat(50)));
7687
- console.log(chalk27.cyan("Output:"));
8049
+ console.log(chalk28.gray("─".repeat(50)));
8050
+ console.log(chalk28.cyan("Output:"));
7688
8051
  console.log(result.output);
7689
- console.log(chalk27.gray("─".repeat(50)));
7690
- console.log(chalk27.dim(`Model: ${result.model}`));
7691
- console.log(chalk27.dim(`Execution Time: ${String(result.executionTimeMs)}ms`));
8052
+ console.log(chalk28.gray("─".repeat(50)));
8053
+ console.log(chalk28.dim(`Model: ${result.model}`));
8054
+ console.log(chalk28.dim(`Execution Time: ${String(result.executionTimeMs)}ms`));
7692
8055
  if (result.tokens) {
7693
- console.log(chalk27.dim(`Tokens: ${String(result.tokens.prompt)} prompt + ${String(result.tokens.completion)} completion = ${String(result.tokens.total)} total`));
8056
+ console.log(chalk28.dim(`Tokens: ${String(result.tokens.prompt)} prompt + ${String(result.tokens.completion)} completion = ${String(result.tokens.total)} total`));
7694
8057
  }
7695
8058
  if (result.cost !== undefined) {
7696
- console.log(chalk27.dim(`Cost: $${result.cost.toFixed(6)}`));
8059
+ console.log(chalk28.dim(`Cost: $${result.cost.toFixed(6)}`));
7697
8060
  }
7698
- console.log(chalk27.dim(`Playground: ${playgroundLink(promptId)}`));
8061
+ console.log(chalk28.dim(`Playground: ${playgroundLink(promptId)}`));
7699
8062
  console.log();
7700
8063
  }
7701
8064
  }
@@ -7784,9 +8147,9 @@ async function executeStreaming(client, promptId, input, model, isJson, output)
7784
8147
  const decoder = new TextDecoder;
7785
8148
  let buffer = "";
7786
8149
  if (!isJson) {
7787
- console.log(chalk27.bold(`
8150
+ console.log(chalk28.bold(`
7788
8151
  Streaming Output:`));
7789
- console.log(chalk27.gray("─".repeat(50)));
8152
+ console.log(chalk28.gray("─".repeat(50)));
7790
8153
  }
7791
8154
  try {
7792
8155
  for (;; ) {
@@ -7825,15 +8188,15 @@ Streaming Output:`));
7825
8188
  console.log(JSON.stringify({ type: "complete", result: event.result }));
7826
8189
  } else {
7827
8190
  console.log();
7828
- console.log(chalk27.gray("─".repeat(50)));
8191
+ console.log(chalk28.gray("─".repeat(50)));
7829
8192
  if (event.result) {
7830
- console.log(chalk27.dim(`Model: ${event.result.model}`));
7831
- console.log(chalk27.dim(`Execution Time: ${String(event.result.executionTimeMs)}ms`));
8193
+ console.log(chalk28.dim(`Model: ${event.result.model}`));
8194
+ console.log(chalk28.dim(`Execution Time: ${String(event.result.executionTimeMs)}ms`));
7832
8195
  if (event.result.tokens) {
7833
- console.log(chalk27.dim(`Tokens: ${String(event.result.tokens.prompt)} prompt + ${String(event.result.tokens.completion)} completion = ${String(event.result.tokens.total)} total`));
8196
+ console.log(chalk28.dim(`Tokens: ${String(event.result.tokens.prompt)} prompt + ${String(event.result.tokens.completion)} completion = ${String(event.result.tokens.total)} total`));
7834
8197
  }
7835
8198
  if (event.result.cost !== undefined) {
7836
- console.log(chalk27.dim(`Cost: $${event.result.cost.toFixed(6)}`));
8199
+ console.log(chalk28.dim(`Cost: $${event.result.cost.toFixed(6)}`));
7837
8200
  }
7838
8201
  }
7839
8202
  console.log();
@@ -7858,13 +8221,13 @@ Streaming Output:`));
7858
8221
  // src/commands/workspaces.ts
7859
8222
  init_sdk_client();
7860
8223
  import { Command as Command13 } from "commander";
7861
- import chalk28 from "chalk";
8224
+ import chalk29 from "chalk";
7862
8225
  init_errors();
7863
8226
  function createWorkspacesCommand() {
7864
8227
  const workspaces = new Command13("workspaces").description("View workspaces (read-only)").addHelpText("after", `
7865
8228
  Examples:
7866
- ${chalk28.dim("$")} mutagent workspaces list
7867
- ${chalk28.dim("$")} mutagent workspaces get <workspace-id>
8229
+ ${chalk29.dim("$")} mutagent workspaces list
8230
+ ${chalk29.dim("$")} mutagent workspaces get <workspace-id>
7868
8231
 
7869
8232
  Subcommands:
7870
8233
  list, get
@@ -7873,8 +8236,8 @@ Note: Workspace management (create, update, delete) is available in the Admin Pa
7873
8236
  `);
7874
8237
  workspaces.command("list").description("List all workspaces").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").addHelpText("after", `
7875
8238
  Examples:
7876
- ${chalk28.dim("$")} mutagent workspaces list
7877
- ${chalk28.dim("$")} mutagent workspaces list --limit 10 --json
8239
+ ${chalk29.dim("$")} mutagent workspaces list
8240
+ ${chalk29.dim("$")} mutagent workspaces list --limit 10 --json
7878
8241
  `).action(async (options) => {
7879
8242
  const isJson = getJsonFlag(workspaces);
7880
8243
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7914,8 +8277,8 @@ Examples:
7914
8277
  });
7915
8278
  workspaces.command("get").description("Get workspace details").argument("<id>", "Workspace ID").addHelpText("after", `
7916
8279
  Examples:
7917
- ${chalk28.dim("$")} mutagent workspaces get <workspace-id>
7918
- ${chalk28.dim("$")} mutagent workspaces get <workspace-id> --json
8280
+ ${chalk29.dim("$")} mutagent workspaces get <workspace-id>
8281
+ ${chalk29.dim("$")} mutagent workspaces get <workspace-id> --json
7919
8282
  `).action(async (id) => {
7920
8283
  const isJson = getJsonFlag(workspaces);
7921
8284
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7948,12 +8311,12 @@ Examples:
7948
8311
  // src/commands/providers/index.ts
7949
8312
  init_sdk_client();
7950
8313
  import { Command as Command14 } from "commander";
7951
- import chalk32 from "chalk";
8314
+ import chalk33 from "chalk";
7952
8315
  init_errors();
7953
8316
 
7954
8317
  // src/commands/providers/add.ts
7955
8318
  init_sdk_client();
7956
- import chalk29 from "chalk";
8319
+ import chalk30 from "chalk";
7957
8320
  init_errors();
7958
8321
  function resolveScope(scopeFlag, client) {
7959
8322
  const scope = scopeFlag ?? "workspace";
@@ -7976,10 +8339,10 @@ function resolveScope(scopeFlag, client) {
7976
8339
  function registerAddCommand(parent) {
7977
8340
  parent.command("add").description("Add a new provider configuration").requiredOption("-p, --provider <type>", "Provider type (openai, anthropic, google, ...)").requiredOption("-n, --name <name>", "Display name for this provider").requiredOption("-k, --api-key <key>", "API key for the provider").option("-s, --scope <scope>", "Scope: workspace (default), org, user", "workspace").option("--base-url <url>", "Custom base URL for the provider API").option("--set-default", "Set as default provider for this scope").addHelpText("after", `
7978
8341
  Examples:
7979
- ${chalk29.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $OPENAI_API_KEY
7980
- ${chalk29.dim("$")} mutagent providers add --provider anthropic --name "Team Claude" --api-key $KEY --scope org
7981
- ${chalk29.dim("$")} mutagent providers add --provider openai --name "Personal" --api-key $KEY --scope user --set-default
7982
- ${chalk29.dim("$")} mutagent providers add --provider custom --name "Ollama" --api-key none --base-url http://localhost:11434 --json
8342
+ ${chalk30.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $OPENAI_API_KEY
8343
+ ${chalk30.dim("$")} mutagent providers add --provider anthropic --name "Team Claude" --api-key $KEY --scope org
8344
+ ${chalk30.dim("$")} mutagent providers add --provider openai --name "Personal" --api-key $KEY --scope user --set-default
8345
+ ${chalk30.dim("$")} mutagent providers add --provider custom --name "Ollama" --api-key none --base-url http://localhost:11434 --json
7983
8346
 
7984
8347
  Scope Resolution:
7985
8348
  workspace (default) Uses your configured workspace
@@ -8025,10 +8388,10 @@ The API key is encrypted server-side and never returned in plain text.
8025
8388
  console.log(` Scope: ${options.scope ?? "workspace"}`);
8026
8389
  console.log(` URL: ${providerLink(created.id)}`);
8027
8390
  if (options.setDefault) {
8028
- console.log(chalk29.green(" Set as default provider"));
8391
+ console.log(chalk30.green(" Set as default provider"));
8029
8392
  }
8030
8393
  console.log("");
8031
- console.log(chalk29.dim("API key is encrypted server-side. Use `providers get` to see masked key."));
8394
+ console.log(chalk30.dim("API key is encrypted server-side. Use `providers get` to see masked key."));
8032
8395
  }
8033
8396
  } catch (error) {
8034
8397
  handleError(error, isJson);
@@ -8066,15 +8429,15 @@ function buildProviderCreatedDirective(provider, scope) {
8066
8429
 
8067
8430
  // src/commands/providers/update.ts
8068
8431
  init_sdk_client();
8069
- import chalk30 from "chalk";
8432
+ import chalk31 from "chalk";
8070
8433
  init_errors();
8071
8434
  function registerUpdateCommand(parent) {
8072
8435
  parent.command("update").description("Update an existing provider configuration").argument("<id>", "Provider ID (from: mutagent providers list)").option("-n, --name <name>", "Updated display name").option("-k, --api-key <key>", "Updated API key (will be re-encrypted)").option("--active <bool>", "Activate or deactivate (true|false)").option("--set-default", "Set as default provider for its scope").option("--base-url <url>", 'Updated base URL (use "" to clear)').addHelpText("after", `
8073
8436
  Examples:
8074
- ${chalk30.dim("$")} mutagent providers update <id> --name "New Name"
8075
- ${chalk30.dim("$")} mutagent providers update <id> --api-key $NEW_KEY --json
8076
- ${chalk30.dim("$")} mutagent providers update <id> --active false
8077
- ${chalk30.dim("$")} mutagent providers update <id> --set-default --json
8437
+ ${chalk31.dim("$")} mutagent providers update <id> --name "New Name"
8438
+ ${chalk31.dim("$")} mutagent providers update <id> --api-key $NEW_KEY --json
8439
+ ${chalk31.dim("$")} mutagent providers update <id> --active false
8440
+ ${chalk31.dim("$")} mutagent providers update <id> --set-default --json
8078
8441
 
8079
8442
  PATCH semantics — only provided fields are updated.
8080
8443
  `).action(async (id, options) => {
@@ -8125,7 +8488,7 @@ PATCH semantics — only provided fields are updated.
8125
8488
  console.log(` ID: ${String(updated.id ?? id)}`);
8126
8489
  console.log(` URL: ${providerLink(updated.id ?? id)}`);
8127
8490
  if (options.apiKey) {
8128
- console.log(chalk30.dim(" API key re-encrypted server-side."));
8491
+ console.log(chalk31.dim(" API key re-encrypted server-side."));
8129
8492
  }
8130
8493
  }
8131
8494
  } catch (error) {
@@ -8164,17 +8527,17 @@ function buildProviderUpdatedDirective(provider, requestId) {
8164
8527
 
8165
8528
  // src/commands/providers/delete.ts
8166
8529
  init_sdk_client();
8167
- import chalk31 from "chalk";
8530
+ import chalk32 from "chalk";
8168
8531
  init_errors();
8169
8532
  function registerDeleteCommand(parent) {
8170
8533
  parent.command("delete").description("Delete a provider configuration").argument("<id>", "Provider ID (from: mutagent providers list)").option("-f, --force", "Skip confirmation prompt").addHelpText("after", `
8171
8534
  Examples:
8172
- ${chalk31.dim("$")} mutagent providers delete <id>
8173
- ${chalk31.dim("$")} mutagent providers delete <id> --force
8174
- ${chalk31.dim("$")} mutagent providers delete <id> --force --json
8535
+ ${chalk32.dim("$")} mutagent providers delete <id>
8536
+ ${chalk32.dim("$")} mutagent providers delete <id> --force
8537
+ ${chalk32.dim("$")} mutagent providers delete <id> --force --json
8175
8538
 
8176
- ${chalk31.dim("Note: --force is required. The CLI is non-interactive — confirm with the user via your native flow, then pass --force.")}
8177
- ${chalk31.dim("Warning: API keys are AES-256-GCM encrypted and irrecoverable after deletion. Agents referencing this provider will lose their model config.")}
8539
+ ${chalk32.dim("Note: --force is required. The CLI is non-interactive — confirm with the user via your native flow, then pass --force.")}
8540
+ ${chalk32.dim("Warning: API keys are AES-256-GCM encrypted and irrecoverable after deletion. Agents referencing this provider will lose their model config.")}
8178
8541
  `).action(async (id, options) => {
8179
8542
  const isJson = getJsonFlag(parent);
8180
8543
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8264,12 +8627,12 @@ function validateProviderType(type) {
8264
8627
  function createProvidersCommand() {
8265
8628
  const providers = new Command14("providers").description("Manage LLM provider configurations (BYOK)").addHelpText("after", `
8266
8629
  Examples:
8267
- ${chalk32.dim("$")} mutagent providers list
8268
- ${chalk32.dim("$")} mutagent providers get <provider-id>
8269
- ${chalk32.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $KEY
8270
- ${chalk32.dim("$")} mutagent providers update <id> --name "New Name"
8271
- ${chalk32.dim("$")} mutagent providers delete <id> --force
8272
- ${chalk32.dim("$")} mutagent providers test <provider-id>
8630
+ ${chalk33.dim("$")} mutagent providers list
8631
+ ${chalk33.dim("$")} mutagent providers get <provider-id>
8632
+ ${chalk33.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $KEY
8633
+ ${chalk33.dim("$")} mutagent providers update <id> --name "New Name"
8634
+ ${chalk33.dim("$")} mutagent providers delete <id> --force
8635
+ ${chalk33.dim("$")} mutagent providers test <provider-id>
8273
8636
 
8274
8637
  Provider Types:
8275
8638
  openai, anthropic, google, azure, bedrock, cohere, mistral, groq, together, replicate, custom
@@ -8279,9 +8642,9 @@ Subcommands:
8279
8642
  `);
8280
8643
  providers.command("list").description("List all providers").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").option("-t, --type <type>", "Filter by provider type").addHelpText("after", `
8281
8644
  Examples:
8282
- ${chalk32.dim("$")} mutagent providers list
8283
- ${chalk32.dim("$")} mutagent providers list --type openai
8284
- ${chalk32.dim("$")} mutagent providers list --json
8645
+ ${chalk33.dim("$")} mutagent providers list
8646
+ ${chalk33.dim("$")} mutagent providers list --type openai
8647
+ ${chalk33.dim("$")} mutagent providers list --json
8285
8648
  `).action(async (options) => {
8286
8649
  const isJson = getJsonFlag(providers);
8287
8650
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8330,8 +8693,8 @@ Examples:
8330
8693
  });
8331
8694
  providers.command("get").description("Get provider details").argument("<id>", "Provider ID (from: mutagent providers list)").addHelpText("after", `
8332
8695
  Examples:
8333
- ${chalk32.dim("$")} mutagent providers get <provider-id>
8334
- ${chalk32.dim("$")} mutagent providers get <provider-id> --json
8696
+ ${chalk33.dim("$")} mutagent providers get <provider-id>
8697
+ ${chalk33.dim("$")} mutagent providers get <provider-id> --json
8335
8698
  `).action(async (id) => {
8336
8699
  const isJson = getJsonFlag(providers);
8337
8700
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8360,10 +8723,10 @@ Examples:
8360
8723
  });
8361
8724
  providers.command("test").description("Test provider connectivity").argument("<id>", "Provider ID (from: mutagent providers list)").addHelpText("after", `
8362
8725
  Examples:
8363
- ${chalk32.dim("$")} mutagent providers test <provider-id>
8364
- ${chalk32.dim("$")} mutagent providers test <provider-id> --json
8726
+ ${chalk33.dim("$")} mutagent providers test <provider-id>
8727
+ ${chalk33.dim("$")} mutagent providers test <provider-id> --json
8365
8728
 
8366
- ${chalk32.dim("Tests connectivity and lists available models for the provider.")}
8729
+ ${chalk33.dim("Tests connectivity and lists available models for the provider.")}
8367
8730
  `).action(async (id) => {
8368
8731
  const isJson = getJsonFlag(providers);
8369
8732
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8378,9 +8741,9 @@ ${chalk32.dim("Tests connectivity and lists available models for the provider.")
8378
8741
  } else {
8379
8742
  if (result.success) {
8380
8743
  output.success(`Provider test passed (${String(result.responseTimeMs)}ms)`);
8381
- console.log(chalk32.green(`Message: ${result.message}`));
8744
+ console.log(chalk33.green(`Message: ${result.message}`));
8382
8745
  if (result.availableModels && result.availableModels.length > 0) {
8383
- console.log(chalk32.bold(`
8746
+ console.log(chalk33.bold(`
8384
8747
  Available Models:`));
8385
8748
  result.availableModels.forEach((model) => {
8386
8749
  console.log(` - ${model}`);
@@ -8389,7 +8752,7 @@ Available Models:`));
8389
8752
  } else {
8390
8753
  output.error(`Provider test failed: ${result.message}`);
8391
8754
  if (result.error) {
8392
- console.log(chalk32.red(`Error: ${result.error}`));
8755
+ console.log(chalk33.red(`Error: ${result.error}`));
8393
8756
  }
8394
8757
  }
8395
8758
  }
@@ -8407,7 +8770,7 @@ Available Models:`));
8407
8770
  init_config();
8408
8771
  import { Command as Command15 } from "commander";
8409
8772
  import inquirer2 from "inquirer";
8410
- import chalk33 from "chalk";
8773
+ import chalk34 from "chalk";
8411
8774
  import { existsSync as existsSync11, mkdirSync as mkdirSync3, writeFileSync as writeFileSync4 } from "fs";
8412
8775
  import { execSync as execSync3 } from "child_process";
8413
8776
  import { join as join6 } from "path";
@@ -8530,13 +8893,13 @@ function writeRcConfig(config, cwd = process.cwd()) {
8530
8893
  function createInitCommand() {
8531
8894
  const init = new Command15("init").description("Initialize MutagenT in your project").option("--non-interactive", "Skip interactive prompts (defaults to CLI-only mode)").addHelpText("after", `
8532
8895
  Examples:
8533
- ${chalk33.dim("$")} mutagent init # Interactive setup wizard
8534
- ${chalk33.dim("$")} mutagent init --non-interactive # CLI-only mode (no prompts)
8896
+ ${chalk34.dim("$")} mutagent init # Interactive setup wizard
8897
+ ${chalk34.dim("$")} mutagent init --non-interactive # CLI-only mode (no prompts)
8535
8898
 
8536
8899
  Modes:
8537
- ${chalk33.bold("Full scaffold")} Install SDK + integration package, create config, setup tracing
8538
- ${chalk33.bold("CLI-only")} Verify auth + create .mutagentrc.json with workspace/endpoint
8539
- ${chalk33.bold("Skip")} Exit without changes
8900
+ ${chalk34.bold("Full scaffold")} Install SDK + integration package, create config, setup tracing
8901
+ ${chalk34.bold("CLI-only")} Verify auth + create .mutagentrc.json with workspace/endpoint
8902
+ ${chalk34.bold("Skip")} Exit without changes
8540
8903
  `).action(async (options) => {
8541
8904
  const isJson = getJsonFlag(init);
8542
8905
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8808,23 +9171,36 @@ Modes:
8808
9171
 
8809
9172
  // src/commands/explore.ts
8810
9173
  import { Command as Command16 } from "commander";
8811
- import chalk34 from "chalk";
9174
+ import chalk35 from "chalk";
8812
9175
  import { resolve as resolve3 } from "path";
8813
9176
  init_errors();
8814
9177
  function createExploreCommand() {
8815
9178
  const explore = new Command16("explore").description("Scan codebase for prompts, datasets, and MutagenT markers").option("-p, --path <dir>", "Directory to scan", ".").option("--depth <n>", "Max directory depth", "10").option("--include <glob>", "Include file pattern", "**/*.{ts,js,py,tsx,jsx}").option("--exclude <dirs>", "Comma-separated directories to exclude", "node_modules,dist,.git,build,.next,__pycache__,venv,.venv").option("--markers-only", "Only find existing MutagenT markers").addHelpText("after", `
8816
9179
  Examples:
8817
- ${chalk34.dim("$")} mutagent explore
8818
- ${chalk34.dim("$")} mutagent explore --path ./src
8819
- ${chalk34.dim("$")} mutagent explore --include "**/*.{ts,py}" --depth 5
8820
- ${chalk34.dim("$")} mutagent explore --markers-only
8821
- ${chalk34.dim("$")} mutagent explore --json
9180
+ ${chalk35.dim("$")} mutagent explore
9181
+ ${chalk35.dim("$")} mutagent explore --path ./src
9182
+ ${chalk35.dim("$")} mutagent explore --include "**/*.{ts,py}" --depth 5
9183
+ ${chalk35.dim("$")} mutagent explore --markers-only
9184
+ ${chalk35.dim("$")} mutagent explore --json
8822
9185
 
8823
9186
  Detection modes:
8824
- ${chalk34.dim("Heuristic")} Template variables ({{var}}), prompt constants, schema definitions
8825
- ${chalk34.dim("Marker")} MutagenT:START/END comment markers from previous uploads
9187
+ ${chalk35.dim("Heuristic")} Template variables ({var} or {{var}}), prompt constants, schema definitions
9188
+ ${chalk35.dim("Marker")} MutagenT:START/END comment markers from previous uploads
9189
+ ${chalk35.dim("Agents")} Multi-turn / tool-calling patterns (informational — WIP)
9190
+
9191
+ ${chalk35.bold("Template variable delimiters (inferred per-file):")}
9192
+ ${chalk35.dim("single")} ${chalk35.cyan("{variable}")} MutagenT platform canonical; LangChain PromptTemplate
9193
+ ${chalk35.dim("double")} ${chalk35.cyan("{{variable}}")} Handlebars / Mustache / LiquidJS / Jinja2 / LangChain ChatPromptTemplate
9194
+
9195
+ ${chalk35.dim("Inference is per-file: majority of bracket style wins, tie goes to single")}
9196
+ ${chalk35.dim('(platform canonical). Fenced code blocks are stripped first; {"json": ...} ')}
9197
+ ${chalk35.dim("literals are skipped via JSON-key lookahead. See the mutagent-cli skill at ")}
9198
+ ${chalk35.dim("concepts/prompt-variables.md for the full contract.")}
8826
9199
 
8827
- ${chalk34.dim("Results are saved to .mutagent/mutation-context.md for use by other commands.")}
9200
+ ${chalk35.dim("With --json, each discovered prompt carries a ")}${chalk35.cyan("delimiter")}${chalk35.dim(" field:")}
9201
+ ${chalk35.dim('{ "file": "src/prompts/summarize.ts", "line": 12, "reason": "template-variable", "delimiter": "single" }')}
9202
+
9203
+ ${chalk35.dim("Results are saved to .mutagent/mutation-context.md for use by other commands.")}
8828
9204
  `).action((options) => {
8829
9205
  const isJson = getJsonFlag(explore);
8830
9206
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8842,7 +9218,7 @@ ${chalk34.dim("Results are saved to .mutagent/mutation-context.md for use by oth
8842
9218
  markersOnly
8843
9219
  };
8844
9220
  if (!isJson) {
8845
- console.log(chalk34.cyan(`
9221
+ console.log(chalk35.cyan(`
8846
9222
  Scanning ${scanPath}...
8847
9223
  `));
8848
9224
  }
@@ -8860,52 +9236,78 @@ Scanning ${scanPath}...
8860
9236
  prompts: result.prompts,
8861
9237
  datasets: result.datasets,
8862
9238
  markers: result.markers,
9239
+ agents: result.agents,
9240
+ agentsWip: result.agents.length > 0 ? {
9241
+ status: "early-access",
9242
+ message: "Agent detection is informational only. Agent optimization & evaluation are WIP.",
9243
+ earlyAccessUrl: "https://www.mutagent.io/agents-partnership"
9244
+ } : undefined,
8863
9245
  summary: {
8864
9246
  totalPrompts: result.prompts.length,
8865
9247
  totalDatasets: result.datasets.length,
8866
- totalMarkers: result.markers.length
9248
+ totalMarkers: result.markers.length,
9249
+ totalAgents: result.agents.length
8867
9250
  },
8868
9251
  contextFile: ".mutagent/mutation-context.md"
8869
9252
  });
8870
9253
  } else {
8871
- const totalFindings = result.prompts.length + result.datasets.length + result.markers.length;
9254
+ const totalFindings = result.prompts.length + result.datasets.length + result.markers.length + result.agents.length;
8872
9255
  if (totalFindings === 0) {
8873
9256
  output.info("No prompts, datasets, or markers found.");
8874
- console.log(chalk34.dim(`
9257
+ console.log(chalk35.dim(`
8875
9258
  Tip: Create a prompt with template variables like {{input}} to get started.`));
8876
9259
  return;
8877
9260
  }
8878
9261
  if (result.prompts.length > 0) {
8879
- console.log(chalk34.bold(` Prompts Found (${String(result.prompts.length)}):`));
9262
+ console.log(chalk35.bold(` Prompts Found (${String(result.prompts.length)}):`));
8880
9263
  console.log();
8881
9264
  for (const p of result.prompts) {
8882
- const confidenceTag = p.confidence === "high" ? chalk34.green("[high]") : p.confidence === "medium" ? chalk34.yellow("[medium]") : chalk34.dim("[low]");
8883
- const reasonTag = chalk34.dim(`[${p.reason}]`);
8884
- console.log(` ${confidenceTag} ${chalk34.green(p.file)}:${chalk34.yellow(String(p.line))} ${reasonTag}`);
8885
- console.log(` ${chalk34.dim(p.preview)}`);
9265
+ const confidenceTag = p.confidence === "high" ? chalk35.green("[high]") : p.confidence === "medium" ? chalk35.yellow("[medium]") : chalk35.dim("[low]");
9266
+ const reasonTag = chalk35.dim(`[${p.reason}]`);
9267
+ console.log(` ${confidenceTag} ${chalk35.green(p.file)}:${chalk35.yellow(String(p.line))} ${reasonTag}`);
9268
+ console.log(` ${chalk35.dim(p.preview)}`);
8886
9269
  }
8887
9270
  console.log();
8888
9271
  }
8889
9272
  if (result.datasets.length > 0) {
8890
- console.log(chalk34.bold(` Datasets Found (${String(result.datasets.length)}):`));
9273
+ console.log(chalk35.bold(` Datasets Found (${String(result.datasets.length)}):`));
8891
9274
  console.log();
8892
9275
  for (const d of result.datasets) {
8893
- console.log(` ${chalk34.green(d.file)} ${chalk34.dim(`(${String(d.items)} items)`)}`);
9276
+ console.log(` ${chalk35.green(d.file)} ${chalk35.dim(`(${String(d.items)} items)`)}`);
8894
9277
  }
8895
9278
  console.log();
8896
9279
  }
8897
9280
  if (result.markers.length > 0) {
8898
- console.log(chalk34.bold(` MutagenT Markers (${String(result.markers.length)}):`));
9281
+ console.log(chalk35.bold(` MutagenT Markers (${String(result.markers.length)}):`));
8899
9282
  console.log();
8900
9283
  for (const m of result.markers) {
8901
- const idPart = m.platformId ? chalk34.cyan(` id=${m.platformId}`) : "";
8902
- console.log(` ${chalk34.green(m.file)}:${chalk34.yellow(String(m.line))} ${chalk34.magenta(m.type)}${idPart}`);
9284
+ const idPart = m.platformId ? chalk35.cyan(` id=${m.platformId}`) : "";
9285
+ console.log(` ${chalk35.green(m.file)}:${chalk35.yellow(String(m.line))} ${chalk35.magenta(m.type)}${idPart}`);
9286
+ }
9287
+ console.log();
9288
+ }
9289
+ if (result.agents.length > 0) {
9290
+ console.log(chalk35.bold(` Agents Detected (${String(result.agents.length)}):`));
9291
+ console.log();
9292
+ for (const a of result.agents) {
9293
+ const confidenceTag = a.confidence === "high" ? chalk35.green("[high]") : a.confidence === "medium" ? chalk35.yellow("[medium]") : chalk35.dim("[low]");
9294
+ const reasonTag = chalk35.dim(`[${a.reason}]`);
9295
+ console.log(` ${confidenceTag} ${chalk35.green(a.file)}:${chalk35.yellow(String(a.line))} ${reasonTag}`);
9296
+ console.log(` ${chalk35.dim(a.preview)}`);
8903
9297
  }
8904
9298
  console.log();
9299
+ console.log(chalk35.yellow(" ┌─ Agents: Early Access / WIP ──────────────────────────────"));
9300
+ console.log(chalk35.yellow(" │ ") + chalk35.bold("Agent optimization & evaluation are actively in development."));
9301
+ console.log(chalk35.yellow(" │ ") + chalk35.dim("Detected patterns above are informational only."));
9302
+ console.log(chalk35.yellow(" │ ") + chalk35.dim("The Prompt path (single-shot + output schema) is production-ready."));
9303
+ console.log(chalk35.yellow(" │ ") + chalk35.dim("Join early access for the Agent path:"));
9304
+ console.log(chalk35.yellow(" │ ") + chalk35.cyan("https://www.mutagent.io/agents-partnership"));
9305
+ console.log(chalk35.yellow(" └────────────────────────────────────────────────────────────"));
9306
+ console.log();
8905
9307
  }
8906
- console.log(chalk34.dim(" ─────────────────────────────────"));
8907
- console.log(` ${chalk34.bold("Summary:")} ${String(result.prompts.length)} prompts, ${String(result.datasets.length)} datasets, ${String(result.markers.length)} markers`);
8908
- console.log(chalk34.dim(` Saved to .mutagent/mutation-context.md`));
9308
+ console.log(chalk35.dim(" ─────────────────────────────────"));
9309
+ console.log(` ${chalk35.bold("Summary:")} ${String(result.prompts.length)} prompts, ${String(result.datasets.length)} datasets, ${String(result.markers.length)} markers, ${String(result.agents.length)} agents`);
9310
+ console.log(chalk35.dim(` Saved to .mutagent/mutation-context.md`));
8909
9311
  console.log();
8910
9312
  }
8911
9313
  } catch (error) {
@@ -8917,181 +9319,1180 @@ Scanning ${scanPath}...
8917
9319
 
8918
9320
  // src/commands/skills.ts
8919
9321
  import { Command as Command17 } from "commander";
8920
- import chalk35 from "chalk";
9322
+ import chalk36 from "chalk";
8921
9323
  import { existsSync as existsSync12, mkdirSync as mkdirSync4, writeFileSync as writeFileSync5 } from "fs";
8922
- import { join as join7 } from "path";
9324
+ import { dirname, join as join7 } from "path";
8923
9325
  import { execSync as execSync4 } from "child_process";
8924
- function findRepoRoot() {
8925
- try {
8926
- return execSync4("git rev-parse --show-toplevel", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
8927
- } catch {
8928
- return process.cwd();
8929
- }
8930
- }
8931
- var SKILL_FRONTMATTER = `---
9326
+
9327
+ // src/generated/skill-content.ts
9328
+ var SKILL_FILES = {
9329
+ "SKILL.md": `---
8932
9330
  name: mutagent-cli
8933
9331
  description: |
8934
9332
  MutagenT CLI - AI Prompt Optimization Platform CLI.
8935
9333
  Guides coding agents through prompt upload, evaluation creation,
8936
9334
  dataset curation, optimization, and framework integration.
8937
9335
  Triggers: "mutagent", "optimize prompt", "upload prompt", "integrate tracing",
8938
- "create evaluation", "upload dataset", "mutagent cli".
8939
- ---`;
8940
- var SKILL_BODY = `# MutagenT CLI Skill
8941
-
8942
- ## First Things First
8943
- Run \`mutagent --help\` — this is the CENTRAL navigation hub containing ALL commands, workflows, and rules.
8944
- Run \`mutagent <command> --help\` BEFORE using ANY command for the first time.
8945
-
8946
- ## Agent Protocol (4 Rules)
8947
- 1. EVERY command MUST include \`--json\`
8948
- 2. Run \`<command> --help\` BEFORE first use
8949
- 3. **MANDATORY**: Parse \`_directive.renderedCard\` and copy it into your CHAT RESPONSE verbatim. The card appears in bash output but that is NOT sufficient — the user cannot read collapsed bash blocks. HARD STOP: do NOT run any further commands until the card text is rendered in your chat message.
8950
- 4. ALL user interaction via AskUserQuestion — CLI is non-interactive
8951
-
8952
- ## Directive System
8953
- Every JSON response may include:
8954
- - \`_directive.renderedCard\` — Pre-formatted card that MUST be shown to the user verbatim
8955
- - \`_directive.instruction\`Next step for the agent
8956
- - \`_directive.next\` — Array of suggested commands
8957
- - \`_links\` — Dashboard/API URLs (format as markdown)
8958
-
8959
- The agent MUST parse and act on directives. Cards in collapsed bash output are invisible to the user always render them in chat.
8960
-
8961
- ## Prompt Variables
8962
- Use \`{single_braces}\`: \`"Analyze: {document}"\`
8963
-
8964
- ## Post-Onboarding Decision Tree
8965
-
8966
- After \`mutagent auth login\`, the user lands in one of 3 paths:
8967
-
8968
- ### Path A: Guided Integration (Tracing)
8969
- 1. \`mutagent explore\`Map codebase, discover prompts/agents
8970
- 2. User selects framework from detected or asks
8971
- 3. \`mutagent integrate <framework>\` Get integration instructions
8972
- 4. Apply tracing code to user's codebase
8973
- 5. Verify with test run
8974
-
8975
- ### Path B: Free-flow Optimization
8976
- 1. \`mutagent explore\` Map codebase, discover prompts
8977
- 2. User selects prompts to upload
8978
- 3. \`mutagent prompts create --data '{...}'\` — Upload with REQUIRED outputSchema
8979
- 4. Guided eval creator OR \`mutagent prompts evaluation create\`
8980
- 5. \`mutagent prompts dataset add\` Upload/curate datasets (named)
8981
- 6. \`mutagent usage --json\` View resource counts
8982
- 7. \`mutagent prompts optimize start\` — Run optimization
8983
- 8. Review scorecard → Apply/Reject optimized prompt
8984
-
8985
- ### Path C: Manual
8986
- User uses CLI commands directly. Run \`mutagent --help\`.
8987
-
8988
- ## Guided Evaluation Creation
8989
- Use \`--guided --json\` when creating evaluations (NEVER \`--guided\` alone it launches interactive prompts that agents can't use):
8990
- \\\`\\\`\\\`
8991
- mutagent prompts evaluation create <id> --guided --json
8992
- \\\`\\\`\\\`
8993
-
8994
- **When to use guided mode:**
8995
- The optimizer requires each criterion to have \`name\`, \`description\` (scoring rubric), and \`evaluationParameter\` (a field from the prompt's inputSchema or outputSchema). If the user's existing evaluation format doesn't match this shape — e.g. they have generic rubrics without per-field targeting, or criteria that don't map 1:1 to schema fields — use \`--guided\` to:
8996
- 1. Fetch the prompt's inputSchema + outputSchema fields
8997
- 2. Show which fields need coverage
8998
- 3. Return a criteria template with the exact JSON shape
8999
-
9000
- **Validation rules the CLI enforces:**
9001
- - Every criterion MUST have \`name\`, \`description\`, and \`evaluationParameter\`
9002
- - \`evaluationParameter\` MUST match an actual schema field name
9003
- - No duplicate \`evaluationParameter\` values each criterion targets a unique field
9004
- - ALL schema fields must be covered (missing fields = error)
9336
+ "create evaluation", "upload dataset", "explore prompts", "mutagent cli".
9337
+ ---
9338
+
9339
+ # MutagenT CLI Skill
9340
+
9341
+ > **Canonical source**: \`mutagent-cli/.claude/skills/mutagent-cli/SKILL.md\`
9342
+ > Packed into the CLI binary via \`scripts/sync-skill.ts\`. Installed to end-user
9343
+ > dev environments via \`mutagent skills install\`. Edit this file, not the installed copy.
9344
+
9345
+ ## SKILL vs CLI responsibility split
9346
+
9347
+ | Layer | Owner | Responsibility |
9348
+ |---|---|---|
9349
+ | **SKILL** (this file + subfiles) | here | journeys, routing, 5 rules, enforcement |
9350
+ | **CLI** | \`mutagent <cmd>\` | commands, flags, \`--json\`, \`_directive.*\`, \`_links\` |
9351
+ | Platform | api.mutagent.io | storage, optimization, eval execution, \`{variable}\` rendering |
9352
+
9353
+ **Rule**: SKILL never duplicates CLI flag lists always \`mutagent <cmd> --help\` for flags.
9354
+
9355
+ ---
9356
+
9357
+ ## 5 Core RulesNON-NEGOTIABLE
9358
+
9359
+ 1. **\`--json\` on EVERY command.** No exceptions. Agents use JSON mode exclusively.
9360
+ 2. **\`<command> --help\` BEFORE first use of any command.** The CLI is the source of truth for flags — this SKILL never inlines them.
9361
+ 3. **NEVER auto-generate eval criteria — collect from user.** Ask the user for each rubric field. See [concepts/eval-criteria.md](./concepts/eval-criteria.md) for the 3-tier format.
9362
+ 4. **Explore-before-modify.** Run \`mutagent explore --json\` before any write operation. Present findings, get user confirmation. Never mutate without discovery first.
9363
+ 5. **Cost transparency before \`optimize start\`.** Run \`mutagent usage --json\` and show the result to the user. Get explicit confirmation before any optimization job.
9364
+
9365
+ ---
9366
+
9367
+ ## Journey Routerroute by user intent
9368
+
9369
+ Match the user's first request. Load ONLY the matching subfile. Do NOT preload the whole set.
9370
+
9371
+ | User said / signal detected | Load subfile | Why |
9372
+ |---|---|---|
9373
+ | "trace", "observe", "integrate", "add framework" | [workflows/tracing.md](./workflows/tracing.md) | Non-destructive, fastest first-value path |
9374
+ | "optimize", "improve", "tune", "evaluate", "upload prompt" | [workflows/optimization.md](./workflows/optimization.md) | Full create→dataset→eval→optimize loop |
9375
+ | "explore", "scan", "find prompts", "what prompts", "discover" | [workflows/exploration.md](./workflows/exploration.md) | Read-only discovery + taxonomy |
9376
+ | \`AgentExecutor\`, \`StateGraph\`, \`createReactAgent\`, \`tool_calls\`, \`@tool\`, \`langgraph\`, \`crewai\`, \`autogen\`, \`openai/agents\`, multi-turn | [workflows/agents.md](./workflows/agents.md) | WIP path surface partnership link |
9377
+ | "how do variables work", "single vs double braces", delimiter | [concepts/prompt-variables.md](./concepts/prompt-variables.md) | Delimiter inference contract |
9378
+ | "what makes a good eval", "rubric", "evaluation criteria" | [concepts/eval-criteria.md](./concepts/eval-criteria.md) | INPUT MVC + OUTPUT Standards |
9379
+ | Unclear / first time | run \`mutagent explore --json\` first, then reroute | Discovery before action |
9380
+
9381
+ ---
9382
+
9383
+ ## Subfile Map
9384
+
9385
+ | File | WHEN to load | WHY | ENFORCEMENT |
9386
+ |---|---|---|---|
9387
+ | [workflows/tracing.md](./workflows/tracing.md) | User wants to add framework tracing / observability | Non-destructive append-only integration sequence | Must run explore first (Rule 4) |
9388
+ | [workflows/optimization.md](./workflows/optimization.md) | User wants to optimize or evaluate a prompt | Full loop: explore → upload → dataset → eval → optimize → apply | Must check usage before optimize (Rule 5); must collect rubrics from user (Rule 3) |
9389
+ | [workflows/exploration.md](./workflows/exploration.md) | User wants to scan codebase, identify prompts vs agents | Read-only discovery; output taxonomy to user | Run only; no writes |
9390
+ | [workflows/agents.md](./workflows/agents.md) | Multi-turn / tool-calling code detected | WIP — do NOT attempt optimizer, surface partnership link | Show WIP card to user verbatim |
9391
+ | [concepts/prompt-variables.md](./concepts/prompt-variables.md) | Any question about \`{var}\` vs \`{{var}}\`, delimiter inference | Brace convention + conversion rules | Load before \`prompts create\` in optimization workflow |
9392
+ | [concepts/eval-criteria.md](./concepts/eval-criteria.md) | Any question about rubric design, MVC, Output Standards | 3-tier rubric format — INPUT-param vs OUTPUT-param scope | Load before \`evaluation create --guided\` in optimization workflow |
9393
+
9394
+ ---
9395
+
9396
+ ## Output handling
9397
+
9398
+ After every CLI command:
9399
+ - **Show the command output to the user.** Command output appears in bash blocks that users may not see — always present the key results in your chat response.
9400
+ - **For evaluation create \`--guided\`**: the CLI provides per-field questions for each schema field. Ask the user each question in turn. Do not skip any field. Do not pre-fill answers.
9401
+ - **For \`optimize results\`**: present the before/after scorecard to the user and confirm whether to apply, view diff, or reject.
9402
+
9403
+ ---
9404
+
9405
+ ## Anti-patterns — NEVER do these
9406
+
9407
+ - Run any command without \`--json\`
9408
+ - Auto-generate eval criteria — always collect from the user
9409
+ - Skip any schema field when collecting evaluation rubrics
9410
+ - Skip \`mutagent explore --json\` before any write operation
9411
+ - Run \`optimize start\` without first showing \`usage --json\` to the user
9412
+ - Increase \`--max-iterations\` above 1 without explicit user consent (each iteration = LLM spend)
9413
+ - Run a multi-turn agent through the prompt optimizer
9414
+ - Skip showing command output results to the user
9415
+ - Inline CLI flags from memory — always read \`--help\` first
9416
+
9417
+ ---
9005
9418
 
9006
9419
  ## State Tracking
9007
- - \`.mutagent/mutation-context.md\` — Codebase index of discovered/uploaded prompts
9008
- - Check and update this file after explore, create, and dataset operations
9009
- - \`mutagent auth status\` — Auth + workspace state
9010
- - Comment markers (\`// MutagenT:START ... // MutagenT:END\`) in source files
9011
-
9012
- ## Post-Optimization
9013
- After \`optimize results\`: ALWAYS show the before/after diff to the user first. Then offer choices via AskUserQuestion: Apply / Reject.
9014
-
9015
- ## Evaluation Criteria Reminder
9016
- Every evaluation MUST specify criteria targeting either:
9017
- - Input variable fields (from inputSchema)
9018
- - Output fields (from outputSchema / structured output)
9019
-
9020
- ## Optimization Cost Control
9021
- - Default max-iterations is 1. NEVER increase without explicit user request.
9022
- - Each iteration incurs LLM costs. Starting >1 iterations without consent = protocol violation.
9023
- - If user wants more, confirm the number via AskUserQuestion first.
9024
-
9025
- ## Claude Code Hooks (Session Telemetry)
9026
- Optional — sends session activity to MutagenT traces API for observability.
9027
-
9028
- Install via \`.claude/settings.local.json\`:
9029
- \\\`\\\`\\\`json
9030
- {"hooks": {
9031
- "SessionStart": [{"matcher": "startup", "hooks": [{"type": "command", "command": "mutagent hooks claude-code session-start"}]}],
9032
- "Stop": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code session-end"}]}],
9033
- "PreToolUse": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code pre-tool-use"}]}],
9034
- "PostToolUse": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code post-tool-use"}]}]
9035
- }}
9036
- \\\`\\\`\\\`
9037
- Or run: \`mutagent init\` (option 1 installs skill + hooks together)`;
9038
- var SKILL_DIR = ".claude/skills/mutagent-cli";
9039
- var SKILL_FILE = "SKILL.md";
9040
- function createSkillsCommand() {
9041
- const skills = new Command17("skills").description("Manage MutagenT CLI skills for coding agents");
9042
- skills.command("install").description("Install MutagenT CLI skill for Claude Code").addHelpText("after", `
9043
- Examples:
9044
- ${chalk35.dim("$")} mutagent skills install
9045
9420
 
9046
- This creates a Claude Code skill at .claude/skills/mutagent-cli/SKILL.md
9047
- that teaches coding agents how to use the MutagenT CLI effectively.
9048
- `).action((_options, cmd) => {
9049
- const parentCmd = cmd.parent?.parent;
9050
- const isJson = parentCmd ? getJsonFlag(parentCmd) : false;
9051
- const output = new OutputFormatter(isJson ? "json" : "table");
9052
- const repoRoot = findRepoRoot();
9053
- const skillDir = join7(repoRoot, SKILL_DIR);
9054
- const skillPath = join7(skillDir, SKILL_FILE);
9055
- if (!existsSync12(skillDir)) {
9056
- mkdirSync4(skillDir, { recursive: true });
9057
- }
9058
- const content = `${SKILL_FRONTMATTER}
9421
+ - \`.mutagent/mutation-context.md\` codebase index of discovered/uploaded prompts. Update after explore, create, dataset ops.
9422
+ - \`mutagent auth status --json\` auth + workspace state.
9059
9423
 
9060
- ${SKILL_BODY}
9061
- `;
9062
- writeFileSync5(skillPath, content, "utf-8");
9063
- if (isJson) {
9064
- output.output({
9065
- installed: true,
9066
- path: skillPath,
9067
- name: "mutagent-cli"
9068
- });
9069
- } else {
9070
- output.success(`Installed MutagenT CLI skill`);
9071
- console.log(` ${chalk35.dim("Path:")} ${skillPath}`);
9072
- console.log("");
9073
- console.log(` ${chalk35.dim("This skill teaches coding agents how to use the MutagenT CLI.")}`);
9074
- console.log(` ${chalk35.dim("It will be automatically loaded by Claude Code when relevant triggers match.")}`);
9075
- }
9076
- });
9077
- return skills;
9078
- }
9424
+ ---
9079
9425
 
9080
- // src/commands/usage.ts
9081
- init_config();
9082
- import { Command as Command18 } from "commander";
9083
- import chalk36 from "chalk";
9084
- init_errors();
9085
- init_sdk_client();
9086
- var PROVIDERS_URL = "https://app.mutagent.io/settings/providers";
9087
- function createUsageCommand() {
9088
- const usage = new Command18("usage").description("Show resource counts (prompts, datasets, evaluations, optimizations, experiments)").addHelpText("after", `
9089
- Examples:
9090
- ${chalk36.dim("$")} mutagent usage
9091
- ${chalk36.dim("$")} mutagent usage --json
9092
- `);
9093
- usage.action(async () => {
9094
- const isJson = getJsonFlag(usage);
9426
+ ## Login (two paths)
9427
+
9428
+ - **CI / automated**: \`export MUTAGENT_API_KEY=mt_... && mutagent login --json\` — no browser, no prompts.
9429
+ - **Onboarding a user**: \`mutagent login --browser --json\` — CLI prints auth URL to stdout, polls 5 min. **Surface the URL verbatim to the user.** \`--non-interactive\` is NOT needed when \`--browser\` is set.
9430
+
9431
+ \`mutagent login\` is canonical. \`mutagent auth login\` is a back-compat alias. Both delegate to \`lib/auth-flow.ts\`. Decision record: [cli-design-principles.md](../../docs/cli-design-principles.md) → Login Unification.
9432
+
9433
+ ---
9434
+
9435
+ ## Extensibility
9436
+
9437
+ Add \`workflows/custom-<name>.md\` with frontmatter \`triggers: ["phrase"]\` auto-discovered by the decision tree fallback row. No rebuild needed.
9438
+ `,
9439
+ "concepts/eval-criteria.md": `---
9440
+ name: mutagent-cli-concepts-eval-criteria
9441
+ description: |
9442
+ Canonical source for MutagenT evaluation-criteria framing:
9443
+ INPUT-param criteria → Minimum Viable Context (MVC);
9444
+ OUTPUT-param criteria → Output Standards.
9445
+ 3-tier rubric discipline: grounded, observable, never vague.
9446
+ Includes current platform validation rules for criterion shape.
9447
+ Mirrored in mutagent/src/modules/prompts/prompt-evaluations/README.md.
9448
+ triggers:
9449
+ - "evaluation criteria"
9450
+ - "eval criteria"
9451
+ - "rubric"
9452
+ - "how do I evaluate"
9453
+ - "minimum viable context"
9454
+ - "output standards"
9455
+ - "evaluationParameter"
9456
+ - "guided eval"
9457
+ ---
9458
+
9459
+ # Concept — Guided Evaluation Criteria
9460
+
9461
+ > **Canonical source** for the INPUT vs OUTPUT framing. Mirrored in:
9462
+ > - \`mutagent-cli/src/commands/prompts/evaluation/guided-workflow.ts\` — kept in sync.
9463
+ > - \`mutagent/src/modules/prompts/prompt-evaluations/README.md\` as BE-side
9464
+ > reference — mirror, not a fork.
9465
+
9466
+ ## The golden rule
9467
+
9468
+ **Every evaluation criterion is scoped to EITHER the inputs OR the outputs of
9469
+ the prompt. Never both in one criterion.**
9470
+
9471
+ Mixing them produces vague criteria ("is the output good given the inputs?")
9472
+ that the optimizer cannot act on. When collecting rubrics from the user,
9473
+ explicitly frame each question around one of these two scopes.
9474
+
9475
+ ---
9476
+
9477
+ ## NEVER auto-generate criteria
9478
+
9479
+ This is Rule 3 of the [5 Core Rules](../SKILL.md). Reasons:
9480
+
9481
+ - Auto-generated rubrics are vague by default ("score based on accuracy")
9482
+ - The optimizer cannot act on vague criteria — it needs observable tiers
9483
+ - The user knows what "correct" means for their domain; the agent does not
9484
+ - Generic rubrics produce noisy scores that mislead the optimizer
9485
+
9486
+ **Always use AskUserQuestion to collect rubrics, one per variable/dimension.**
9487
+
9488
+ When \`evaluation create --guided --json\` runs, the CLI provides a list of
9489
+ fields that need rubrics. Ask the user about EVERY field in that list.
9490
+ Do not skip any field. Do not pre-fill answers. The user must provide each rubric.
9491
+
9492
+ ---
9493
+
9494
+ ## Input-param criteria → Minimum Viable Context (MVC)
9495
+
9496
+ **Scope**: the \`{variables}\` the prompt template consumes. Each \`{variable}\` is
9497
+ an **input param**. The criterion asks: *is the information required for the
9498
+ prompt to succeed actually present?*
9499
+
9500
+ ### 3-tier completeness scale (grounded, never vague)
9501
+
9502
+ - \`1.0\` — all required context present, no ambiguity
9503
+ - \`0.5\` — some context present, enough to attempt but likely partial/hedged answer
9504
+ - \`0.0\` — critical context missing, prompt cannot succeed regardless of model
9505
+
9506
+ ### Discipline rules
9507
+
9508
+ - **Grounded**: every tier must describe an observable property of the input
9509
+ data, not a feeling.
9510
+ - **Observable**: a human reader should assign a tier by looking at a single
9511
+ input row — no model output needed.
9512
+ - **Per-variable**: one criterion per \`{variable}\`. This localizes optimizer signal.
9513
+
9514
+ ### Before asking the user
9515
+
9516
+ Enumerate variables using the delimiter inferred by \`mutagent explore --json\`:
9517
+ - \`delimiter: "single"\` → \`{foo}\` — platform canonical
9518
+ - \`delimiter: "double"\` → \`{{foo}}\` — framework template; convert before upload
9519
+
9520
+ See [concepts/prompt-variables.md](./prompt-variables.md) for the full inference contract.
9521
+
9522
+ ### Example
9523
+
9524
+ For \`"Summarize {document} for {audience}"\`:
9525
+
9526
+ \`\`\`json
9527
+ [
9528
+ {
9529
+ "name": "document-present",
9530
+ "evaluationParameter": "document",
9531
+ "description": "1.0 = actual prose text >= 100 chars. 0.5 = short snippet or summary stub. 0.0 = filename, null, TODO, or empty."
9532
+ },
9533
+ {
9534
+ "name": "audience-concrete",
9535
+ "evaluationParameter": "audience",
9536
+ "description": "1.0 = concrete persona (e.g. 'junior Python devs'). 0.5 = broad category ('engineers'). 0.0 = empty, 'general', 'everyone'."
9537
+ }
9538
+ ]
9539
+ \`\`\`
9540
+
9541
+ Note the 1:1 mapping: one criterion per variable, \`evaluationParameter\` equal
9542
+ to the variable name.
9543
+
9544
+ ---
9545
+
9546
+ ## Output-param criteria → Output Standards
9547
+
9548
+ **Scope**: the model's response shape and content. Pick the dimensions relevant
9549
+ to the task and write one criterion per dimension.
9550
+
9551
+ ### Common dimensions
9552
+
9553
+ - **Content correctness** — right answer, right facts, right tone
9554
+ - **Structural correctness** — matches \`outputSchema\`, required fields, enums, length
9555
+ - **Groundedness** — facts in the output traceable to facts in the input
9556
+ - **Format compliance** — JSON validity, markdown shape, regex match
9557
+
9558
+ ### 3-tier correctness scale
9559
+
9560
+ Concrete pass/fail per tier. **Never** write "0-1 scale" or "score based on
9561
+ accuracy" — those are vague and the optimizer can't act on them.
9562
+
9563
+ ### Example (JSON-output summarizer)
9564
+
9565
+ \`\`\`json
9566
+ {
9567
+ "name": "summary-correctness",
9568
+ "evaluationParameter": "summary",
9569
+ "description": "1.0 = valid JSON, all 3 required fields, covers all key arguments. 0.5 = valid JSON, 1-2 arguments missing/hedged. 0.0 = invalid JSON OR fabricated facts."
9570
+ }
9571
+ \`\`\`
9572
+
9573
+ The \`evaluationParameter\` here is the output-schema field name, not an input
9574
+ variable. Same 1:1 discipline — one criterion per output dimension.
9575
+
9576
+ ---
9577
+
9578
+ ## Platform validation rules (current)
9579
+
9580
+ When creating an evaluation via \`mutagent prompts evaluation create -d '<json>'\`,
9581
+ each criterion must pass these platform-enforced checks:
9582
+
9583
+ | Field | Required | Validation |
9584
+ |---|---|---|
9585
+ | \`name\` | yes | slug-like, no spaces, \`[a-z0-9-_]\` |
9586
+ | \`description\` | yes | non-empty, ideally >= 20 chars with tier definitions |
9587
+ | \`evaluationParameter\` | yes | must match a variable name from the prompt OR an output field name |
9588
+
9589
+ **Common validation failures:**
9590
+ - \`evaluationParameter\` references a variable not in the prompt template → rejected
9591
+ - \`description\` is too short or vague → accepted by platform but produces poor scores
9592
+ - Multiple criteria with the same \`evaluationParameter\` → accepted but wasteful
9593
+
9594
+ ---
9595
+
9596
+ ## How to apply when creating an evaluation
9597
+
9598
+ 1. **Read the prompt template.** Enumerate \`{variables}\` (input params) +
9599
+ expected output shape (from \`outputSchema\` or the code's parse logic).
9600
+ 2. **Ask the user**: "Evaluate INPUTS (is context sufficient) or OUTPUTS
9601
+ (is response correct) first?" — let the user pick the scope.
9602
+ 3. **Collect criteria**: use AskUserQuestion to collect from user, never auto-generate — one per variable (INPUT) or per dimension (OUTPUT),
9603
+ always with a 3-tier rubric describing observable behavior.
9604
+ 4. **Map to platform shape**:
9605
+ \`\`\`typescript
9606
+ {
9607
+ name: string; // short, slug-like
9608
+ description: string; // the 3-tier rubric verbatim
9609
+ evaluationParameter: string; // the variable name OR output field
9610
+ }
9611
+ \`\`\`
9612
+ 5. **Upload** via \`mutagent prompts evaluation create <id> -d '<json>' --json\`.
9613
+
9614
+ The \`--guided\` flag walks the user through this flow interactively — use it
9615
+ when the user is new to the concept. Follow the CLI's next-step guidance in
9616
+ the output to collect rubrics in the correct order.
9617
+
9618
+ ---
9619
+
9620
+ ## Anti-patterns
9621
+
9622
+ - **Auto-generating criteria** — Rule 3: NEVER. Always collect from user.
9623
+ - **Mixing input and output in one criterion** — breaks signal; split into two.
9624
+ - **Vague rubrics** — "0.8 if mostly good" → rewrite with observable tiers.
9625
+ - **One criterion for many variables** — reduces signal, slows optimization.
9626
+ - **Scoring the model, not the data** — MVC scores the INPUT data quality.
9627
+
9628
+ ---
9629
+
9630
+ ## Cross-references
9631
+
9632
+ - [SKILL.md](../SKILL.md) → 5 rules (Rule 3: never auto-generate)
9633
+ - [workflows/optimization.md](../workflows/optimization.md) → steps 7-9 (where this concept is applied)
9634
+ - [concepts/prompt-variables.md](./prompt-variables.md) → delimiter inference (used in MVC step)
9635
+ - \`mutagent/src/modules/prompts/prompt-evaluations/README.md\` → BE mirror
9636
+ `,
9637
+ "concepts/prompt-variables.md": `---
9638
+ name: mutagent-cli-concepts-prompt-variables
9639
+ description: |
9640
+ Prompt template variable delimiter inference contract.
9641
+ Platform canonical is single-brace {variable}. Third-party frameworks vary
9642
+ (Handlebars / Mustache / Liquid / Jinja2 use double {{variable}}).
9643
+ \`mutagent explore\` infers the delimiter per file and surfaces it in
9644
+ \`--json\` output as \`delimiter: "single" | "double"\`.
9645
+ Includes conversion rules for upload and apply phases.
9646
+ triggers:
9647
+ - "prompt variables"
9648
+ - "template variables"
9649
+ - "single vs double brace"
9650
+ - "{variable}"
9651
+ - "{{variable}}"
9652
+ - "delimiter"
9653
+ - "inferPromptVariables"
9654
+ - "brace convention"
9655
+ - "convert variables"
9656
+ ---
9657
+
9658
+ # Concept — Prompt Variables
9659
+
9660
+ ## Platform canonical
9661
+
9662
+ **MutagenT platform uses single-brace \`{variable}\`.** The platform renders
9663
+ prompts by substituting \`{name}\` with the provided value at optimization /
9664
+ evaluation time.
9665
+
9666
+ ## Third-party framework variance
9667
+
9668
+ Real-world codebases use different delimiters depending on which prompt
9669
+ framework the user already has installed:
9670
+
9671
+ | Framework | Delimiter | Example |
9672
+ |---|---|---|
9673
+ | **MutagenT platform** (canonical) | single | \`{document}\` |
9674
+ | **LangChain** \`PromptTemplate\` | single | \`{document}\` |
9675
+ | **LangChain** \`ChatPromptTemplate\` + Mustache | double | \`{{document}}\` |
9676
+ | **Handlebars** | double | \`{{document}}\` |
9677
+ | **Mustache** | double | \`{{document}}\` |
9678
+ | **LiquidJS** | double | \`{{ document }}\` |
9679
+ | **Jinja2** (Python) | double | \`{{ document }}\` |
9680
+
9681
+ ---
9682
+
9683
+ ## Brace conversion — upload and apply
9684
+
9685
+ Getting this wrong breaks templates after optimization. Follow the two-phase rule:
9686
+
9687
+ ### Phase 1 — Upload (code → MutagenT)
9688
+
9689
+ If the code has \`{{double}}\` braces:
9690
+ 1. Warn the user: "Your template uses \`{{double}}\` braces (Handlebars/LangChain Mustache). MutagenT uses \`{single}\` braces. I'll convert before uploading."
9691
+ 2. Convert \`{{name}}\` → \`{name}\` in the prompt content passed to \`mutagent prompts create\`.
9692
+ 3. Record the original delimiter in \`.mutagent/mutation-context.md\` so the apply phase knows to convert back.
9693
+
9694
+ If the code has \`{single}\` braces: no conversion needed — upload as-is.
9695
+
9696
+ ### Phase 2 — Apply (MutagenT → code)
9697
+
9698
+ After optimization, the platform returns a prompt with \`{single}\` braces.
9699
+
9700
+ If the original codebase used \`{{double}}\` braces:
9701
+ 1. Convert \`{name}\` → \`{{name}}\` in the optimized prompt before writing to the source file.
9702
+ 2. Confirm with the user before saving.
9703
+
9704
+ If the original codebase used \`{single}\` braces: write the optimized prompt as-is.
9705
+
9706
+ **Summary table:**
9707
+
9708
+ | Code uses | Upload | Optimized output | Write back to code |
9709
+ |---|---|---|---|
9710
+ | \`{single}\` | as-is | \`{single}\` | as-is |
9711
+ | \`{{double}}\` | convert to \`{single}\` | \`{single}\` | convert back to \`{{double}}\` |
9712
+
9713
+ ---
9714
+
9715
+ ## Per-file inference — \`mutagent explore\`
9716
+
9717
+ The CLI's \`mutagent explore\` command calls \`inferPromptVariables()\` on every
9718
+ matching source file and **infers the delimiter per file** rather than
9719
+ globally. A single repository may contain both LangChain \`PromptTemplate\`
9720
+ (single) and Handlebars email templates (double) side by side.
9721
+
9722
+ ### Inference algorithm
9723
+
9724
+ 1. **Strip fenced markdown code blocks** first (\` \`\`\` ... \`\`\` \`). Avoids false
9725
+ positives from prompts that document JSON examples in fenced blocks.
9726
+ 2. **Count \`{{name}}\` matches** → \`doubleHits\`.
9727
+ 3. **Count \`{name}\` matches** that are NOT adjacent to \`{\` (not part of \`{{...}}\`)
9728
+ and NOT followed by \`"\` (JSON-key skip) → \`singleHits\`.
9729
+ 4. **Majority wins**: \`doubleHits > singleHits\` → \`double\`, else \`single\`.
9730
+ 5. **Tie-break**: singleHits === doubleHits (including the 0/0 case) →
9731
+ \`single\` (platform canonical).
9732
+
9733
+ ### Escaped-JSON caveat
9734
+
9735
+ Prompts like \`"Return {\\"status\\": \\"ok\\"}"\` — the \`{\` is followed by \`"\`, so
9736
+ the single-brace regex deliberately skips it. Never treat literal JSON keys as
9737
+ template variables.
9738
+
9739
+ ---
9740
+
9741
+ ## How to use the delimiter field
9742
+
9743
+ \`mutagent explore --json\` surfaces the inferred delimiter per discovered prompt:
9744
+
9745
+ \`\`\`json
9746
+ {
9747
+ "prompts": [
9748
+ {
9749
+ "file": "src/prompts/summarize.ts",
9750
+ "line": 12,
9751
+ "preview": "const prompt = \`Summarize {document} for {audience}\`;",
9752
+ "reason": "template-variable",
9753
+ "confidence": "high",
9754
+ "delimiter": "single"
9755
+ },
9756
+ {
9757
+ "file": "src/emails/welcome.hbs",
9758
+ "line": 3,
9759
+ "preview": "<p>Hello {{name}}, welcome to {{product}}!</p>",
9760
+ "reason": "template-variable",
9761
+ "confidence": "high",
9762
+ "delimiter": "double"
9763
+ }
9764
+ ]
9765
+ }
9766
+ \`\`\`
9767
+
9768
+ Use the delimiter field to:
9769
+ - Enumerate variables correctly (don't treat \`{{foo}}\` as two \`{foo}\` tokens).
9770
+ - Decide whether to convert before upload (Phase 1 above).
9771
+ - Drive the [concepts/eval-criteria.md](./eval-criteria.md) → MVC step (one criterion per variable).
9772
+
9773
+ ---
9774
+
9775
+ ## Edge cases
9776
+
9777
+ - **Empty prompt** — no variables → tie (0/0) → \`single\` (canonical).
9778
+ - **Mixed delimiters in one file** — majority wins, tied files default to \`single\`.
9779
+ Warn the user: their codebase probably has two prompt systems co-existing.
9780
+ - **Nested braces** \`{{{ foo }}}\` — Handlebars triple-brace (no-escape). Currently
9781
+ matched by the double regex as \`{{ foo }}\`; outer \`}\` ignored. Fine for inference.
9782
+
9783
+ ---
9784
+
9785
+ ## Cross-references
9786
+
9787
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
9788
+ - [workflows/optimization.md](../workflows/optimization.md) → step 3 (delimiter drives variable enumeration) and step 15 (apply conversion)
9789
+ - [concepts/eval-criteria.md](./eval-criteria.md) → MVC (Minimum Viable Context) — uses delimiter to enumerate input params
9790
+ - Source: \`mutagent-cli/src/lib/explorer.ts\` → \`inferPromptVariables()\` and \`DiscoveredPrompt.delimiter\`
9791
+ - Tests: \`mutagent-cli/src/__tests__/lib/explorer.test.ts\`
9792
+ `,
9793
+ "workflows/agents.md": `---
9794
+ name: mutagent-cli-workflows-agents
9795
+ description: |
9796
+ Agent path (multi-turn, tool-calling) — WORK IN PROGRESS.
9797
+ Agent optimization & evaluation are actively in development.
9798
+ This workflow short-circuits to a partnership link, offers read-only CRUD,
9799
+ and optionally extracts a sub-prompt back to the Optimization path.
9800
+ triggers:
9801
+ - "agent"
9802
+ - "langchain agent"
9803
+ - "langgraph"
9804
+ - "crewai"
9805
+ - "autogen"
9806
+ - "openai agents"
9807
+ - "tool calling"
9808
+ - "multi-turn"
9809
+ - "AgentExecutor"
9810
+ - "createReactAgent"
9811
+ - "StateGraph"
9812
+ ---
9813
+
9814
+ # Workflow — Agents (WIP)
9815
+
9816
+ > **This path is WORK IN PROGRESS.** Agent optimization and evaluation are
9817
+ > actively in development. Do NOT run a multi-turn agent through the prompt
9818
+ > optimizer — the platform will reject it, and scores would be meaningless for
9819
+ > a tool-calling loop.
9820
+
9821
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing.
9822
+
9823
+ ---
9824
+
9825
+ ## When this workflow applies
9826
+
9827
+ Load this file when the journey router in [SKILL.md](../SKILL.md) matched one of these
9828
+ signals in the user's code:
9829
+
9830
+ - \`AgentExecutor\`, \`createReactAgent\`, \`createToolCallingAgent\`, \`createStructuredChatAgent\`
9831
+ - \`from "openai/agents"\` or \`from "@openai/agents"\`
9832
+ - \`from "crewai"\`, \`from "autogen"\`, \`from "autogen_agentchat"\`
9833
+ - \`from "@langchain/langgraph"\`, \`StateGraph(\`
9834
+ - \`tool_calls\` / \`toolCalls\` property access
9835
+ - \`@tool\` decorator (Python)
9836
+ - \`tools: [...]\` array in an LLM call config
9837
+ - \`function_call\` / \`tool_choice\` fields
9838
+ - \`while\` loop + LLM call
9839
+
9840
+ \`mutagent explore --json\` flags these under \`agents[]\` (not \`prompts[]\`).
9841
+
9842
+ ---
9843
+
9844
+ ## Required card (show this verbatim to the user)
9845
+
9846
+ When the agent path is triggered, copy this card into your chat response
9847
+ verbatim — do NOT paraphrase, do NOT collapse into a bash block:
9848
+
9849
+ \`\`\`
9850
+ I see you have an Agent (multi-turn / tool-calling). Agent Optimization &
9851
+ Evaluations are actively in development in MutagenT. For early access and
9852
+ to partner with us on the roadmap:
9853
+ → https://www.mutagent.io/agents-partnership
9854
+ \`\`\`
9855
+
9856
+ ---
9857
+
9858
+ ## Sequence
9859
+
9860
+ \`\`\`
9861
+ 1. Run \`mutagent explore --json\` if you haven't already.
9862
+ → Confirm agents[] is non-empty.
9863
+ → Show command output to user.
9864
+
9865
+ 2. Show the WIP card above verbatim in your chat response.
9866
+
9867
+ 3. Use AskUserQuestion to explain that agent code cannot be directly optimized:
9868
+ "Your code looks like a multi-turn agent. I can't run it
9869
+ through the prompt optimizer yet. Would you like to:
9870
+ (a) Join early access → https://www.mutagent.io/agents-partnership
9871
+ (b) Inspect existing agents in MutagenT (read-only CRUD)
9872
+ (c) Extract a single sub-prompt inside the agent loop and optimize it"
9873
+
9874
+ 4. Branch:
9875
+ (a) → surface the URL verbatim. STOP.
9876
+ (b) → \`mutagent agents list --json\` ; \`mutagent agents get <id> --json\`
9877
+ → show results to user. STOP (no mutations available).
9878
+ (c) → extract one sub-prompt, then route to [workflows/optimization.md](./optimization.md)
9879
+ treating the sub-prompt as a standalone Prompt.
9880
+ \`\`\`
9881
+
9882
+ ---
9883
+
9884
+ ## Branch (c) — extracting a sub-prompt
9885
+
9886
+ Multi-turn agents often contain inner prompts that ARE suitable for the
9887
+ Optimization path:
9888
+
9889
+ - a planner prompt ("given this user goal, list the tools you'd call")
9890
+ - a summarizer prompt ("given these tool results, write the final answer")
9891
+ - a classifier prompt ("which tool should handle this input?")
9892
+
9893
+ Each of these is a single-shot prompt with a clear output schema. Extract ONE,
9894
+ treat it as a standalone Prompt.
9895
+
9896
+ When extracting:
9897
+ 1. Identify the exact string literal or template that becomes the sub-prompt.
9898
+ 2. Enumerate its \`{variables}\` per [concepts/prompt-variables.md](../concepts/prompt-variables.md).
9899
+ 3. Confirm with user: "I'll optimize the planner prompt only, not the full agent. Sound right?"
9900
+ 4. On confirmation → load [workflows/optimization.md](./optimization.md) from step 3 (prompts create).
9901
+
9902
+ Do NOT try to extract the whole agent loop at once.
9903
+
9904
+ ---
9905
+
9906
+ ## What NOT to do
9907
+
9908
+ - **Do not** call \`mutagent prompts optimize start\` on an agent file.
9909
+ - **Do not** upload an agent's full system prompt + tool definitions as a single "prompt".
9910
+ - **Do not** suggest "try it anyway" — the WIP status is deliberate.
9911
+ - **Do not** skip showing the WIP card to the user — they need the partnership URL.
9912
+
9913
+ ---
9914
+
9915
+ ## Cross-references
9916
+
9917
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
9918
+ - [workflows/exploration.md](./exploration.md) → where \`agents[]\` entries are first detected
9919
+ - [workflows/optimization.md](./optimization.md) → branch (c) destination
9920
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → \`{foo}\` vs \`{{foo}}\` for sub-prompt extraction
9921
+ - Partnership link: https://www.mutagent.io/agents-partnership
9922
+ `,
9923
+ "workflows/exploration.md": `---
9924
+ name: mutagent-cli-workflows-exploration
9925
+ description: |
9926
+ Read-only codebase scan workflow. Discovers prompts and agents, classifies
9927
+ each as optimization-eligible (prompt) or WIP (agent), and presents a
9928
+ structured taxonomy to the user before any write operations.
9929
+ triggers:
9930
+ - "explore"
9931
+ - "scan codebase"
9932
+ - "find prompts"
9933
+ - "what prompts do I have"
9934
+ - "discover prompts"
9935
+ - "what's in my codebase"
9936
+ ---
9937
+
9938
+ # Workflow — Exploration (Read-Only Discovery)
9939
+
9940
+ > **This workflow is read-only.** No writes, no uploads, no mutations.
9941
+ > Use it to understand what's in the codebase before deciding next steps.
9942
+
9943
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. Key reminders:
9944
+ - \`--json\` on every command
9945
+ - \`<command> --help\` before first use
9946
+ - This workflow ends with a user question — never auto-proceed to writes
9947
+
9948
+ ---
9949
+
9950
+ ## When this workflow applies
9951
+
9952
+ - User said "explore my codebase", "what prompts do I have", "find prompts", "scan"
9953
+ - Intent is unclear and you need to discover before acting
9954
+ - User wants to understand what's optimizable before committing to a path
9955
+
9956
+ ---
9957
+
9958
+ ## Prompt vs Agent — taxonomy
9959
+
9960
+ Before running explore, understand what the CLI will return:
9961
+
9962
+ **PROMPT (optimization-eligible)**
9963
+ - Single LLM call: input → LLM → output
9964
+ - Template with \`{variables}\` (single) or \`{{variables}}\` (double brace)
9965
+ - Can be evaluated with G-Eval (input/output pair)
9966
+ - Can be optimized by Metatuner
9967
+ - Appears in \`prompts[]\` in explore output
9968
+
9969
+ **AGENT (NOT directly optimizable)**
9970
+ - Multi-turn loop: input → LLM → tool → LLM → ... → output
9971
+ - Dynamic branching execution
9972
+ - Cannot be optimized like a prompt (no fixed output schema)
9973
+ - Requires agent-level evaluation (not yet available)
9974
+ - Appears in \`agents[]\` in explore output
9975
+
9976
+ ### Agent detection heuristics (what \`mutagent explore\` flags)
9977
+
9978
+ | Pattern | Framework | Classification |
9979
+ |---|---|---|
9980
+ | \`new StateGraph(\` / \`StateGraph.compile()\` | LangGraph | agent |
9981
+ | \`new Agent(\` / \`agent.execute()\` | Mastra / custom | agent |
9982
+ | \`AgentExecutor\` / \`createReactAgent\` / \`createToolCallingAgent\` | LangChain | agent |
9983
+ | \`from "openai/agents"\` / \`from "@openai/agents"\` | OpenAI Agents SDK | agent |
9984
+ | \`from "crewai"\` / \`from "autogen"\` | CrewAI / AutoGen | agent |
9985
+ | \`tools: [...]\` + LLM call in a loop | custom | agent |
9986
+ | \`while\` loop + LLM call | custom agent loop | agent |
9987
+ | \`@tool\` decorator (Python) | any | agent |
9988
+ | \`tool_calls\` / \`toolCalls\` property access | any | agent |
9989
+ | Single \`openai.chat()\` or template string | — | prompt |
9990
+ | String with \`{variable}\` / \`{{variable}}\` | — | prompt |
9991
+
9992
+ ---
9993
+
9994
+ ## Workflow steps
9995
+
9996
+ \`\`\`
9997
+ 1. mutagent explore --json
9998
+ → surfaces: prompts[], agents[], datasets[], markers[]
9999
+ → show command output to user
10000
+
10001
+ 2. Classify results:
10002
+ - prompts[] → optimization-eligible (single-shot, output schema)
10003
+ - agents[] → WIP (multi-turn/tool-calling) — route to [workflows/agents.md](./agents.md)
10004
+ - datasets[] → existing local data (uploadable in optimization workflow)
10005
+ - markers[] → already-uploaded items (show dashboard links)
10006
+
10007
+ 3. Note the \`delimiter\` field on each prompt entry:
10008
+ - "single" → {variable} — MutagenT native, no conversion needed
10009
+ - "double" → {{variable}} — framework template, conversion required on upload
10010
+ See [concepts/prompt-variables.md](../concepts/prompt-variables.md) for the conversion rules.
10011
+
10012
+ 4. Use AskUserQuestion to present findings and ask which prompts to upload:
10013
+ "Here's what I found in your codebase:
10014
+ - N prompt(s) found: [list files]
10015
+ - N agent(s) found: [list files] (WIP — not optimizable yet)
10016
+ - N dataset(s) found: [list]
10017
+ - N already-uploaded: [list]
10018
+ What would you like to do?"
10019
+
10020
+ 5. Route based on user answer:
10021
+ - "optimize this prompt" → load [workflows/optimization.md](./optimization.md)
10022
+ - "add tracing" → load [workflows/tracing.md](./tracing.md)
10023
+ - "tell me about the agent" → load [workflows/agents.md](./agents.md)
10024
+ - "nothing yet" → STOP (read-only complete)
10025
+ \`\`\`
10026
+
10027
+ ---
10028
+
10029
+ ## Output handling
10030
+
10031
+ After step 1, show the command output to the user before proceeding to classification. Do NOT proceed to step 2 until the user has seen the results.
10032
+
10033
+ ---
10034
+
10035
+ ## Brace convention note
10036
+
10037
+ \`mutagent explore --json\` surfaces \`delimiter: "single" | "double"\` per discovered prompt. Use this before deciding how to enumerate variables. See [concepts/prompt-variables.md](../concepts/prompt-variables.md) for the full inference contract and conversion rules.
10038
+
10039
+ ---
10040
+
10041
+ ## Common pitfalls
10042
+
10043
+ - Skipping the classification step → user gets a raw JSON dump instead of a next-action recommendation
10044
+ - Treating \`agents[]\` entries as optimization-eligible → they are NOT; route to [workflows/agents.md](./agents.md)
10045
+ - Auto-proceeding to writes after explore → always confirm with user first
10046
+ - Ignoring the \`delimiter\` field → wrong variable enumeration when uploading a double-brace prompt
10047
+
10048
+ ---
10049
+
10050
+ ## Cross-references
10051
+
10052
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
10053
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → \`{foo}\` vs \`{{foo}}\` inference + conversion
10054
+ - [workflows/optimization.md](./optimization.md) → next step after exploration (prompt path)
10055
+ - [workflows/tracing.md](./tracing.md) → next step after exploration (integration path)
10056
+ - [workflows/agents.md](./agents.md) → next step after exploration (agent path)
10057
+ `,
10058
+ "workflows/optimization.md": `---
10059
+ name: mutagent-cli-workflows-optimization
10060
+ description: |
10061
+ Full prompt optimization journey: explore → prompts create → dataset add →
10062
+ evaluation create (guided) → optimize start → watch/status → results → apply.
10063
+ Enforces 5 rules: --json always, --help before first use, user-collected
10064
+ eval criteria, explore-before-modify, cost transparency before optimize.
10065
+ triggers:
10066
+ - "optimize prompt"
10067
+ - "improve prompt"
10068
+ - "tune prompt"
10069
+ - "evaluate prompt"
10070
+ - "upload prompt"
10071
+ - "create evaluation"
10072
+ - "upload dataset"
10073
+ - "run optimizer"
10074
+ - "start optimization"
10075
+ ---
10076
+
10077
+ # Workflow — Optimization (Full Journey)
10078
+
10079
+ > **This is the full loop.** Expect 5-10 CLI calls and at least one long-running
10080
+ > optimizer job. Each step requires user confirmation. Never auto-run the full
10081
+ > chain without presenting findings at each gate.
10082
+
10083
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. All 5 rules apply here:
10084
+ - \`--json\` on every command (Rule 1)
10085
+ - \`--help\` before first use of any command (Rule 2)
10086
+ - **NEVER auto-generate eval criteria** — collect from user (Rule 3)
10087
+ - \`mutagent explore --json\` before any write (Rule 4)
10088
+ - \`mutagent usage --json\` before \`optimize start\` (Rule 5)
10089
+
10090
+ ---
10091
+
10092
+ ## When this workflow applies
10093
+
10094
+ - User said "optimize prompt", "improve prompt", "tune prompt"
10095
+ - User wants to upload a prompt and measure its quality
10096
+ - User wants to run the Metatuner optimizer
10097
+
10098
+ ---
10099
+
10100
+ ## Required pre-reads (load these before the relevant steps)
10101
+
10102
+ | Step | Pre-read | Why |
10103
+ |---|---|---|
10104
+ | Before \`prompts create\` | [concepts/prompt-variables.md](../concepts/prompt-variables.md) | Brace convention — single \`{var}\` vs double \`{{var}}\` affects how variables are parsed |
10105
+ | Before \`evaluation create --guided\` | [concepts/eval-criteria.md](../concepts/eval-criteria.md) | INPUT MVC + OUTPUT Standards — 3-tier rubric format |
10106
+
10107
+ ---
10108
+
10109
+ ## Directive chain
10110
+
10111
+ \`\`\`
10112
+ explore → prompts create → dataset add → evaluation create --guided
10113
+ → [Use AskUserQuestion to collect rubrics from the user for each field]
10114
+ → evaluation create -d '<json>'
10115
+ → usage check
10116
+ → [Use AskUserQuestion to confirm optimization cost with user]
10117
+ → optimize start
10118
+ → optimize status (poll)
10119
+ → optimize results
10120
+ → [Use AskUserQuestion to present scorecard: Apply / View Diff / Reject]
10121
+ → On Apply: Edit source file
10122
+ \`\`\`
10123
+
10124
+ ---
10125
+
10126
+ ## Full workflow steps
10127
+
10128
+ \`\`\`
10129
+ 1. mutagent explore --json
10130
+ → find candidate prompts in codebase
10131
+ → show command output to user
10132
+ → confirm with user: "Which prompt would you like to optimize?"
10133
+
10134
+ 2. mutagent prompts --help
10135
+ mutagent prompts create --help
10136
+ → read flags before using (Rule 2)
10137
+
10138
+ 3. Load [concepts/prompt-variables.md](../concepts/prompt-variables.md)
10139
+ → determine if prompt uses {single} or {{double}} braces
10140
+ → if double-brace: warn user about conversion requirement
10141
+
10142
+ 4. mutagent prompts create --name <name> [--system-file / --raw-file] --json
10143
+ → show command output to user
10144
+ → record promptId from response
10145
+
10146
+ 5. mutagent prompts dataset add --help
10147
+ → read flags before using
10148
+
10149
+ 6. mutagent prompts dataset add <promptId> -d '[...]' --name "<name>" --json
10150
+ → upload dataset rows (input/output pairs)
10151
+ → show command output to user
10152
+ → record datasetId
10153
+
10154
+ 7. Load [concepts/eval-criteria.md](../concepts/eval-criteria.md)
10155
+ → understand INPUT-param (MVC) vs OUTPUT-param (Standards) scope
10156
+
10157
+ 8. mutagent prompts evaluation create <promptId> --guided --json
10158
+ → the CLI provides a list of fields, each needing a rubric
10159
+ → follow the CLI's next-step guidance in the output
10160
+ → for EVERY field listed (INPUT scope first, then OUTPUT):
10161
+ - ask the user the provided question for that field
10162
+ - wait for user response
10163
+ - do NOT skip any field
10164
+ - do NOT auto-generate any answer
10165
+ → collect at minimum: one INPUT criterion per {variable}, one OUTPUT criterion
10166
+
10167
+ 9. mutagent prompts evaluation create <promptId> -d '<json>' --json
10168
+ → upload the criteria collected in step 8
10169
+ → show command output to user
10170
+ → record evaluationId
10171
+
10172
+ 10. mutagent usage --json
10173
+ → show usage/quota to user
10174
+ → confirm with user: "This optimization will use N iterations (~X min each).
10175
+ You have Y remaining. Proceed?"
10176
+ → STOP if user declines
10177
+
10178
+ 11. mutagent prompts optimize start <promptId> \\
10179
+ --dataset <datasetId> \\
10180
+ --evaluation <evaluationId> \\
10181
+ --max-iterations 1 \\
10182
+ --json
10183
+ → NEVER set --max-iterations > 1 without explicit user consent
10184
+ → record jobId from response
10185
+
10186
+ 12. mutagent prompts optimize status <jobId> --json
10187
+ → poll until status = "completed" or "failed"
10188
+ → show progress to user
10189
+
10190
+ 13. mutagent prompts optimize results <jobId> --json
10191
+ → ALWAYS show before/after scorecard to user
10192
+
10193
+ 14. Confirm with user: "Here's the before/after scorecard. What would you like to do?
10194
+ (a) Apply — update the prompt in your source file
10195
+ (b) View diff first
10196
+ (c) Reject — keep the original"
10197
+
10198
+ 15. On Apply (a): Edit the prompt in the user's source file
10199
+ → replace old prompt text with optimized version
10200
+ → if double-brace codebase: convert {variable} back to {{variable}}
10201
+ → confirm with user before saving
10202
+ \`\`\`
10203
+
10204
+ ---
10205
+
10206
+ ## Cost control
10207
+
10208
+ - Default \`--max-iterations 1\` is the only value you may use without explicit consent.
10209
+ - If user requests more: confirm the number with user → confirm the cost implication.
10210
+ - Each iteration = one full G-Eval run over the dataset × LLM calls. This costs real money.
10211
+
10212
+ ---
10213
+
10214
+ ## Apply / Reject rules
10215
+
10216
+ - **Apply**: edit the source file with the optimized prompt. If the codebase used \`{{double}}\` braces, convert the optimized \`{single}\` brace output back to \`{{double}}\` before writing. See [concepts/prompt-variables.md](../concepts/prompt-variables.md) → Conversion.
10217
+ - **Reject**: no file changes. Record the jobId in \`.mutagent/mutation-context.md\` for future reference.
10218
+ - **View diff**: show a unified diff of old vs new prompt text before asking again.
10219
+
10220
+ ---
10221
+
10222
+ ## Common pitfalls
10223
+
10224
+ - Running \`optimize start\` before \`evaluation create\` → optimizer has no scoring signal
10225
+ - Mixing INPUT and OUTPUT criteria in the same rubric → vague scores
10226
+ - Applying results without showing the before/after scorecard first
10227
+ - Forgetting to convert \`{single}\` back to \`{{double}}\` after apply in double-brace codebases
10228
+ - Starting with \`--max-iterations 3\` without consent
10229
+
10230
+ ---
10231
+
10232
+ ## Guided Dataset Creation
10233
+
10234
+ When no local dataset exists, use the guided mode to curate high-quality test data:
10235
+
10236
+ \`\`\`
10237
+ mutagent prompts dataset add <prompt-id> --guided --json
10238
+ \`\`\`
10239
+
10240
+ The CLI analyzes the prompt's inputSchema + outputSchema and returns:
10241
+ - **Suggested categories**: edge cases, hard cases, representative cases
10242
+ - **Per-field questions**: what values, what edge cases, what correct output looks like
10243
+ - **Template item**: showing the expected shape for each dataset entry
10244
+ - **Priority rule**: hard cases that expose prompt weaknesses > easy cases that always pass
10245
+
10246
+ Collect answers from the user, then construct 5-10 dataset items covering all categories.
10247
+ Ensure at least 2 hard/edge cases per category. Then upload:
10248
+
10249
+ \`\`\`
10250
+ mutagent prompts dataset add <prompt-id> -d '<constructed-json>' --name '<name>' --json
10251
+ \`\`\`
10252
+
10253
+ ---
10254
+
10255
+ ## Cross-references
10256
+
10257
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
10258
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → brace convention + conversion (critical for steps 3 and 15)
10259
+ - [concepts/eval-criteria.md](../concepts/eval-criteria.md) → INPUT MVC + OUTPUT Standards + 3-tier rubric (critical for steps 7-8)
10260
+ - [workflows/exploration.md](./exploration.md) → step 1 of this workflow
10261
+ - [workflows/tracing.md](./tracing.md) → parallel or follow-up path
10262
+ `,
10263
+ "workflows/tracing.md": `---
10264
+ name: mutagent-cli-workflows-tracing
10265
+ description: |
10266
+ Framework integration workflow. Adds MutagenT tracing/observability to the
10267
+ user's codebase. Non-destructive (append-only), fastest first-value path.
10268
+ Detects framework via \`mutagent explore\`, generates snippet via
10269
+ \`mutagent integrate\`, applies via Edit tool, verifies via traces list.
10270
+ triggers:
10271
+ - "add tracing"
10272
+ - "add observability"
10273
+ - "integrate framework"
10274
+ - "integrate mutagent"
10275
+ - "add mutagent to my code"
10276
+ - "instrument my prompts"
10277
+ - "trace my prompts"
10278
+ ---
10279
+
10280
+ # Workflow — Tracing (Framework Integration)
10281
+
10282
+ > **Scope**: read + append-only on user code. Never modify existing business
10283
+ > logic — only add tracing imports and decorators.
10284
+
10285
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. Key reminders:
10286
+ - \`--json\` on every command
10287
+ - \`<command> --help\` before first use
10288
+ - Explore before modify (Rule 4)
10289
+ - Show command output to user after every mutation
10290
+
10291
+ ---
10292
+
10293
+ ## When this workflow applies
10294
+
10295
+ - User said "add tracing", "add observability", "integrate \\<framework\\>"
10296
+ - User wants to see their prompts captured in the MutagenT dashboard
10297
+ - Fastest path to first value — prefer this before suggesting optimization
10298
+
10299
+ ---
10300
+
10301
+ ## Supported frameworks
10302
+
10303
+ \`mutagent integrate --help\` is the authoritative list. Common entries:
10304
+
10305
+ | Framework | Signal in codebase |
10306
+ |---|---|
10307
+ | LangChain | \`from "langchain"\`, \`PromptTemplate\`, \`ChatPromptTemplate\` |
10308
+ | LangGraph | \`from "@langchain/langgraph"\`, \`StateGraph\` |
10309
+ | OpenAI SDK | \`import OpenAI\`, \`openai.chat.completions.create\` |
10310
+ | Vercel AI SDK | \`import { generateText }\` from \`"ai"\` |
10311
+ | Mastra | \`from "@mastra/core"\` |
10312
+ | Custom / raw | any string template with \`{variable}\` |
10313
+
10314
+ ---
10315
+
10316
+ ## Workflow steps
10317
+
10318
+ \`\`\`
10319
+ 1. mutagent explore --json
10320
+ → detect which framework is in use
10321
+ → show command output to user
10322
+
10323
+ 2. mutagent integrate --help
10324
+ → read the available frameworks and flags (ALWAYS before step 3)
10325
+
10326
+ 3. mutagent integrate <framework> --json
10327
+ → get the integration snippet
10328
+
10329
+ 4. Use AskUserQuestion to confirm before applying code changes: "I'll add the tracing snippet to <file>. Proceed?"
10330
+ → show the snippet preview before applying
10331
+
10332
+ 5. Apply snippet via Edit tool
10333
+ → code change happens here — append imports + decorators only
10334
+ → never touch existing business logic
10335
+
10336
+ 6. mutagent traces list --json
10337
+ → verify integration: check traces count > 0
10338
+ → show results and dashboard link to user
10339
+ \`\`\`
10340
+
10341
+ ---
10342
+
10343
+ ## Scope guard
10344
+
10345
+ This path is **READ + APPEND-ONLY** on the user's code:
10346
+ - ✓ Add import at top of file
10347
+ - ✓ Wrap existing function call with tracing decorator
10348
+ - ✗ Rename variables
10349
+ - ✗ Refactor logic
10350
+ - ✗ Remove existing code
10351
+
10352
+ If the integration snippet requires a significant rewrite, confirm scope with the user before proceeding.
10353
+
10354
+ ---
10355
+
10356
+ ## Post-integration state
10357
+
10358
+ After step 6:
10359
+ - Update \`.mutagent/mutation-context.md\` with the integration marker
10360
+ - Show the dashboard link from traces output so user can verify traces in UI
10361
+ - If user wants to optimize the traced prompt → route to [workflows/optimization.md](./optimization.md)
10362
+
10363
+ ---
10364
+
10365
+ ## Common pitfalls
10366
+
10367
+ - Skipping step 1 and guessing the framework → let \`explore\` detect it
10368
+ - Forgetting step 6 → user has no proof the integration works
10369
+ - Editing files outside the \`integrate --json\` snippet block
10370
+ - Not showing command results to the user after mutations
10371
+
10372
+ ---
10373
+
10374
+ ## Cross-references
10375
+
10376
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
10377
+ - [workflows/exploration.md](./exploration.md) → step 1 of this workflow
10378
+ - [workflows/optimization.md](./optimization.md) → natural next step after tracing
10379
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → variable inference for traced prompts
10380
+ `
10381
+ };
10382
// Convenience alias for the entry-point skill document (SKILL.md), or ""
// when it is absent from the generated bundle.
var SKILL_CONTENT = SKILL_FILES["SKILL.md"] ?? "";
10383
+
10384
+ // src/commands/skills.ts
10385
/**
 * Resolve the repository root via `git rev-parse --show-toplevel`.
 * Falls back to the current working directory when the command fails
 * (not inside a git repo, or git is unavailable).
 * @returns {string} absolute path to the repo root (or cwd fallback)
 */
function findRepoRoot() {
  let gitOutput;
  try {
    gitOutput = execSync4("git rev-parse --show-toplevel", {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
  } catch {
    // Best-effort fallback: behave sensibly outside a git checkout.
    return process.cwd();
  }
  return gitOutput.trim();
}
10395
// Install destination for the skill bundle, relative to the repo root.
var SKILL_DIR = ".claude/skills/mutagent-cli";
// Entry-point file name: sorted first on install and required to exist
// (see assertSkillFilesValid).
var SKILL_ENTRY_FILE = "SKILL.md";
10397
/**
 * Guard against a broken build of the embedded skill bundle.
 * Verifies that SKILL_FILES is non-empty and that the entry document
 * (SKILL_ENTRY_FILE) exists, is a non-blank string, and begins with
 * YAML frontmatter. Throws an Error with repair instructions otherwise.
 * @throws {Error} when the generated skill content is missing or malformed
 */
function assertSkillFilesValid() {
  const bundleIsEmpty = Object.keys(SKILL_FILES).length === 0;
  if (bundleIsEmpty) {
    const rebuildHint =
      "SKILL_FILES is missing or empty. This indicates a broken build: " +
      "scripts/sync-skill.ts must regenerate src/generated/skill-content.ts " +
      "from .claude/skills/mutagent-cli/ before build/test. " +
      "Run `bun run sync-skill` to debug.";
    throw new Error(rebuildHint);
  }
  const entry = SKILL_FILES[SKILL_ENTRY_FILE];
  const entryIsBlank = typeof entry !== "string" || entry.trim().length === 0;
  if (entryIsBlank) {
    throw new Error(
      `SKILL_FILES["${SKILL_ENTRY_FILE}"] is missing or empty. ` +
        "Check .claude/skills/mutagent-cli/SKILL.md and run `bun run sync-skill`."
    );
  }
  // Skill documents must carry YAML frontmatter for the agent loader.
  if (!entry.startsWith("---")) {
    throw new Error(
      `SKILL_FILES["${SKILL_ENTRY_FILE}"] is malformed: expected YAML frontmatter ("---") at start.`
    );
  }
}
10409
/**
 * Return the embedded skill file map (relative path → markdown body),
 * validating the bundle first via assertSkillFilesValid().
 * @returns {Record<string, string>} the SKILL_FILES map
 * @throws {Error} when the generated bundle fails validation
 */
function getSkillFiles() {
  assertSkillFilesValid();
  const files = SKILL_FILES;
  return files;
}
10413
/**
 * Build the `mutagent skills` command group.
 * `skills install` writes the embedded skill bundle (SKILL_FILES) into
 * .claude/skills/mutagent-cli/ at the repo root so Claude Code can load it.
 * @returns {Command} the configured commander sub-command
 */
function createSkillsCommand() {
  const skills = new Command17("skills").description("Manage MutagenT CLI skills for coding agents");
  skills.command("install").description("Install MutagenT CLI skill for Claude Code").addHelpText("after", `
Examples:
  ${chalk36.dim("$")} mutagent skills install

This creates a Claude Code skill at .claude/skills/mutagent-cli/SKILL.md
that teaches coding agents how to use the MutagenT CLI effectively.
`).action((_options, cmd) => {
    // The global --json flag lives on the root command (two parents up).
    const parentCmd = cmd.parent?.parent;
    const isJson = parentCmd ? getJsonFlag(parentCmd) : false;
    const output = new OutputFormatter(isJson ? "json" : "table");
    const repoRoot = findRepoRoot();
    const skillDir = join7(repoRoot, SKILL_DIR);
    const files = getSkillFiles(); // throws if the embedded bundle is broken
    const writtenFiles = [];
    let totalBytes = 0;
    // SKILL.md first, everything else alphabetical — stable, readable listing.
    const sortedKeys = Object.keys(files).sort((a, b) => {
      if (a === SKILL_ENTRY_FILE)
        return -1;
      if (b === SKILL_ENTRY_FILE)
        return 1;
      return a.localeCompare(b);
    });
    for (const relPath of sortedKeys) {
      const destPath = join7(skillDir, relPath);
      const parentDir = dirname(destPath);
      if (!existsSync12(parentDir)) {
        mkdirSync4(parentDir, { recursive: true });
      }
      const raw = files[relPath] ?? "";
      // Ensure every written file ends with exactly one trailing newline.
      const finalContent = raw.endsWith("\n") ? raw : `${raw}\n`;
      writeFileSync5(destPath, finalContent, "utf-8");
      // FIX: report true UTF-8 byte counts. `String.prototype.length` counts
      // UTF-16 code units, which undercounts the skill content's multi-byte
      // characters (box-drawing chars, arrows, emoji).
      const byteCount = Buffer.byteLength(finalContent, "utf-8");
      writtenFiles.push({ path: destPath, bytes: byteCount });
      totalBytes += byteCount;
    }
    if (isJson) {
      output.output({
        installed: true,
        name: "mutagent-cli",
        skillDir,
        files: writtenFiles,
        totalBytes
      });
    } else {
      output.success(`Installed MutagenT CLI skill`);
      console.log(` ${chalk36.dim("Dir:")} ${skillDir}`);
      console.log("");
      // writtenFiles and sortedKeys are parallel arrays built in the same loop.
      for (let i = 0; i < writtenFiles.length; i++) {
        const entry = writtenFiles[i];
        if (!entry)
          continue;
        const relPath = sortedKeys[i] ?? "";
        const isLast = i === writtenFiles.length - 1;
        const prefix = isLast ? "└─" : "├─";
        console.log(` ${chalk36.dim(prefix)} ${relPath} ${chalk36.dim(`(${String(entry.bytes)} bytes)`)}`);
      }
      console.log("");
      console.log(` ${chalk36.dim(`${String(sortedKeys.length)} file(s), ${String(totalBytes)} bytes total.`)}`);
      console.log(` ${chalk36.dim("This skill teaches coding agents how to use the MutagenT CLI.")}`);
      console.log(` ${chalk36.dim("It will be automatically loaded by Claude Code when relevant triggers match.")}`);
    }
  });
  return skills;
}
10480
+
10481
+ // src/commands/usage.ts
10482
+ init_config();
10483
+ import { Command as Command18 } from "commander";
10484
+ import chalk37 from "chalk";
10485
+ init_errors();
10486
+ init_sdk_client();
10487
+ var PROVIDERS_URL = "https://app.mutagent.io/settings/providers";
10488
+ function createUsageCommand() {
10489
+ const usage = new Command18("usage").description("Show resource counts (prompts, datasets, evaluations, optimizations, experiments)").addHelpText("after", `
10490
+ Examples:
10491
+ ${chalk37.dim("$")} mutagent usage
10492
+ ${chalk37.dim("$")} mutagent usage --json
10493
+ `);
10494
+ usage.action(async () => {
10495
+ const isJson = getJsonFlag(usage);
9095
10496
  const output = new OutputFormatter(isJson ? "json" : "table");
9096
10497
  try {
9097
10498
  const apiKey = getApiKey();
@@ -9143,17 +10544,17 @@ Examples:
9143
10544
  });
9144
10545
  } else {
9145
10546
  console.log("");
9146
- console.log(chalk36.bold("\uD83D\uDCCA MutagenT Usage"));
9147
- console.log(chalk36.dim("─".repeat(45)));
10547
+ console.log(chalk37.bold("\uD83D\uDCCA MutagenT Usage"));
10548
+ console.log(chalk37.dim("─".repeat(45)));
9148
10549
  console.log("");
9149
- console.log(chalk36.bold("Resources:"));
9150
- console.log(` Prompts: ${chalk36.cyan(String(promptCount))}`);
9151
- console.log(` Datasets: ${chalk36.cyan(String(datasetCount))}`);
9152
- console.log(` Evaluations: ${chalk36.cyan(String(evaluationCount))}`);
9153
- console.log(` Optimizations: ${chalk36.cyan(String(optimizationCount))}`);
9154
- console.log(` Experiments: ${chalk36.cyan(String(experimentCount))}`);
10550
+ console.log(chalk37.bold("Resources:"));
10551
+ console.log(` Prompts: ${chalk37.cyan(String(promptCount))}`);
10552
+ console.log(` Datasets: ${chalk37.cyan(String(datasetCount))}`);
10553
+ console.log(` Evaluations: ${chalk37.cyan(String(evaluationCount))}`);
10554
+ console.log(` Optimizations: ${chalk37.cyan(String(optimizationCount))}`);
10555
+ console.log(` Experiments: ${chalk37.cyan(String(experimentCount))}`);
9155
10556
  console.log("");
9156
- console.log(` Providers: ${chalk36.underline(PROVIDERS_URL)}`);
10557
+ console.log(` Providers: ${chalk37.underline(PROVIDERS_URL)}`);
9157
10558
  console.log("");
9158
10559
  }
9159
10560
  } catch (error) {
@@ -9441,11 +10842,11 @@ Claude Code Session Telemetry:
9441
10842
 
9442
10843
  // src/commands/feedback.ts
9443
10844
  import { Command as Command20 } from "commander";
9444
- import chalk37 from "chalk";
10845
+ import chalk38 from "chalk";
9445
10846
  init_errors();
9446
10847
  init_config();
9447
10848
  import { readFileSync as readFileSync11 } from "fs";
9448
- import { join as join9, dirname } from "path";
10849
+ import { join as join9, dirname as dirname2 } from "path";
9449
10850
  import { fileURLToPath } from "url";
9450
10851
  var VALID_CATEGORIES = ["bug", "feature", "improvement", "praise"];
9451
10852
  function getCliVersion() {
@@ -9453,7 +10854,7 @@ function getCliVersion() {
9453
10854
  return process.env.CLI_VERSION;
9454
10855
  }
9455
10856
  try {
9456
- const __dirname2 = dirname(fileURLToPath(import.meta.url));
10857
+ const __dirname2 = dirname2(fileURLToPath(import.meta.url));
9457
10858
  const pkgPath = join9(__dirname2, "..", "..", "package.json");
9458
10859
  const pkg = JSON.parse(readFileSync11(pkgPath, "utf-8"));
9459
10860
  return pkg.version ?? "0.1.1";
@@ -9495,12 +10896,12 @@ async function postToServer(payload, endpoint, apiKey, workspaceId, organization
9495
10896
  }
9496
10897
  function createFeedbackCommand() {
9497
10898
  const feedback = new Command20("feedback").description("Send product feedback to MutagenT").addHelpText("after", `
9498
- ${chalk37.bold("Examples:")}
9499
- ${chalk37.cyan('mutagent feedback send -m "Great optimization results!"')}
9500
- ${chalk37.cyan('mutagent feedback send -m "CLI crashed on export" --category bug')}
9501
- ${chalk37.cyan('mutagent feedback send -m "Need batch operations" --category feature --json')}
10899
+ ${chalk38.bold("Examples:")}
10900
+ ${chalk38.cyan('mutagent feedback send -m "Great optimization results!"')}
10901
+ ${chalk38.cyan('mutagent feedback send -m "CLI crashed on export" --category bug')}
10902
+ ${chalk38.cyan('mutagent feedback send -m "Need batch operations" --category feature --json')}
9502
10903
 
9503
- ${chalk37.yellow("AI Agent (MANDATORY):")}
10904
+ ${chalk38.yellow("AI Agent (MANDATORY):")}
9504
10905
  ALWAYS use --json: mutagent feedback send -m "..." --category improvement --json
9505
10906
  Use this command to report bugs, request features, or share UX feedback.
9506
10907
  `).action(() => {
@@ -9511,18 +10912,18 @@ ${chalk37.yellow("AI Agent (MANDATORY):")}
9511
10912
  }
9512
10913
  function registerFeedbackSend(feedback) {
9513
10914
  feedback.command("send").description("Send feedback about the MutagenT platform").requiredOption("-m, --message <text>", "Feedback message").option("--category <type>", `Feedback category: ${VALID_CATEGORIES.join(", ")}`, "improvement").option("--session <id>", "Link feedback to a specific session").addHelpText("after", `
9514
- ${chalk37.bold("Examples:")}
9515
- ${chalk37.dim("$")} mutagent feedback send -m "The optimization UX could show progress better"
9516
- ${chalk37.dim("$")} mutagent feedback send -m "CLI errored on traces export" --category bug
9517
- ${chalk37.dim("$")} mutagent feedback send -m "Love the guided eval!" --category praise --json
9518
-
9519
- ${chalk37.bold("Categories:")}
9520
- ${chalk37.bold("bug")} Something is broken or not working as expected
9521
- ${chalk37.bold("feature")} Request a new capability
9522
- ${chalk37.bold("improvement")} Suggest a UX or workflow enhancement (default)
9523
- ${chalk37.bold("praise")} Share what you love about the platform
9524
-
9525
- ${chalk37.yellow("AI Agent (MANDATORY):")}
10915
+ ${chalk38.bold("Examples:")}
10916
+ ${chalk38.dim("$")} mutagent feedback send -m "The optimization UX could show progress better"
10917
+ ${chalk38.dim("$")} mutagent feedback send -m "CLI errored on traces export" --category bug
10918
+ ${chalk38.dim("$")} mutagent feedback send -m "Love the guided eval!" --category praise --json
10919
+
10920
+ ${chalk38.bold("Categories:")}
10921
+ ${chalk38.bold("bug")} Something is broken or not working as expected
10922
+ ${chalk38.bold("feature")} Request a new capability
10923
+ ${chalk38.bold("improvement")} Suggest a UX or workflow enhancement (default)
10924
+ ${chalk38.bold("praise")} Share what you love about the platform
10925
+
10926
+ ${chalk38.yellow("AI Agent (MANDATORY):")}
9526
10927
  ALWAYS use --json: mutagent feedback send -m "..." --json
9527
10928
  Auto-captured context (CLI version, platform, node version) is included automatically.
9528
10929
  `).action(async (options) => {
@@ -9579,7 +10980,7 @@ if (process.env.CLI_VERSION) {
9579
10980
  cliVersion = process.env.CLI_VERSION;
9580
10981
  } else {
9581
10982
  try {
9582
- const __dirname2 = dirname2(fileURLToPath2(import.meta.url));
10983
+ const __dirname2 = dirname3(fileURLToPath2(import.meta.url));
9583
10984
  const pkgPath = join10(__dirname2, "..", "..", "package.json");
9584
10985
  const pkg = JSON.parse(readFileSync12(pkgPath, "utf-8"));
9585
10986
  cliVersion = pkg.version ?? cliVersion;
@@ -9594,90 +10995,100 @@ program.name("mutagent").description(`MutagenT CLI - AI-native prompt optimizati
9594
10995
  showGlobalOptions: true
9595
10996
  });
9596
10997
  program.addHelpText("after", `
9597
- ${chalk38.yellow("Non-Interactive Mode (CI/CD & Coding Agents):")}
9598
- export MUTAGENT_API_KEY=mt_... ${chalk38.dim("or")} --api-key mt_...
9599
- --json ${chalk38.dim("for structured output")} --non-interactive ${chalk38.dim("to disable prompts")}
9600
-
9601
- ${chalk38.yellow("Command Navigation:")}
9602
- mutagent login ${chalk38.dim("Login (browser OAuth — recommended)")}
9603
- mutagent auth status ${chalk38.dim("Check auth + workspace")}
9604
- mutagent init ${chalk38.dim("Initialize project (.mutagentrc.json)")}
9605
- mutagent explore ${chalk38.dim("Discover prompts in codebase")}
9606
- mutagent workspaces list --json ${chalk38.dim("List workspaces (verify ID)")}
9607
- mutagent config set workspace <id> ${chalk38.dim("Set active workspace")}
9608
- mutagent usage --json ${chalk38.dim("Show account usage + provider status")}
9609
-
9610
- mutagent prompts create --help ${chalk38.dim("Upload prompt (read help first!)")}
9611
- mutagent prompts list --json ${chalk38.dim("List prompts")}
9612
- mutagent prompts get <id> --json ${chalk38.dim("Full prompt details + schemas")}
9613
-
9614
- mutagent prompts dataset add --help ${chalk38.dim("Upload dataset (read help first!)")}
9615
- mutagent prompts dataset list <id> ${chalk38.dim("List datasets")}
9616
-
9617
- mutagent prompts evaluation create --help ${chalk38.dim("Create eval (read help first!)")}
9618
- mutagent prompts evaluation create <id> --guided --json ${chalk38.dim("Guided eval workflow")}
9619
- mutagent prompts evaluation list <id> --json ${chalk38.dim("List evaluations")}
9620
-
9621
- mutagent prompts optimize start --help ${chalk38.dim("Run optimization (read help first!)")}
9622
- mutagent prompts optimize status <job-id> ${chalk38.dim("Poll progress")}
9623
- mutagent prompts optimize results <job-id> ${chalk38.dim("View scorecard")}
9624
-
9625
- mutagent feedback send -m "..." ${chalk38.dim("Send product feedback")}
9626
- mutagent feedback send -m "..." --category bug ${chalk38.dim("Report a bug")}
9627
-
9628
- mutagent integrate <framework> ${chalk38.dim("Framework integration guide")}
9629
- mutagent hooks --help ${chalk38.dim("Hook setup for Claude Code telemetry")}
9630
- mutagent playground run <id> --input '{...}' ${chalk38.dim("Quick test")}
9631
-
9632
- ${chalk38.yellow(" Workflow: Framework Integration (Tracing):")}
9633
- 1. mutagent explore ${chalk38.dim("← discover prompts/agents in codebase")}
9634
- 2. mutagent integrate <framework> ${chalk38.dim(" get integration instructions")}
9635
- 3. Apply tracing code to your codebase ${chalk38.dim("← follow the guide output")}
9636
- 4. mutagent traces list --json ${chalk38.dim("← verify traces are arriving")}
9637
-
9638
- ${chalk38.yellow(" Workflow: Evaluate Optimize:")}
9639
- 1. mutagent prompts create --help ${chalk38.dim("← read help")}
9640
- 2. mutagent prompts create ... --json ${chalk38.dim(" upload prompt with {variables} + inputSchema")}
9641
- 3. mutagent prompts dataset add --help ${chalk38.dim("← read help")}
9642
- 4. mutagent prompts dataset add <id> ... --json ${chalk38.dim("← upload dataset")}
9643
- 5. mutagent prompts evaluation create <id> --guided --json ${chalk38.dim("← guided eval")}
10998
+ ${AGENTS_WIP_BANNER}
10999
+
11000
+ ${chalk39.yellow("Non-Interactive Mode (CI/CD & Coding Agents):")}
11001
+ export MUTAGENT_API_KEY=mt_... ${chalk39.dim("or")} --api-key mt_...
11002
+ --json ${chalk39.dim("for structured output")} --non-interactive ${chalk39.dim("to disable prompts")}
11003
+
11004
+ ${chalk39.yellow("Command Navigation:")}
11005
+ mutagent login ${chalk39.dim("Login (browser OAuth — recommended)")}
11006
+ mutagent auth status ${chalk39.dim("Check auth + workspace")}
11007
+ mutagent init ${chalk39.dim("Initialize project (.mutagentrc.json)")}
11008
+ mutagent explore ${chalk39.dim("Discover prompts in codebase")}
11009
+ mutagent workspaces list --json ${chalk39.dim("List workspaces (verify ID)")}
11010
+ mutagent config set workspace <id> ${chalk39.dim("Set active workspace")}
11011
+ mutagent usage --json ${chalk39.dim("Show account usage + provider status")}
11012
+
11013
+ mutagent prompts create --help ${chalk39.dim("Upload prompt (read help first!)")}
11014
+ mutagent prompts list --json ${chalk39.dim("List prompts")}
11015
+ mutagent prompts get <id> --json ${chalk39.dim("Full prompt details + schemas")}
11016
+
11017
+ mutagent prompts dataset add --help ${chalk39.dim("Upload dataset (read help first!)")}
11018
+ mutagent prompts dataset list <id> ${chalk39.dim("List datasets")}
11019
+
11020
+ mutagent prompts evaluation create --help ${chalk39.dim("Create eval (read help first!)")}
11021
+ mutagent prompts evaluation create <id> --guided --json ${chalk39.dim("Guided eval workflow")}
11022
+ mutagent prompts evaluation list <id> --json ${chalk39.dim("List evaluations")}
11023
+
11024
+ mutagent prompts optimize start --help ${chalk39.dim("Run optimization (read help first!)")}
11025
+ mutagent prompts optimize status <job-id> ${chalk39.dim("Poll progress")}
11026
+ mutagent prompts optimize results <job-id> ${chalk39.dim("View scorecard")}
11027
+
11028
+ mutagent feedback send -m "..." ${chalk39.dim("Send product feedback")}
11029
+ mutagent feedback send -m "..." --category bug ${chalk39.dim("Report a bug")}
11030
+
11031
+ mutagent integrate <framework> ${chalk39.dim("Framework integration guide")}
11032
+ mutagent hooks --help ${chalk39.dim("Hook setup for Claude Code telemetry")}
11033
+ mutagent playground run <id> --input '{...}' ${chalk39.dim("Quick test")}
11034
+
11035
+ ${chalk39.yellow(" Workflow: Framework Integration (Tracing):")}
11036
+ 1. mutagent explore ${chalk39.dim("← discover prompts/agents in codebase")}
11037
+ 2. mutagent integrate <framework> ${chalk39.dim("← get integration instructions")}
11038
+ 3. Apply tracing code to your codebase ${chalk39.dim("← follow the guide output")}
11039
+ 4. mutagent traces list --json ${chalk39.dim(" verify traces are arriving")}
11040
+
11041
+ ${chalk39.yellow(" Workflow: Evaluate Optimize:")}
11042
+ 1. mutagent prompts create --help ${chalk39.dim("← read help")}
11043
+ 2. mutagent prompts create ... --json ${chalk39.dim("← upload prompt with {variables} + inputSchema")}
11044
+ 3. mutagent prompts dataset add --help ${chalk39.dim("← read help")}
11045
+ 4. mutagent prompts dataset add <id> ... --json ${chalk39.dim("← upload dataset")}
11046
+ 5. mutagent prompts evaluation create <id> --guided --json ${chalk39.dim("← guided eval")}
9644
11047
  6. mutagent prompts optimize start <id> --dataset <d> --evaluation <e> --json
9645
11048
 
9646
- ${chalk38.yellow("Post-Onboarding Decision Tree:")}
9647
- After ${chalk38.bold("mutagent auth login")}, users land in one of 3 paths:
9648
- ${chalk38.bold("Path A")} (Tracing): explore → integrate <framework> → apply tracing → verify
9649
- ${chalk38.bold("Path B")} (Optimization): explore → prompts create → dataset add → eval create → optimize
9650
- ${chalk38.bold("Path C")} (Manual): Use CLI commands directly — run mutagent <command> --help
11049
+ ${chalk39.yellow("Post-Onboarding Decision Tree:")}
11050
+ After ${chalk39.bold("mutagent auth login")}, users land in one of 3 paths:
11051
+ ${chalk39.bold("Path A")} (Tracing): explore → integrate <framework> → apply tracing → verify
11052
+ ${chalk39.bold("Path B")} (Optimization): explore → prompts create → dataset add → eval create → optimize
11053
+ ${chalk39.bold("Path C")} (Manual): Use CLI commands directly — run mutagent <command> --help
9651
11054
 
9652
- ${chalk38.yellow("Directive System:")}
11055
+ ${chalk39.yellow("Directive System:")}
9653
11056
  Every --json response may include:
9654
- ${chalk38.bold("_directive.renderedCard")} Pre-formatted card for the user ${chalk38.red("(MUST be shown in chat)")}
9655
- ${chalk38.bold("_directive.instruction")} Next step for the agent
9656
- ${chalk38.bold("_directive.next")} Array of suggested follow-up commands
9657
- ${chalk38.bold("_links")} Dashboard/API URLs (format as markdown links)
11057
+ ${chalk39.bold("_directive.renderedCard")} Pre-formatted card for the user ${chalk39.red("(MUST be shown in chat)")}
11058
+ ${chalk39.bold("_directive.instruction")} Next step for the agent
11059
+ ${chalk39.bold("_directive.next")} Array of suggested follow-up commands
11060
+ ${chalk39.bold("_links")} Dashboard/API URLs (format as markdown links)
9658
11061
 
9659
- ${chalk38.yellow("Evaluation Criteria Format:")}
9660
- Each criterion MUST have: ${chalk38.bold("name")}, ${chalk38.bold("description")} (scoring rubric), ${chalk38.bold("evaluationParameter")}
11062
+ ${chalk39.yellow("Evaluation Criteria Format:")}
11063
+ Each criterion MUST have: ${chalk39.bold("name")}, ${chalk39.bold("description")} (scoring rubric), ${chalk39.bold("evaluationParameter")}
9661
11064
  evaluationParameter MUST match an inputSchema or outputSchema field name
9662
11065
  No duplicate evaluationParameter values — each criterion targets a unique field
9663
11066
  ALL schema fields must be covered (missing fields = error)
9664
- Use ${chalk38.bold("--guided --json")} to generate criteria templates from prompt schemas
9665
-
9666
- ${chalk38.yellow("Optimization Cost Control:")}
9667
- Default max-iterations is 1. ${chalk38.red("NEVER increase without explicit user request.")}
11067
+ Use ${chalk39.bold("--guided --json")} to generate criteria templates from prompt schemas
11068
+
11069
+ ${chalk39.bold("Two framings — coding agents MUST distinguish:")}
11070
+ ${chalk39.bold(" INPUT fields Minimum Viable Context")}
11071
+ What info MUST be present in the input for the prompt to succeed.
11072
+ Rubric should include a ${chalk39.bold("completeness scale")} (e.g., 0 = missing critical context, 5 = complete).
11073
+ ${chalk39.dim("Input fields are typically `{variables}` in your prompt template.")}
11074
+ ${chalk39.bold("• OUTPUT fields → Output Standards")}
11075
+ Content correctness + structural correctness + a ${chalk39.bold("correctness scale")} (e.g., 0 = wrong, 5 = perfect).
11076
+
11077
+ ${chalk39.yellow("Optimization Cost Control:")}
11078
+ Default max-iterations is 1. ${chalk39.red("NEVER increase without explicit user request.")}
9668
11079
  Each iteration incurs LLM costs — confirm with user before starting >1.
9669
11080
 
9670
- ${chalk38.yellow("Post-Optimization:")}
9671
- After ${chalk38.bold("optimize results")}: ALWAYS show the before/after diff to the user first.
9672
- Then offer choices: ${chalk38.bold("Apply")} / ${chalk38.bold("Reject")}.
11081
+ ${chalk39.yellow("Post-Optimization:")}
11082
+ After ${chalk39.bold("optimize results")}: ALWAYS show the before/after diff to the user first.
11083
+ Then offer choices: ${chalk39.bold("Apply")} / ${chalk39.bold("Reject")}.
9673
11084
 
9674
- ${chalk38.yellow("State Tracking:")}
11085
+ ${chalk39.yellow("State Tracking:")}
9675
11086
  .mutagent/mutation-context.md — Codebase index of discovered/uploaded prompts
9676
11087
  Update after explore, create, and dataset operations
9677
11088
  mutagent auth status — Auth + workspace state
9678
11089
  Comment markers (// MutagenT:START ... // MutagenT:END) in source files
9679
11090
 
9680
- ${chalk38.yellow("AI Agent Rules (MANDATORY for coding agents):")}
11091
+ ${chalk39.yellow("AI Agent Rules (MANDATORY for coding agents):")}
9681
11092
  1. Login (two paths):
9682
11093
  - CI / fully automated: export MUTAGENT_API_KEY=mt_... then mutagent login --json
9683
11094
  - Helping a user onboard: mutagent login --browser --json
@@ -9688,15 +11099,15 @@ ${chalk38.yellow("AI Agent Rules (MANDATORY for coding agents):")}
9688
11099
  3. Run <command> --help BEFORE first use of any command
9689
11100
  4. Use --guided --json for evaluation creation (NEVER --guided alone)
9690
11101
  5. Parse _directive.renderedCard and copy it into your CHAT RESPONSE verbatim
9691
- ${chalk38.red("HARD STOP")}: do NOT run further commands until the card is rendered in chat
11102
+ ${chalk39.red("HARD STOP")}: do NOT run further commands until the card is rendered in chat
9692
11103
  6. After mutagent init, verify workspace: mutagent workspaces list --json
9693
11104
  7. Use {single_braces} for template variables in prompts
9694
11105
  8. Collect evaluation criteria from the user — NEVER auto-generate
9695
11106
  9. ALL user interaction via AskUserQuestion — CLI is non-interactive
9696
11107
  ${!hasCredentials() ? `
9697
- ` + chalk38.yellow(" Warning: Not authenticated. Run: mutagent login") + `
11108
+ ` + chalk39.yellow(" Warning: Not authenticated. Run: mutagent login") + `
9698
11109
  ` : ""}${!hasRcConfig() ? `
9699
- ` + chalk38.green(" Get started: mutagent init") + `
11110
+ ` + chalk39.green(" Get started: mutagent init") + `
9700
11111
  ` : ""}`);
9701
11112
  var rawArgs = process.argv.slice(2);
9702
11113
  if (rawArgs.includes("-v") || rawArgs.includes("--version")) {
@@ -9737,5 +11148,5 @@ program.addCommand(createHooksCommand());
9737
11148
  program.addCommand(createFeedbackCommand());
9738
11149
  program.parse();
9739
11150
 
9740
- //# debugId=C3949CD3A9488F4264756E2164756E21
11151
+ //# debugId=4EA6EB9078B51CC264756E2164756E21
9741
11152
  //# sourceMappingURL=cli.js.map