@mutagent/cli 0.1.116 → 0.1.117

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -1241,9 +1241,9 @@ var init_sdk_client = __esm(() => {
1241
1241
 
1242
1242
  // src/bin/cli.ts
1243
1243
  import { Command as Command21 } from "commander";
1244
- import chalk38 from "chalk";
1244
+ import chalk39 from "chalk";
1245
1245
  import { readFileSync as readFileSync12 } from "fs";
1246
- import { join as join10, dirname as dirname2 } from "path";
1246
+ import { join as join10, dirname as dirname3 } from "path";
1247
1247
  import { fileURLToPath as fileURLToPath2 } from "url";
1248
1248
 
1249
1249
  // src/commands/auth.ts
@@ -1683,12 +1683,61 @@ import { resolve as resolve2 } from "path";
1683
1683
  // src/lib/explorer.ts
1684
1684
  import { readdirSync, readFileSync as readFileSync3, statSync } from "fs";
1685
1685
  import { join as join3, relative, extname, basename } from "path";
1686
// Template-variable detection: double-brace {{var}} and single-brace {var}
// delimiters. Fenced markdown code blocks are stripped before matching so
// code examples inside prompts are not counted as template variables.
var DOUBLE_VAR_PATTERN = /\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}/g;
var SINGLE_VAR_PATTERN = /(?<![\\{])\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}(?!\})/g;
var FENCED_CODE_PATTERN = /```[\s\S]*?```/g;
// Heuristics used elsewhere in the explorer to spot prompt-related
// declarations, schemas, and MutagenT markers in scanned source files.
var PROMPT_NAME_PATTERN = /(?:const|let|var|export)\s+\w*(?:prompt|system|agent|instruction|template)\w*\s*=/i;
var SCHEMA_PATTERN = /["']?(?:inputSchema|outputSchema|properties|required)["']?\s*[=:]/;
var ZOD_PROMPT_SCHEMA_PATTERN = /(?:const|let|var|export)\s+\w*(?:prompt|output|input|response|schema|message)\w*\s*=\s*z\.object\s*\(/i;
var PYDANTIC_PATTERN = /class\s+\w+\s*\(\s*(?:BaseModel|BaseSettings)\s*\)/;
var MARKER_START_PATTERN = /MutagenT:START\s+(\w+)(?:\s+id=(\S+))?/;
// High-confidence signals that a file wires up an LLM agent framework
// (LangChain, OpenAI Agents SDK, CrewAI, AutoGen, LangGraph).
var AGENT_HIGH_PATTERNS = [
  { pattern: /\bAgentExecutor\b/, reason: "langchain-agent-executor" },
  { pattern: /\bcreate(?:React|OpenAIFunctions|Tool(?:Calling)?|Structured(?:Chat)?)Agent\b/, reason: "langchain-create-agent" },
  { pattern: /\bnew\s+Agent\s*\(/, reason: "agent-constructor" },
  { pattern: /from\s+['"]openai\/agents['"]/, reason: "openai-agents-sdk" },
  { pattern: /from\s+['"]@openai\/agents['"]/, reason: "openai-agents-sdk" },
  { pattern: /from\s+['"]crewai['"]/, reason: "crewai-import" },
  { pattern: /from\s+['"]autogen(?:_agentchat)?['"]/, reason: "autogen-import" },
  { pattern: /from\s+['"]@?langgraph['"]/, reason: "langgraph-import" },
  { pattern: /from\s+['"]@langchain\/langgraph['"]/, reason: "langgraph-import" },
  { pattern: /\bStateGraph\s*\(/, reason: "langgraph-stategraph" }
];
// Weaker signals: tool-calling vocabulary that often, but not always,
// indicates agent behavior — reported with "medium" confidence.
var AGENT_MEDIUM_PATTERNS = [
  { pattern: /\btool_calls\b/, reason: "tool-calls" },
  { pattern: /\btoolCalls\b/, reason: "tool-calls" },
  { pattern: /^\s*tools\s*[:=]\s*\[/, reason: "tools-array" },
  { pattern: /@tool\b/, reason: "tool-decorator" },
  { pattern: /\bfunction_call\b/, reason: "function-call" },
  { pattern: /\btool_choice\b/, reason: "tool-choice" }
];
/**
 * Detect template variables in prompt text and decide which delimiter style
 * the content predominantly uses: "double" for {{var}}, "single" for {var}.
 * Fenced ``` code blocks are removed first so embedded examples don't count.
 * Returns { variables, delimiter } with variables deduplicated in
 * first-seen order. Ties (including zero matches on both sides) resolve
 * to "single".
 */
function inferPromptVariables(content) {
  const stripped = content.replace(FENCED_CODE_PATTERN, "");
  // Build a fresh RegExp per call so the shared /g patterns never leak
  // lastIndex state between invocations.
  const collect = (pattern) => {
    const names = [];
    for (const match of stripped.matchAll(new RegExp(pattern.source, "g"))) {
      if (match[1]) {
        names.push(match[1]);
      }
    }
    return names;
  };
  const doubleMatches = collect(DOUBLE_VAR_PATTERN);
  const singleMatches = collect(SINGLE_VAR_PATTERN);
  const delimiter = doubleMatches.length > singleMatches.length ? "double" : "single";
  const variables = [...new Set(delimiter === "double" ? doubleMatches : singleMatches)];
  return { variables, delimiter };
}
1692
1741
  function walkDir(dir, extensions, excludeDirs, maxDepth, currentDepth = 0) {
1693
1742
  if (currentDepth >= maxDepth)
1694
1743
  return [];
@@ -1728,20 +1777,23 @@ function scanForPrompts(filePath, relativePath) {
1728
1777
  } catch {
1729
1778
  return results;
1730
1779
  }
1780
+ const fileDelimiter = inferPromptVariables(content).delimiter;
1731
1781
  const lines = content.split(`
1732
1782
  `);
1733
1783
  for (let i = 0;i < lines.length; i++) {
1734
1784
  const line = lines[i];
1735
1785
  if (!line)
1736
1786
  continue;
1737
- if (TEMPLATE_VAR_PATTERN.test(line)) {
1787
+ const inferred = inferPromptVariables(line);
1788
+ if (inferred.variables.length > 0) {
1738
1789
  const preview = line.trim().substring(0, 80);
1739
1790
  results.push({
1740
1791
  file: relativePath,
1741
1792
  line: i + 1,
1742
1793
  preview,
1743
1794
  reason: "template-variable",
1744
- confidence: "high"
1795
+ confidence: "high",
1796
+ delimiter: fileDelimiter
1745
1797
  });
1746
1798
  }
1747
1799
  if (PROMPT_NAME_PATTERN.test(line)) {
@@ -1752,7 +1804,8 @@ function scanForPrompts(filePath, relativePath) {
1752
1804
  line: i + 1,
1753
1805
  preview,
1754
1806
  reason: "prompt-constant",
1755
- confidence: "medium"
1807
+ confidence: "medium",
1808
+ delimiter: fileDelimiter
1756
1809
  });
1757
1810
  }
1758
1811
  }
@@ -1764,7 +1817,8 @@ function scanForPrompts(filePath, relativePath) {
1764
1817
  line: i + 1,
1765
1818
  preview,
1766
1819
  reason: "zod-schema",
1767
- confidence: "low"
1820
+ confidence: "low",
1821
+ delimiter: fileDelimiter
1768
1822
  });
1769
1823
  }
1770
1824
  }
@@ -1776,8 +1830,55 @@ function scanForPrompts(filePath, relativePath) {
1776
1830
  line: i + 1,
1777
1831
  preview,
1778
1832
  reason: "pydantic-model",
1833
+ confidence: "medium",
1834
+ delimiter: fileDelimiter
1835
+ });
1836
+ }
1837
+ }
1838
+ }
1839
+ return results;
1840
+ }
1841
+ function scanForAgents(filePath, relativePath) {
1842
+ const results = [];
1843
+ let content;
1844
+ try {
1845
+ content = readFileSync3(filePath, "utf-8");
1846
+ } catch {
1847
+ return results;
1848
+ }
1849
+ const lines = content.split(`
1850
+ `);
1851
+ const seen = new Set;
1852
+ for (let i = 0;i < lines.length; i++) {
1853
+ const line = lines[i];
1854
+ if (!line)
1855
+ continue;
1856
+ for (const { pattern, reason } of AGENT_HIGH_PATTERNS) {
1857
+ if (pattern.test(line) && !seen.has(i)) {
1858
+ results.push({
1859
+ file: relativePath,
1860
+ line: i + 1,
1861
+ preview: line.trim().substring(0, 80),
1862
+ reason,
1863
+ confidence: "high"
1864
+ });
1865
+ seen.add(i);
1866
+ break;
1867
+ }
1868
+ }
1869
+ if (seen.has(i))
1870
+ continue;
1871
+ for (const { pattern, reason } of AGENT_MEDIUM_PATTERNS) {
1872
+ if (pattern.test(line)) {
1873
+ results.push({
1874
+ file: relativePath,
1875
+ line: i + 1,
1876
+ preview: line.trim().substring(0, 80),
1877
+ reason,
1779
1878
  confidence: "medium"
1780
1879
  });
1880
+ seen.add(i);
1881
+ break;
1781
1882
  }
1782
1883
  }
1783
1884
  }
@@ -1833,7 +1934,8 @@ function scanJsonForSchemas(filePath, relativePath) {
1833
1934
  line: 1,
1834
1935
  preview: preview.substring(0, 80),
1835
1936
  reason: "json-schema",
1836
- confidence: "medium"
1937
+ confidence: "medium",
1938
+ delimiter: "single"
1837
1939
  });
1838
1940
  return results;
1839
1941
  }
@@ -1890,12 +1992,14 @@ function exploreCodebase(options) {
1890
1992
  const prompts = [];
1891
1993
  const datasets = [];
1892
1994
  const markers = [];
1995
+ const agents = [];
1893
1996
  const sourceFiles = walkDir(rootPath, options.extensions, options.excludeDirs, options.depth);
1894
1997
  for (const filePath of sourceFiles) {
1895
1998
  const relativePath = relative(rootPath, filePath);
1896
1999
  markers.push(...scanForMarkers(filePath, relativePath));
1897
2000
  if (!options.markersOnly) {
1898
2001
  prompts.push(...scanForPrompts(filePath, relativePath));
2002
+ agents.push(...scanForAgents(filePath, relativePath));
1899
2003
  }
1900
2004
  }
1901
2005
  if (!options.markersOnly) {
@@ -1906,7 +2010,7 @@ function exploreCodebase(options) {
1906
2010
  }
1907
2011
  datasets.push(...scanForDatasets(rootPath, rootPath, options.excludeDirs, options.depth));
1908
2012
  }
1909
- return { prompts, datasets, markers };
2013
+ return { prompts, datasets, markers, agents };
1910
2014
  }
1911
2015
  function parseExtensions(includeGlob) {
1912
2016
  const braceMatch = /\{([^}]+)\}/.exec(includeGlob);
@@ -3539,6 +3643,317 @@ init_sdk_client();
3539
3643
  import { Command as Command3 } from "commander";
3540
3644
  import chalk8 from "chalk";
3541
3645
  init_errors();
3646
+
3647
+ // src/commands/prompts/guided-dataset.ts
3648
+ init_sdk_client();
3649
+
3650
+ // src/commands/prompts/guided-workflow.ts
3651
+ init_sdk_client();
3652
/**
 * Fallback guidance for `evaluation create --guided` when no prompt-id was
 * supplied: tells the coding agent to explore the codebase, upload the
 * target prompt, and re-run guided mode, and documents the expected
 * evalConfig criteria shape. Pure function — no I/O.
 */
function buildGenericEvalWorkflow() {
  const workflow = [
    "1. mutagent explore --json — scan codebase for prompts",
    "2. mutagent prompts create ... --json — upload the target prompt",
    "3. Re-run: mutagent prompts evaluation create <prompt-id> --guided --json"
  ];
  // Placeholder criterion showing the shape an agent must construct.
  const criterionTemplate = {
    name: "<criterion_name>",
    description: "<rubric: what correct vs incorrect looks like>",
    evaluationParameter: "<must match a field in inputSchema or outputSchema>"
  };
  return {
    mode: "generic",
    message: "No prompt-id provided. Follow this sequence to create an evaluation:",
    workflow,
    formatReference: {
      evalConfig: { criteria: [criterionTemplate] }
    },
    _directive: {
      instruction: "Ask the user which prompt they want to evaluate. If unclear, run mutagent explore --json first.",
      next: ["mutagent explore --json", "mutagent prompts list --json"]
    }
  };
}
3676
/**
 * Look for other prompts whose names overlap the selected one (substring
 * match in either direction, case-insensitive) so the agent can confirm the
 * user picked the right prompt. Returns a disambiguation payload containing
 * an AskUserQuestion, or null when nothing similar exists. Best-effort:
 * any SDK/list failure is swallowed and treated as "no similar prompts".
 */
async function checkPromptDisambiguation(promptId, promptName) {
  try {
    const client = await getSDKClient();
    const candidates = await client.listPrompts();
    const needle = promptName.toLowerCase();
    const overlaps = (p) => {
      const name = p.name.toLowerCase();
      return name.includes(needle) || needle.includes(name);
    };
    const similar = candidates.filter((p) => String(p.id) !== promptId && overlaps(p));
    if (similar.length === 0) {
      return null;
    }
    const options = [
      `${promptName} (ID: ${promptId}) — SELECTED`,
      ...similar.map((p) => `${p.name} (ID: ${String(p.id)})`)
    ];
    return {
      message: `Found ${String(similar.length)} prompt(s) with similar names. Confirm this is the right one:`,
      selected: { id: promptId, name: promptName },
      similar: similar.map((p) => ({ id: String(p.id), name: p.name })),
      askUserQuestions: [{
        question: `You selected "${promptName}" (ID: ${promptId}). Is this correct, or did you mean one of these?`,
        options
      }]
    };
  } catch {
    return null;
  }
}
3700
/**
 * Build the guided-evaluation workflow payload for one prompt.
 *
 * Fetches the prompt and its input/output schemas via the SDK client,
 * derives one AskUserQuestion payload per schema field (input fields ask
 * for minimum-viable-context rubrics, output fields for correctness
 * rubrics), and embeds step-by-step directives for the coding agent.
 * Throws if the SDK client or the prompt fetch fails; the dataset-example
 * lookup below is best-effort and silently skipped on error.
 */
async function buildGuidedWorkflow(promptId) {
  const client = await getSDKClient();
  const prompt = await client.getPrompt(promptId);
  const truncate = (s, max) => s.length > max ? s.slice(0, max) + "..." : s;
  // Schemas may be absent or non-objects; fall back to empty property maps.
  const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
  const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
  const inputFields = Object.keys(inputProperties);
  const outputFields = Object.keys(outputProperties);
  // Unified field list, tagged with which schema each field came from.
  const allFields = [
    ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
    ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
  ];
  // Best-effort: pull the first item of the first dataset so questions can
  // show a real example value; all failures here are intentionally ignored.
  let datasetExample = null;
  try {
    const datasets = await client.listDatasets(promptId);
    if (datasets.length > 0) {
      try {
        const dsId = datasets[0]?.id;
        const items = dsId != null ? await client.listDatasetItems(String(dsId)) : [];
        if (items.length > 0) {
          const firstItem = items[0];
          datasetExample = firstItem?.expectedOutput ?? firstItem?.input ?? null;
        }
      } catch {}
    }
  } catch {}
  // One AskUserQuestion per field: input fields collect minimum-viable-context
  // definitions, output fields collect correctness rubrics.
  const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
    const isInput = source === "inputSchema";
    const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
    const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
    const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
    const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
    return {
      question,
      header,
      // hint is only present for input fields; spread of {} adds nothing.
      ...hint != null ? { hint } : {},
      options: [
        {
          label: "Define rubric",
          description: defineDesc
        },
        {
          label: "See suggestion",
          description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
        }
      ],
      multiSelect: false,
      context: {
        fieldType: fieldSchema?.type ?? "unknown",
        fieldDescription: fieldSchema?.description ?? null,
        fieldSource: source,
        // First available prompt text, truncated to 200 chars for display.
        promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
        exampleValue: datasetExample?.[field] ?? null
      }
    };
  });
  const result = {
    prompt: { id: promptId, name: prompt.name },
    inputSchema: { fields: inputFields },
    outputSchema: { fields: outputFields },
    workflow: {
      description: "Follow these steps to create an evaluation for this prompt:",
      steps: [
        { step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
        { step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
        { step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
        { step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
        { step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
      ],
      criteriaTemplate: {
        evalConfig: {
          criteria: [{
            name: "<criterion_name>",
            description: "<correctness rubric - describe what correct vs incorrect looks like>",
            evaluationParameter: `<one of: ${allFields.length > 0 ? allFields.map((f) => f.field).join(", ") : "no fields detected - check prompt schemas"}>`
          }]
        }
      },
      // Fully-worked example using at most the first output field.
      exampleCommand: `mutagent prompts evaluation create ${promptId} --name "My Evaluation" -d '${JSON.stringify({
        evalConfig: {
          criteria: outputFields.slice(0, 1).map((f) => ({
            name: `${f} accuracy`,
            description: `Correct if ${f} matches expected structure and content; incorrect if missing, malformed, or off-topic`,
            evaluationParameter: f
          }))
        }
      })}'`
    },
    askUserQuestions,
    _directive: {
      display: "workflow_guide",
      instruction: `MANDATORY ORDERS — READ CAREFULLY:
` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
` + ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
` + `6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.
` + `
` + `7. EVAL CRITERIA FRAMING — INPUT vs OUTPUT scoping (mirror of SKILL.md → "Guided Eval Criteria"):
` + ` INPUT fields → Minimum Viable Context (MVC).
` + ` Ask: "Is the data required for the prompt to succeed actually present in this {variable}?"
` + ` Rubric uses a COMPLETENESS scale grounded in observable presence of required data:
` + ` 1.0 = all required context present, no ambiguity
` + ` 0.5 = some context present, enough to attempt but likely partial/hedged answer
` + ` 0.0 = critical context missing, prompt cannot succeed regardless of model
` + ` OUTPUT fields → Output Standards.
` + ` Ask: "Does the response meet content correctness + structural correctness?"
` + ` Rubric uses a CORRECTNESS scale with concrete pass/fail tiers, e.g.:
` + ` 1.0 = valid shape, all required fields, faithful content
` + ` 0.5 = valid shape, 1-2 fields hedged or partially correct
` + ` 0.0 = invalid shape OR fabricated facts
` + ` NEVER use vague rubrics like "0-1 scale" or "score based on accuracy".
` + ` Canonical source: .claude/skills/mutagent-cli/SKILL.md → "Guided Eval Criteria".
` + " Note: template variables use single-brace `{variable}` (platform canonical). `mutagent explore`\n" + " infers single vs double per-file via inferPromptVariables().",
      aiAgentDecisionTree: {
        step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
        step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
        step3: `After collecting all criteria, construct the --data JSON and run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '<json>'`
      }
    },
    _meta: { inputFields, outputFields, allFields }
  };
  // Attach a confirmation question when other prompts have similar names.
  const disambiguation = await checkPromptDisambiguation(promptId, prompt.name);
  if (disambiguation) {
    result.disambiguation = disambiguation;
  }
  return result;
}
3830
+
3831
+ // src/commands/prompts/guided-dataset.ts
3832
/**
 * Fallback guidance for `dataset add --guided` when no prompt-id was
 * supplied: directs the coding agent to explore the codebase, upload the
 * target prompt, and re-run guided mode, and documents the dataset item
 * shape plus suggested test-case categories. Pure function — no I/O.
 */
function buildGenericDatasetWorkflow() {
  const workflow = [
    "1. mutagent explore --json — scan codebase for prompts",
    "2. mutagent prompts create ... --json — upload the target prompt",
    "3. Re-run: mutagent prompts dataset add <prompt-id> --guided --json"
  ];
  // Placeholder item showing the schema-keyed input/expectedOutput shape.
  const itemTemplate = {
    input: { "<inputSchema_field>": "<value>" },
    expectedOutput: { "<outputSchema_field>": "<expected_value>" }
  };
  return {
    mode: "generic",
    message: "No prompt-id provided. Follow this sequence to create a dataset:",
    workflow,
    formatReference: { item: itemTemplate },
    suggestedCategories: [
      { name: "Edge Cases", description: "Boundary inputs, empty fields, malformed data", priority: "high" },
      { name: "Hard Cases", description: "Ambiguous inputs, adversarial examples", priority: "high" },
      { name: "Representative Cases", description: "Typical production inputs", priority: "medium" }
    ],
    _directive: {
      instruction: "Ask the user which prompt they want to create a dataset for. If unclear, run mutagent explore --json first.",
      next: ["mutagent explore --json", "mutagent prompts list --json"]
    }
  };
}
3858
/**
 * Build the guided dataset-creation payload for an already-fetched prompt.
 * Derives question lists and a template item from the prompt's input/output
 * schemas, and falls back to generic placeholders when no schema fields
 * exist. Pure function: no I/O, safe to call with any prompt-shaped object
 * exposing id, name, and optional inputSchema/outputSchema.
 */
function buildGuidedDatasetWorkflow(prompt) {
  const promptId = String(prompt.id);
  // Schemas may be absent or non-objects; fall back to empty property maps.
  const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
  const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
  const inputFields = Object.keys(inputProperties);
  const outputFields = Object.keys(outputProperties);
  // Placeholder item mirroring the schema, one entry per field.
  const templateInput = Object.fromEntries(inputFields.map((f) => [f, "<value>"]));
  const templateOutput = Object.fromEntries(outputFields.map((f) => [f, "<expected>"]));
  // Two general questions first, then one per schema field.
  const askUserQuestions = [
    {
      field: "_general",
      source: "inputSchema",
      question: "What are the hardest inputs for this prompt? Describe scenarios that typically cause failures or ambiguous outputs."
    },
    {
      field: "_edge_cases",
      source: "inputSchema",
      question: "What edge cases have caused failures in production? Include boundary inputs, empty fields, and malformed data."
    }
  ];
  for (const f of inputFields) {
    askUserQuestions.push({
      field: f,
      source: "inputSchema",
      question: `What values should "${f}" have in test cases? Include edge cases (empty, unicode, very long, malformed).`
    });
  }
  for (const f of outputFields) {
    askUserQuestions.push({
      field: f,
      source: "outputSchema",
      question: `What should correct "${f}" look like? Describe the format, tone, completeness criteria, and examples of incorrect outputs.`
    });
  }
  const suggestedCategories = [
    {
      name: "Edge Cases",
      description: "Boundary inputs, empty fields, malformed data",
      priority: "high"
    },
    {
      name: "Hard Cases",
      description: "Ambiguous inputs, adversarial examples, domain traps",
      priority: "high"
    },
    {
      name: "Representative Cases",
      description: "Typical production inputs",
      priority: "medium"
    }
  ];
  const hasSchema = inputFields.length + outputFields.length > 0;
  return {
    promptId,
    promptName: prompt.name,
    schemaFields: {
      input: inputFields,
      output: outputFields
    },
    suggestedCategories,
    templateItem: {
      input: hasSchema ? templateInput : { "<field>": "<value>" },
      expectedOutput: hasSchema ? templateOutput : { "<field>": "<expected>" }
    },
    askUserQuestions,
    guidance: {
      minItems: 5,
      priorityRule: "Hard cases that expose prompt weaknesses > easy cases that always pass",
      steps: [
        "Collect answers to each question from the user",
        "Generate 5-10 dataset items covering all categories",
        "Ensure at least 2 hard/edge cases per category",
        'Format as JSON array: [{"input": {...}, "expectedOutput": {...}}, ...]',
        `Run: mutagent prompts dataset add ${promptId} -d '<json>' --name '<name>' --json`
      ]
    },
    _directive: {
      instruction: "Collect domain-specific dataset items from the user. Ask about each schema field. Prioritize hard cases over easy cases. Then construct the dataset JSON and upload.",
      next: [
        `mutagent prompts dataset add ${promptId} -d '<constructed-json>' --name '<name>' --json`
      ]
    }
  };
}
3945
/**
 * Guided dataset workflow for a known prompt id: fetch the prompt via the
 * SDK, build the schema-aware guidance payload, and attach a disambiguation
 * question when other prompts have similar names. Throws if the client or
 * prompt fetch fails; disambiguation lookup is best-effort (returns null
 * internally on failure).
 */
async function fetchAndBuildGuidedDatasetWorkflow(promptId) {
  const client = await getSDKClient();
  const prompt = await client.getPrompt(promptId);
  const workflow = buildGuidedDatasetWorkflow(prompt);
  const disambiguation = await checkPromptDisambiguation(promptId, prompt.name);
  if (disambiguation) {
    workflow.disambiguation = disambiguation;
  }
  return workflow;
}
3955
+
3956
+ // src/commands/prompts/datasets.ts
3542
3957
  function registerDatasetCommands(prompts) {
3543
3958
  const dataset = new Command3("dataset").description("Manage datasets for prompts").addHelpText("after", `
3544
3959
  Examples:
@@ -3638,11 +4053,19 @@ Verify the dataset ID exists, or list datasets for a prompt to find valid IDs.`)
3638
4053
  }
3639
4054
  }
3640
4055
  });
3641
- dataset.command("add").description("Add dataset to a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Inline JSON array of dataset items").option("-n, --name <name>", "Dataset name").addHelpText("after", `
4056
+ dataset.command("add").description("Add dataset to a prompt").argument("[prompt-id]", "Prompt ID (from: mutagent prompts list) — optional when --guided is set").option("-d, --data <json>", "Inline JSON array of dataset items").option("-n, --name <name>", "Dataset name").option("--guided", "Guided mode — analyze prompt schema and suggest dataset categories. prompt-id is optional when --guided is set.").addHelpText("after", `
3642
4057
  Examples:
4058
+ ${chalk8.dim("$")} mutagent prompts dataset add --guided --json # no prompt-id: returns generic workflow + upload instructions
4059
+ ${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> --guided --json # recommended: schema-aware category suggestions
3643
4060
  ${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> -d '[{"input":{"text":"hello"},"expectedOutput":{"result":"world"}}]'
3644
4061
  ${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> -d '[{"input":{"text":"hello"},"expectedOutput":{"result":"world"}}]' --name "My Dataset"
3645
4062
 
4063
+ Guided mode (--guided):
4064
+ Fetches the prompt's inputSchema + outputSchema and returns structured JSON
4065
+ that guides a coding agent to collect domain-specific dataset items.
4066
+ The agent uses the guidance to ask the user questions, then constructs
4067
+ the dataset JSON and calls: mutagent prompts dataset add <id> -d '<json>' --name '<name>' --json
4068
+
3646
4069
  Inline data format (-d):
3647
4070
  JSON array of objects, e.g.:
3648
4071
  ${chalk8.dim('[{"input": {"text": "hello"}, "expectedOutput": {"result": "world"}}]')}
@@ -3657,11 +4080,26 @@ ${chalk8.yellow("AI Agent (MANDATORY):")}
3657
4080
  expectedOutput is REQUIRED for evaluation scoring.
3658
4081
  Check schemas: mutagent prompts get <prompt-id> --json
3659
4082
 
3660
- ${chalk8.red("Required: --data must be provided.")}
4083
+ ${chalk8.red("Required: --data or --guided must be provided.")}
3661
4084
  `).action(async (promptId, options) => {
3662
- const isJson = getJsonFlag(prompts);
4085
+ const isJson = options.guided ? true : getJsonFlag(prompts);
3663
4086
  const output = new OutputFormatter(isJson ? "json" : "table");
3664
4087
  try {
4088
+ if (options.guided && !promptId) {
4089
+ const result = buildGenericDatasetWorkflow();
4090
+ output.output(result);
4091
+ return;
4092
+ }
4093
+ if (options.guided) {
4094
+ const guidedResult = await fetchAndBuildGuidedDatasetWorkflow(promptId);
4095
+ output.output(guidedResult);
4096
+ return;
4097
+ }
4098
+ if (!promptId) {
4099
+ throw new MutagentError("MISSING_ARGUMENTS", "prompt-id is required", `Run: mutagent prompts dataset add --help
4100
+ ` + `Usage: mutagent prompts dataset add <prompt-id> [options]
4101
+ ` + "With --guided: mutagent prompts dataset add --guided --json (returns generic workflow)");
4102
+ }
3665
4103
  if (!options.data) {
3666
4104
  throw new MutagentError("MISSING_ARGUMENTS", "-d/--data is required", `Run: mutagent prompts dataset add --help
3667
4105
  ` + "Use -d '[{...}]' to provide inline JSON data");
@@ -3704,20 +4142,21 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
3704
4142
  if (!datasetName) {
3705
4143
  throw new MutagentError("MISSING_ARGUMENTS", "Dataset name is required", `Run: mutagent prompts dataset add --help
3706
4144
  ` + `[Agent: Ask the user for a dataset name via AskUserQuestion, then pass --name]
3707
- ` + `Use --name <name>, e.g., mutagent prompts dataset add ${promptId} --name "my-dataset" -d '[...]'`);
4145
+ ` + `Use --name <name>, e.g., mutagent prompts dataset add ${promptId ?? "<prompt-id>"} --name "my-dataset" -d '[...]'`);
3708
4146
  }
4147
+ const resolvedPromptId = promptId;
3709
4148
  const client = await getSDKClient();
3710
- const datasetResult = await client.addDataset(promptId, content, datasetName);
4149
+ const datasetResult = await client.addDataset(resolvedPromptId, content, datasetName);
3711
4150
  if (isJson) {
3712
4151
  let rsState;
3713
4152
  try {
3714
- const evals = await client.listEvaluations(promptId);
4153
+ const evals = await client.listEvaluations(resolvedPromptId);
3715
4154
  rsState = { evaluations: evals.length };
3716
4155
  } catch {}
3717
- const directive = datasetAddedDirective(promptId, datasetResult.id, datasetResult.name, datasetResult.itemCount, rsState);
4156
+ const directive = datasetAddedDirective(resolvedPromptId, datasetResult.id, datasetResult.name, datasetResult.itemCount, rsState);
3718
4157
  output.output({
3719
4158
  ...datasetResult,
3720
- _links: datasetLinks(promptId, datasetResult.id),
4159
+ _links: datasetLinks(resolvedPromptId, datasetResult.id),
3721
4160
  _directive: directive
3722
4161
  });
3723
4162
  echoDirectiveToStderr(directive);
@@ -3725,7 +4164,7 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
3725
4164
  for (const w of warnings) {
3726
4165
  output.warn(w);
3727
4166
  }
3728
- output.success(`Added dataset "${datasetResult.name}" to prompt: ${promptId} (id: ${String(datasetResult.id)})`);
4167
+ output.success(`Added dataset "${datasetResult.name}" to prompt: ${resolvedPromptId} (id: ${String(datasetResult.id)})`);
3729
4168
  if (datasetResult.itemCount !== undefined && datasetResult.itemCount > 0) {
3730
4169
  output.info(`Items uploaded: ${String(datasetResult.itemCount)}`);
3731
4170
  }
@@ -3736,14 +4175,14 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
3736
4175
  resourceType: "Dataset",
3737
4176
  id: datasetResult.id,
3738
4177
  name: datasetResult.name,
3739
- dashboardUrl: datasetLink(promptId, datasetResult.id),
3740
- apiPath: `/api/prompts/${promptId}/datasets/${String(datasetResult.id)}`
4178
+ dashboardUrl: datasetLink(resolvedPromptId, datasetResult.id),
4179
+ apiPath: `/api/prompts/${resolvedPromptId}/datasets/${String(datasetResult.id)}`
3741
4180
  });
3742
4181
  console.log(hints);
3743
4182
  }
3744
4183
  updateMutationContext((ctx) => {
3745
4184
  ctx.addDiscoveredDataset("inline-data", datasetResult.name, datasetResult.itemCount ?? 0);
3746
- ctx.markDatasetUploaded("inline-data", String(datasetResult.id), promptId);
4185
+ ctx.markDatasetUploaded("inline-data", String(datasetResult.id), resolvedPromptId);
3747
4186
  });
3748
4187
  } catch (error) {
3749
4188
  handleError(error, isJson);
@@ -3883,144 +4322,33 @@ async function resolveNumericPromptId(client, promptGroupId) {
3883
4322
  return sorted[0]?.id ?? null;
3884
4323
  }
3885
4324
 
3886
- // src/commands/prompts/guided-workflow.ts
3887
- init_sdk_client();
3888
- async function buildGuidedWorkflow(promptId) {
3889
- const client = await getSDKClient();
3890
- const prompt = await client.getPrompt(promptId);
3891
- const truncate = (s, max) => s.length > max ? s.slice(0, max) + "..." : s;
3892
- const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
3893
- const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
3894
- const inputFields = Object.keys(inputProperties);
3895
- const outputFields = Object.keys(outputProperties);
3896
- const allFields = [
3897
- ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
3898
- ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
3899
- ];
3900
- let datasetExample = null;
3901
- try {
3902
- const datasets = await client.listDatasets(promptId);
3903
- if (datasets.length > 0) {
3904
- try {
3905
- const dsId = datasets[0]?.id;
3906
- const items = dsId != null ? await client.listDatasetItems(String(dsId)) : [];
3907
- if (items.length > 0) {
3908
- const firstItem = items[0];
3909
- datasetExample = firstItem?.expectedOutput ?? firstItem?.input ?? null;
3910
- }
3911
- } catch {}
3912
- }
3913
- } catch {}
3914
- const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
3915
- const isInput = source === "inputSchema";
3916
- const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
3917
- const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
3918
- const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
3919
- const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
3920
- return {
3921
- question,
3922
- header,
3923
- ...hint != null ? { hint } : {},
3924
- options: [
3925
- {
3926
- label: "Define rubric",
3927
- description: defineDesc
3928
- },
3929
- {
3930
- label: "See suggestion",
3931
- description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
3932
- }
3933
- ],
3934
- multiSelect: false,
3935
- context: {
3936
- fieldType: fieldSchema?.type ?? "unknown",
3937
- fieldDescription: fieldSchema?.description ?? null,
3938
- fieldSource: source,
3939
- promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
3940
- exampleValue: datasetExample?.[field] ?? null
3941
- }
3942
- };
3943
- });
3944
- return {
3945
- prompt: { id: promptId, name: prompt.name },
3946
- inputSchema: { fields: inputFields },
3947
- outputSchema: { fields: outputFields },
3948
- workflow: {
3949
- description: "Follow these steps to create an evaluation for this prompt:",
3950
- steps: [
3951
- { step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
3952
- { step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
3953
- { step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
3954
- { step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
3955
- { step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
3956
- ],
3957
- criteriaTemplate: {
3958
- evalConfig: {
3959
- criteria: [{
3960
- name: "<criterion_name>",
3961
- description: "<correctness rubric - describe what correct vs incorrect looks like>",
3962
- evaluationParameter: `<one of: ${allFields.length > 0 ? allFields.map((f) => f.field).join(", ") : "no fields detected - check prompt schemas"}>`
3963
- }]
3964
- }
3965
- },
3966
- exampleCommand: `mutagent prompts evaluation create ${promptId} --name "My Evaluation" -d '${JSON.stringify({
3967
- evalConfig: {
3968
- criteria: outputFields.slice(0, 1).map((f) => ({
3969
- name: `${f} accuracy`,
3970
- description: `Correct if ${f} matches expected structure and content; incorrect if missing, malformed, or off-topic`,
3971
- evaluationParameter: f
3972
- }))
3973
- }
3974
- })}'`
3975
- },
3976
- askUserQuestions,
3977
- _directive: {
3978
- display: "workflow_guide",
3979
- instruction: `MANDATORY ORDERS — READ CAREFULLY:
3980
- ` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
3981
- ` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
3982
- ` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
3983
- ` + ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
3984
- ` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
3985
- ` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
3986
- ` + "6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.",
3987
- aiAgentDecisionTree: {
3988
- step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
3989
- step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
3990
- step3: `After collecting all criteria, construct the --data JSON and run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '<json>'`
3991
- }
3992
- },
3993
- _meta: { inputFields, outputFields, allFields }
3994
- };
3995
- }
3996
-
3997
- // src/lib/adapters/eval-criteria.ts
3998
- function cliCriterionToCanonical(c) {
3999
- return {
4000
- criteria: c.description,
4001
- evaluationParameter: c.evaluationParameter,
4002
- name: c.name
4003
- };
4004
- }
4005
- function canonicalCriterionToCli(c) {
4006
- return {
4007
- name: c.name ?? c.id,
4008
- description: c.criteria,
4009
- evaluationParameter: c.evaluationParameter
4010
- };
4011
- }
4012
- function cliCriteriaArrayToCanonical(arr) {
4013
- if (!Array.isArray(arr))
4014
- return [];
4015
- return arr.map(cliCriterionToCanonical);
4016
- }
4017
- function canonicalCriteriaArrayToCli(arr) {
4018
- if (!Array.isArray(arr))
4019
- return [];
4020
- return arr.map(canonicalCriterionToCli);
4021
- }
4022
-
4023
- // src/commands/prompts/evaluations-run.ts
4325
+ // src/lib/adapters/eval-criteria.ts
4326
+ function cliCriterionToCanonical(c) {
4327
+ return {
4328
+ criteria: c.description,
4329
+ evaluationParameter: c.evaluationParameter,
4330
+ name: c.name
4331
+ };
4332
+ }
4333
+ function canonicalCriterionToCli(c) {
4334
+ return {
4335
+ name: c.name ?? c.id,
4336
+ description: c.criteria,
4337
+ evaluationParameter: c.evaluationParameter
4338
+ };
4339
+ }
4340
+ function cliCriteriaArrayToCanonical(arr) {
4341
+ if (!Array.isArray(arr))
4342
+ return [];
4343
+ return arr.map(cliCriterionToCanonical);
4344
+ }
4345
+ function canonicalCriteriaArrayToCli(arr) {
4346
+ if (!Array.isArray(arr))
4347
+ return [];
4348
+ return arr.map(canonicalCriterionToCli);
4349
+ }
4350
+
4351
+ // src/commands/prompts/evaluations-run.ts
4024
4352
  init_sdk_client();
4025
4353
  import chalk9 from "chalk";
4026
4354
  init_errors();
@@ -4213,8 +4541,9 @@ Examples:
4213
4541
  handleError(error, isJson);
4214
4542
  }
4215
4543
  });
4216
- evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("<prompt-id>", "Prompt ID (from: mutagent prompts list)").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied)").addHelpText("after", `
4544
+ evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("[prompt-id]", "Prompt ID (from: mutagent prompts list) — optional when --guided is set").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied). prompt-id is optional when --guided is set.").addHelpText("after", `
4217
4545
  Examples:
4546
+ ${chalk10.dim("$")} mutagent prompts evaluation create --guided --json ${chalk10.dim("# no prompt-id: returns generic workflow + upload instructions")}
4218
4547
  ${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --guided ${chalk10.dim("# recommended: shows workflow guide + schema fields")}
4219
4548
  ${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --guided --json ${chalk10.dim("# structured workflow for AI agents")}
4220
4549
  ${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --name "Accuracy" -d '{"evalConfig":{"criteria":[...]}}' ${chalk10.dim("# power user")}
@@ -4251,6 +4580,16 @@ ${chalk10.dim("Get prompt IDs: mutagent prompts list")}
4251
4580
  }
4252
4581
  const output = new OutputFormatter(isJson ? "json" : "table");
4253
4582
  try {
4583
+ if (options.guided && !promptId) {
4584
+ const result = buildGenericEvalWorkflow();
4585
+ output.output(result);
4586
+ return;
4587
+ }
4588
+ if (!promptId) {
4589
+ throw new MutagentError("MISSING_ARGUMENTS", "prompt-id is required", `Run: mutagent prompts evaluation create --help
4590
+ ` + `Usage: mutagent prompts evaluation create <prompt-id> [options]
4591
+ ` + "With --guided: mutagent prompts evaluation create --guided --json (returns generic workflow)");
4592
+ }
4254
4593
  const renderGuidedWorkflow = (wf) => {
4255
4594
  const { inputFields, outputFields, allFields } = wf._meta;
4256
4595
  if (isJson) {
@@ -7238,18 +7577,32 @@ AI AGENT: Execute these steps using Bash for install, Write/Edit for code change
7238
7577
 
7239
7578
  // src/commands/agents/index.ts
7240
7579
  import { Command as Command10 } from "commander";
7241
- import chalk25 from "chalk";
7580
+ import chalk26 from "chalk";
7242
7581
 
7243
7582
  // src/commands/agents/agents-crud.ts
7244
7583
  init_sdk_client();
7245
- import chalk24 from "chalk";
7584
+ import chalk25 from "chalk";
7246
7585
  init_errors();
7586
+
7587
+ // src/lib/agents-wip-banner.ts
7588
+ import chalk24 from "chalk";
7589
+ var AGENTS_WIP_BANNER = `${chalk24.yellow("Agents: Early Access / WIP")}
7590
+ ${chalk24.bold("Agent optimization & evaluation are actively in development.")}
7591
+ ${chalk24.dim("The Prompt path (single-shot LLM calls with output schemas) is production-ready.")}
7592
+ ${chalk24.dim("For Agents (multi-turn / tool-calling), join early access:")}
7593
+ ${chalk24.cyan("https://www.mutagent.io/agents-partnership")}
7594
+ ${chalk24.dim("`mutagent agents ...` CRUD commands exist but optimization/eval are NOT ready.")}`;
7595
+ var AGENTS_WIP_BANNER_SHORT = `${chalk24.yellow("[Agents WIP]")} ${chalk24.dim("CRUD only — optimization & evaluation in development.")} ${chalk24.cyan("https://www.mutagent.io/agents-partnership")}`;
7596
+
7597
+ // src/commands/agents/agents-crud.ts
7247
7598
  function registerAgentsCrud(agents) {
7248
7599
  agents.command("list").description("List all agents").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").option("-n, --name <name>", "Filter by name").option("-s, --status <status>", "Filter by status (active, paused, archived)").addHelpText("after", `
7600
+ ${AGENTS_WIP_BANNER_SHORT}
7601
+
7249
7602
  Examples:
7250
- ${chalk24.dim("$")} mutagent agents list
7251
- ${chalk24.dim("$")} mutagent agents list --status active
7252
- ${chalk24.dim("$")} mutagent agents list --name "reviewer" --json
7603
+ ${chalk25.dim("$")} mutagent agents list
7604
+ ${chalk25.dim("$")} mutagent agents list --status active
7605
+ ${chalk25.dim("$")} mutagent agents list --name "reviewer" --json
7253
7606
  `).action(async (options) => {
7254
7607
  const isJson = getJsonFlag(agents);
7255
7608
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7298,9 +7651,11 @@ Examples:
7298
7651
  }
7299
7652
  });
7300
7653
  agents.command("get").description("Get agent details").argument("<id>", "Agent ID").addHelpText("after", `
7654
+ ${AGENTS_WIP_BANNER_SHORT}
7655
+
7301
7656
  Examples:
7302
- ${chalk24.dim("$")} mutagent agents get <agent-id>
7303
- ${chalk24.dim("$")} mutagent agents get <agent-id> --json
7657
+ ${chalk25.dim("$")} mutagent agents get <agent-id>
7658
+ ${chalk25.dim("$")} mutagent agents get <agent-id> --json
7304
7659
  `).action(async (id) => {
7305
7660
  const isJson = getJsonFlag(agents);
7306
7661
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7330,11 +7685,11 @@ Examples:
7330
7685
  };
7331
7686
  output.output(formatted);
7332
7687
  if (agent.systemPrompt) {
7333
- console.log(chalk24.bold(`
7688
+ console.log(chalk25.bold(`
7334
7689
  System Prompt:`));
7335
- console.log(chalk24.gray("─".repeat(60)));
7690
+ console.log(chalk25.gray("─".repeat(60)));
7336
7691
  console.log(agent.systemPrompt);
7337
- console.log(chalk24.gray("─".repeat(60)));
7692
+ console.log(chalk25.gray("─".repeat(60)));
7338
7693
  }
7339
7694
  }
7340
7695
  } catch (error) {
@@ -7342,18 +7697,20 @@ System Prompt:`));
7342
7697
  }
7343
7698
  });
7344
7699
  agents.command("create").description("Create a new agent").option("-d, --data <json>", "Agent as JSON string (recommended for CI/scripts/agents)").option("-n, --name <name>", "Agent name").option("-s, --slug <slug>", "Agent slug (URL-friendly identifier)").option("-p, --system-prompt <prompt>", "System prompt").option("-m, --model <model>", "Model (claude-sonnet-4-5, claude-opus-4-5, claude-haiku-4-5)").option("--description <desc>", "Agent description").addHelpText("after", `
7700
+ ${AGENTS_WIP_BANNER_SHORT}
7701
+
7345
7702
  Examples:
7346
- ${chalk24.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7347
- ${chalk24.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are a code reviewer..."}'
7703
+ ${chalk25.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7704
+ ${chalk25.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are a code reviewer..."}'
7348
7705
 
7349
7706
  Expected JSON (--data):
7350
- ${chalk24.dim('{"name":"<name>","slug":"<slug>","systemPrompt":"<system prompt>","model":"<model-id>","description":"<description>"}')}
7707
+ ${chalk25.dim('{"name":"<name>","slug":"<slug>","systemPrompt":"<system prompt>","model":"<model-id>","description":"<description>"}')}
7351
7708
 
7352
7709
  Input Methods (pick one, priority order):
7353
- --name/--slug/... Individual flags ${chalk24.green("(recommended)")}
7710
+ --name/--slug/... Individual flags ${chalk25.green("(recommended)")}
7354
7711
  -d, --data Inline JSON object (CI/scripts/agents)
7355
7712
 
7356
- ${chalk24.red("Required: name, slug, systemPrompt.")} ${chalk24.dim("CLI flags override --data fields.")}
7713
+ ${chalk25.red("Required: name, slug, systemPrompt.")} ${chalk25.dim("CLI flags override --data fields.")}
7357
7714
  `).action(async (options) => {
7358
7715
  const isJson = getJsonFlag(agents);
7359
7716
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7398,16 +7755,18 @@ ${chalk24.red("Required: name, slug, systemPrompt.")} ${chalk24.dim("CLI flags o
7398
7755
  }
7399
7756
  });
7400
7757
  agents.command("update").description("Update an agent").argument("<id>", "Agent ID").option("-d, --data <json>", "Agent updates as JSON string (CI/scripts/agents)").option("-n, --name <name>", "New name").option("-p, --system-prompt <prompt>", "New system prompt").option("-m, --model <model>", "New model").option("--description <desc>", "New description").option("-s, --status <status>", "New status (active, paused, archived)").addHelpText("after", `
7758
+ ${AGENTS_WIP_BANNER_SHORT}
7759
+
7401
7760
  Examples:
7402
- ${chalk24.dim("$")} mutagent agents update <id> --name "New Name"
7403
- ${chalk24.dim("$")} mutagent agents update <id> --system-prompt "Updated prompt" --status active
7404
- ${chalk24.dim("$")} mutagent agents update <id> -d '{"name":"New Name","systemPrompt":"Updated prompt"}'
7761
+ ${chalk25.dim("$")} mutagent agents update <id> --name "New Name"
7762
+ ${chalk25.dim("$")} mutagent agents update <id> --system-prompt "Updated prompt" --status active
7763
+ ${chalk25.dim("$")} mutagent agents update <id> -d '{"name":"New Name","systemPrompt":"Updated prompt"}'
7405
7764
 
7406
7765
  Input Methods (pick one, priority order):
7407
- --name/--system-prompt/... Individual flags ${chalk24.green("(recommended)")}
7766
+ --name/--system-prompt/... Individual flags ${chalk25.green("(recommended)")}
7408
7767
  -d, --data Inline JSON object (CI/scripts/agents)
7409
7768
 
7410
- ${chalk24.dim("CLI flags override --data fields.")}
7769
+ ${chalk25.dim("CLI flags override --data fields.")}
7411
7770
  `).action(async (id, options) => {
7412
7771
  const isJson = getJsonFlag(agents);
7413
7772
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7454,12 +7813,14 @@ ${chalk24.dim("CLI flags override --data fields.")}
7454
7813
  }
7455
7814
  });
7456
7815
  agents.command("delete").description("Delete an agent").argument("<id>", "Agent ID").option("--force", "Skip confirmation").addHelpText("after", `
7816
+ ${AGENTS_WIP_BANNER_SHORT}
7817
+
7457
7818
  Examples:
7458
- ${chalk24.dim("$")} mutagent agents delete <id>
7459
- ${chalk24.dim("$")} mutagent agents delete <id> --force
7460
- ${chalk24.dim("$")} mutagent agents delete <id> --force --json
7819
+ ${chalk25.dim("$")} mutagent agents delete <id>
7820
+ ${chalk25.dim("$")} mutagent agents delete <id> --force
7821
+ ${chalk25.dim("$")} mutagent agents delete <id> --force --json
7461
7822
 
7462
- ${chalk24.dim("Tip: Use --force to skip confirmation (required for non-interactive/CI usage).")}
7823
+ ${chalk25.dim("Tip: Use --force to skip confirmation (required for non-interactive/CI usage).")}
7463
7824
  `).action(async (id, options) => {
7464
7825
  const isJson = getJsonFlag(agents);
7465
7826
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7488,14 +7849,16 @@ ${chalk24.dim("Tip: Use --force to skip confirmation (required for non-interacti
7488
7849
 
7489
7850
  // src/commands/agents/index.ts
7490
7851
  function createAgentsCommand() {
7491
- const agents = new Command10("agents").description("Manage AI agents").addHelpText("after", `
7852
+ const agents = new Command10("agents").description("Manage AI agents (CRUD only — optimization & evaluation are WIP)").addHelpText("after", `
7853
+ ${AGENTS_WIP_BANNER}
7854
+
7492
7855
  Examples:
7493
- ${chalk25.dim("$")} mutagent agents list
7494
- ${chalk25.dim("$")} mutagent agents get <agent-id>
7495
- ${chalk25.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7496
- ${chalk25.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are..."}'
7497
- ${chalk25.dim("$")} mutagent agents update <agent-id> --name "Updated Name"
7498
- ${chalk25.dim("$")} mutagent agents delete <agent-id> --force
7856
+ ${chalk26.dim("$")} mutagent agents list
7857
+ ${chalk26.dim("$")} mutagent agents get <agent-id>
7858
+ ${chalk26.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
7859
+ ${chalk26.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are..."}'
7860
+ ${chalk26.dim("$")} mutagent agents update <agent-id> --name "Updated Name"
7861
+ ${chalk26.dim("$")} mutagent agents delete <agent-id> --force
7499
7862
 
7500
7863
  Subcommands:
7501
7864
  list, get, create, update, delete
@@ -7507,22 +7870,22 @@ Subcommands:
7507
7870
  // src/commands/config.ts
7508
7871
  init_config();
7509
7872
  import { Command as Command11 } from "commander";
7510
- import chalk26 from "chalk";
7873
+ import chalk27 from "chalk";
7511
7874
  init_errors();
7512
7875
  init_sdk_client();
7513
7876
  var VALID_CONFIG_KEYS = ["apiKey", "endpoint", "format", "timeout", "defaultWorkspace", "defaultOrganization"];
7514
7877
  function createConfigCommand() {
7515
7878
  const config = new Command11("config").description("Manage CLI configuration").addHelpText("after", `
7516
7879
  Examples:
7517
- ${chalk26.dim("$")} mutagent config list
7518
- ${chalk26.dim("$")} mutagent config get endpoint
7519
- ${chalk26.dim("$")} mutagent config set workspace <workspace-id>
7520
- ${chalk26.dim("$")} mutagent config set org <org-id>
7880
+ ${chalk27.dim("$")} mutagent config list
7881
+ ${chalk27.dim("$")} mutagent config get endpoint
7882
+ ${chalk27.dim("$")} mutagent config set workspace <workspace-id>
7883
+ ${chalk27.dim("$")} mutagent config set org <org-id>
7521
7884
  `);
7522
7885
  config.command("list").description("List all configuration").addHelpText("after", `
7523
7886
  Examples:
7524
- ${chalk26.dim("$")} mutagent config list
7525
- ${chalk26.dim("$")} mutagent config list --json
7887
+ ${chalk27.dim("$")} mutagent config list
7888
+ ${chalk27.dim("$")} mutagent config list --json
7526
7889
  `).action(() => {
7527
7890
  const isJson = getJsonFlag(config);
7528
7891
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7535,11 +7898,11 @@ Examples:
7535
7898
  });
7536
7899
  config.command("get").description("Get configuration value").argument("<key>", "Configuration key (apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization)").addHelpText("after", `
7537
7900
  Examples:
7538
- ${chalk26.dim("$")} mutagent config get endpoint
7539
- ${chalk26.dim("$")} mutagent config get defaultWorkspace
7540
- ${chalk26.dim("$")} mutagent config get apiKey --json
7901
+ ${chalk27.dim("$")} mutagent config get endpoint
7902
+ ${chalk27.dim("$")} mutagent config get defaultWorkspace
7903
+ ${chalk27.dim("$")} mutagent config get apiKey --json
7541
7904
 
7542
- ${chalk26.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization")}
7905
+ ${chalk27.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization")}
7543
7906
  `).action((key) => {
7544
7907
  const isJson = getJsonFlag(config);
7545
7908
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7566,9 +7929,9 @@ ${chalk26.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaul
7566
7929
  });
7567
7930
  set.command("workspace").description("Set default workspace ID").argument("<id>", "Workspace ID to set as default").addHelpText("after", `
7568
7931
  Examples:
7569
- ${chalk26.dim("$")} mutagent config set workspace <workspace-id>
7932
+ ${chalk27.dim("$")} mutagent config set workspace <workspace-id>
7570
7933
 
7571
- ${chalk26.dim("Persists workspace ID so you don't need to pass headers on every request.")}
7934
+ ${chalk27.dim("Persists workspace ID so you don't need to pass headers on every request.")}
7572
7935
  `).action((id) => {
7573
7936
  const isJson = getJsonFlag(config);
7574
7937
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7582,9 +7945,9 @@ ${chalk26.dim("Persists workspace ID so you don't need to pass headers on every
7582
7945
  });
7583
7946
  set.command("org").description("Set default organization ID").argument("<id>", "Organization ID to set as default").addHelpText("after", `
7584
7947
  Examples:
7585
- ${chalk26.dim("$")} mutagent config set org <org-id>
7948
+ ${chalk27.dim("$")} mutagent config set org <org-id>
7586
7949
 
7587
- ${chalk26.dim("Persists organization ID for org-scoped API keys.")}
7950
+ ${chalk27.dim("Persists organization ID for org-scoped API keys.")}
7588
7951
  `).action((id) => {
7589
7952
  const isJson = getJsonFlag(config);
7590
7953
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7603,7 +7966,7 @@ ${chalk26.dim("Persists organization ID for org-scoped API keys.")}
7603
7966
  // src/commands/playground.ts
7604
7967
  init_sdk_client();
7605
7968
  import { Command as Command12 } from "commander";
7606
- import chalk27 from "chalk";
7969
+ import chalk28 from "chalk";
7607
7970
  init_errors();
7608
7971
  function parseSSELine(line3) {
7609
7972
  if (!line3 || line3.startsWith(":")) {
@@ -7630,11 +7993,11 @@ function parsePromptStreamEvent(data) {
7630
7993
  function createPlaygroundCommand() {
7631
7994
  const playground = new Command12("playground").description("Execute and test prompts interactively").addHelpText("after", `
7632
7995
  Examples:
7633
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
7634
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
7635
- ${chalk27.dim("$")} mutagent playground run <prompt-id> -i '{}' --model gpt-4-turbo
7636
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
7637
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --messages '[{"role":"user","content":"Hi"}]'
7996
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
7997
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
7998
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> -i '{}' --model gpt-4-turbo
7999
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
8000
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --messages '[{"role":"user","content":"Hi"}]'
7638
8001
 
7639
8002
  Input Format:
7640
8003
  The input must be a valid JSON object matching the prompt's input schema.
@@ -7650,16 +8013,16 @@ Streaming:
7650
8013
  `);
7651
8014
  playground.command("run").description("Execute a prompt with input variables").argument("<prompt-id>", "Prompt ID to execute (from: mutagent prompts list)").option("-i, --input <json>", "Input variables as JSON").option("-s, --stream", "Stream the response").option("-m, --model <model>", "Override model").option("--system <text>", "Set system prompt text").option("--human <text>", "Set human/user message text").option("--messages <json>", "Pass full messages array as JSON string").addHelpText("after", `
7652
8015
  Examples:
7653
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
7654
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
7655
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
7656
- ${chalk27.dim("$")} mutagent playground run <prompt-id> --input '{}' --model gpt-4-turbo --json
8016
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
8017
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
8018
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
8019
+ ${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --model gpt-4-turbo --json
7657
8020
 
7658
8021
  Input Methods (pick one, priority order):
7659
- --system/--human Quick system + user message ${chalk27.green("(recommended)")}
8022
+ --system/--human Quick system + user message ${chalk28.green("(recommended)")}
7660
8023
  --input '{"key":"value"}' Inline JSON variables
7661
8024
  --messages '[...]' Full messages array
7662
- ${chalk27.dim(`Hint: Test before evaluating: mutagent playground run <id> --input '{"key":"value"}'`)}
8025
+ ${chalk28.dim(`Hint: Test before evaluating: mutagent playground run <id> --input '{"key":"value"}'`)}
7663
8026
  `).action(async (promptId, options) => {
7664
8027
  const isJson = getJsonFlag(playground);
7665
8028
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7681,21 +8044,21 @@ ${chalk27.dim(`Hint: Test before evaluating: mutagent playground run <id> --inpu
7681
8044
  }
7682
8045
  });
7683
8046
  } else {
7684
- console.log(chalk27.bold(`
8047
+ console.log(chalk28.bold(`
7685
8048
  Execution Result:`));
7686
- console.log(chalk27.gray("─".repeat(50)));
7687
- console.log(chalk27.cyan("Output:"));
8049
+ console.log(chalk28.gray("─".repeat(50)));
8050
+ console.log(chalk28.cyan("Output:"));
7688
8051
  console.log(result.output);
7689
- console.log(chalk27.gray("─".repeat(50)));
7690
- console.log(chalk27.dim(`Model: ${result.model}`));
7691
- console.log(chalk27.dim(`Execution Time: ${String(result.executionTimeMs)}ms`));
8052
+ console.log(chalk28.gray("─".repeat(50)));
8053
+ console.log(chalk28.dim(`Model: ${result.model}`));
8054
+ console.log(chalk28.dim(`Execution Time: ${String(result.executionTimeMs)}ms`));
7692
8055
  if (result.tokens) {
7693
- console.log(chalk27.dim(`Tokens: ${String(result.tokens.prompt)} prompt + ${String(result.tokens.completion)} completion = ${String(result.tokens.total)} total`));
8056
+ console.log(chalk28.dim(`Tokens: ${String(result.tokens.prompt)} prompt + ${String(result.tokens.completion)} completion = ${String(result.tokens.total)} total`));
7694
8057
  }
7695
8058
  if (result.cost !== undefined) {
7696
- console.log(chalk27.dim(`Cost: $${result.cost.toFixed(6)}`));
8059
+ console.log(chalk28.dim(`Cost: $${result.cost.toFixed(6)}`));
7697
8060
  }
7698
- console.log(chalk27.dim(`Playground: ${playgroundLink(promptId)}`));
8061
+ console.log(chalk28.dim(`Playground: ${playgroundLink(promptId)}`));
7699
8062
  console.log();
7700
8063
  }
7701
8064
  }
@@ -7784,9 +8147,9 @@ async function executeStreaming(client, promptId, input, model, isJson, output)
7784
8147
  const decoder = new TextDecoder;
7785
8148
  let buffer = "";
7786
8149
  if (!isJson) {
7787
- console.log(chalk27.bold(`
8150
+ console.log(chalk28.bold(`
7788
8151
  Streaming Output:`));
7789
- console.log(chalk27.gray("─".repeat(50)));
8152
+ console.log(chalk28.gray("─".repeat(50)));
7790
8153
  }
7791
8154
  try {
7792
8155
  for (;; ) {
@@ -7825,15 +8188,15 @@ Streaming Output:`));
7825
8188
  console.log(JSON.stringify({ type: "complete", result: event.result }));
7826
8189
  } else {
7827
8190
  console.log();
7828
- console.log(chalk27.gray("─".repeat(50)));
8191
+ console.log(chalk28.gray("─".repeat(50)));
7829
8192
  if (event.result) {
7830
- console.log(chalk27.dim(`Model: ${event.result.model}`));
7831
- console.log(chalk27.dim(`Execution Time: ${String(event.result.executionTimeMs)}ms`));
8193
+ console.log(chalk28.dim(`Model: ${event.result.model}`));
8194
+ console.log(chalk28.dim(`Execution Time: ${String(event.result.executionTimeMs)}ms`));
7832
8195
  if (event.result.tokens) {
7833
- console.log(chalk27.dim(`Tokens: ${String(event.result.tokens.prompt)} prompt + ${String(event.result.tokens.completion)} completion = ${String(event.result.tokens.total)} total`));
8196
+ console.log(chalk28.dim(`Tokens: ${String(event.result.tokens.prompt)} prompt + ${String(event.result.tokens.completion)} completion = ${String(event.result.tokens.total)} total`));
7834
8197
  }
7835
8198
  if (event.result.cost !== undefined) {
7836
- console.log(chalk27.dim(`Cost: $${event.result.cost.toFixed(6)}`));
8199
+ console.log(chalk28.dim(`Cost: $${event.result.cost.toFixed(6)}`));
7837
8200
  }
7838
8201
  }
7839
8202
  console.log();
@@ -7858,13 +8221,13 @@ Streaming Output:`));
7858
8221
  // src/commands/workspaces.ts
7859
8222
  init_sdk_client();
7860
8223
  import { Command as Command13 } from "commander";
7861
- import chalk28 from "chalk";
8224
+ import chalk29 from "chalk";
7862
8225
  init_errors();
7863
8226
  function createWorkspacesCommand() {
7864
8227
  const workspaces = new Command13("workspaces").description("View workspaces (read-only)").addHelpText("after", `
7865
8228
  Examples:
7866
- ${chalk28.dim("$")} mutagent workspaces list
7867
- ${chalk28.dim("$")} mutagent workspaces get <workspace-id>
8229
+ ${chalk29.dim("$")} mutagent workspaces list
8230
+ ${chalk29.dim("$")} mutagent workspaces get <workspace-id>
7868
8231
 
7869
8232
  Subcommands:
7870
8233
  list, get
@@ -7873,8 +8236,8 @@ Note: Workspace management (create, update, delete) is available in the Admin Pa
7873
8236
  `);
7874
8237
  workspaces.command("list").description("List all workspaces").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").addHelpText("after", `
7875
8238
  Examples:
7876
- ${chalk28.dim("$")} mutagent workspaces list
7877
- ${chalk28.dim("$")} mutagent workspaces list --limit 10 --json
8239
+ ${chalk29.dim("$")} mutagent workspaces list
8240
+ ${chalk29.dim("$")} mutagent workspaces list --limit 10 --json
7878
8241
  `).action(async (options) => {
7879
8242
  const isJson = getJsonFlag(workspaces);
7880
8243
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7914,8 +8277,8 @@ Examples:
7914
8277
  });
7915
8278
  workspaces.command("get").description("Get workspace details").argument("<id>", "Workspace ID").addHelpText("after", `
7916
8279
  Examples:
7917
- ${chalk28.dim("$")} mutagent workspaces get <workspace-id>
7918
- ${chalk28.dim("$")} mutagent workspaces get <workspace-id> --json
8280
+ ${chalk29.dim("$")} mutagent workspaces get <workspace-id>
8281
+ ${chalk29.dim("$")} mutagent workspaces get <workspace-id> --json
7919
8282
  `).action(async (id) => {
7920
8283
  const isJson = getJsonFlag(workspaces);
7921
8284
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -7948,12 +8311,12 @@ Examples:
7948
8311
  // src/commands/providers/index.ts
7949
8312
  init_sdk_client();
7950
8313
  import { Command as Command14 } from "commander";
7951
- import chalk32 from "chalk";
8314
+ import chalk33 from "chalk";
7952
8315
  init_errors();
7953
8316
 
7954
8317
  // src/commands/providers/add.ts
7955
8318
  init_sdk_client();
7956
- import chalk29 from "chalk";
8319
+ import chalk30 from "chalk";
7957
8320
  init_errors();
7958
8321
  function resolveScope(scopeFlag, client) {
7959
8322
  const scope = scopeFlag ?? "workspace";
@@ -7976,10 +8339,10 @@ function resolveScope(scopeFlag, client) {
7976
8339
  function registerAddCommand(parent) {
7977
8340
  parent.command("add").description("Add a new provider configuration").requiredOption("-p, --provider <type>", "Provider type (openai, anthropic, google, ...)").requiredOption("-n, --name <name>", "Display name for this provider").requiredOption("-k, --api-key <key>", "API key for the provider").option("-s, --scope <scope>", "Scope: workspace (default), org, user", "workspace").option("--base-url <url>", "Custom base URL for the provider API").option("--set-default", "Set as default provider for this scope").addHelpText("after", `
7978
8341
  Examples:
7979
- ${chalk29.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $OPENAI_API_KEY
7980
- ${chalk29.dim("$")} mutagent providers add --provider anthropic --name "Team Claude" --api-key $KEY --scope org
7981
- ${chalk29.dim("$")} mutagent providers add --provider openai --name "Personal" --api-key $KEY --scope user --set-default
7982
- ${chalk29.dim("$")} mutagent providers add --provider custom --name "Ollama" --api-key none --base-url http://localhost:11434 --json
8342
+ ${chalk30.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $OPENAI_API_KEY
8343
+ ${chalk30.dim("$")} mutagent providers add --provider anthropic --name "Team Claude" --api-key $KEY --scope org
8344
+ ${chalk30.dim("$")} mutagent providers add --provider openai --name "Personal" --api-key $KEY --scope user --set-default
8345
+ ${chalk30.dim("$")} mutagent providers add --provider custom --name "Ollama" --api-key none --base-url http://localhost:11434 --json
7983
8346
 
7984
8347
  Scope Resolution:
7985
8348
  workspace (default) Uses your configured workspace
@@ -8025,10 +8388,10 @@ The API key is encrypted server-side and never returned in plain text.
8025
8388
  console.log(` Scope: ${options.scope ?? "workspace"}`);
8026
8389
  console.log(` URL: ${providerLink(created.id)}`);
8027
8390
  if (options.setDefault) {
8028
- console.log(chalk29.green(" Set as default provider"));
8391
+ console.log(chalk30.green(" Set as default provider"));
8029
8392
  }
8030
8393
  console.log("");
8031
- console.log(chalk29.dim("API key is encrypted server-side. Use `providers get` to see masked key."));
8394
+ console.log(chalk30.dim("API key is encrypted server-side. Use `providers get` to see masked key."));
8032
8395
  }
8033
8396
  } catch (error) {
8034
8397
  handleError(error, isJson);
@@ -8066,15 +8429,15 @@ function buildProviderCreatedDirective(provider, scope) {
8066
8429
 
8067
8430
  // src/commands/providers/update.ts
8068
8431
  init_sdk_client();
8069
- import chalk30 from "chalk";
8432
+ import chalk31 from "chalk";
8070
8433
  init_errors();
8071
8434
  function registerUpdateCommand(parent) {
8072
8435
  parent.command("update").description("Update an existing provider configuration").argument("<id>", "Provider ID (from: mutagent providers list)").option("-n, --name <name>", "Updated display name").option("-k, --api-key <key>", "Updated API key (will be re-encrypted)").option("--active <bool>", "Activate or deactivate (true|false)").option("--set-default", "Set as default provider for its scope").option("--base-url <url>", 'Updated base URL (use "" to clear)').addHelpText("after", `
8073
8436
  Examples:
8074
- ${chalk30.dim("$")} mutagent providers update <id> --name "New Name"
8075
- ${chalk30.dim("$")} mutagent providers update <id> --api-key $NEW_KEY --json
8076
- ${chalk30.dim("$")} mutagent providers update <id> --active false
8077
- ${chalk30.dim("$")} mutagent providers update <id> --set-default --json
8437
+ ${chalk31.dim("$")} mutagent providers update <id> --name "New Name"
8438
+ ${chalk31.dim("$")} mutagent providers update <id> --api-key $NEW_KEY --json
8439
+ ${chalk31.dim("$")} mutagent providers update <id> --active false
8440
+ ${chalk31.dim("$")} mutagent providers update <id> --set-default --json
8078
8441
 
8079
8442
  PATCH semantics — only provided fields are updated.
8080
8443
  `).action(async (id, options) => {
@@ -8125,7 +8488,7 @@ PATCH semantics — only provided fields are updated.
8125
8488
  console.log(` ID: ${String(updated.id ?? id)}`);
8126
8489
  console.log(` URL: ${providerLink(updated.id ?? id)}`);
8127
8490
  if (options.apiKey) {
8128
- console.log(chalk30.dim(" API key re-encrypted server-side."));
8491
+ console.log(chalk31.dim(" API key re-encrypted server-side."));
8129
8492
  }
8130
8493
  }
8131
8494
  } catch (error) {
@@ -8164,17 +8527,17 @@ function buildProviderUpdatedDirective(provider, requestId) {
8164
8527
 
8165
8528
  // src/commands/providers/delete.ts
8166
8529
  init_sdk_client();
8167
- import chalk31 from "chalk";
8530
+ import chalk32 from "chalk";
8168
8531
  init_errors();
8169
8532
  function registerDeleteCommand(parent) {
8170
8533
  parent.command("delete").description("Delete a provider configuration").argument("<id>", "Provider ID (from: mutagent providers list)").option("-f, --force", "Skip confirmation prompt").addHelpText("after", `
8171
8534
  Examples:
8172
- ${chalk31.dim("$")} mutagent providers delete <id>
8173
- ${chalk31.dim("$")} mutagent providers delete <id> --force
8174
- ${chalk31.dim("$")} mutagent providers delete <id> --force --json
8535
+ ${chalk32.dim("$")} mutagent providers delete <id>
8536
+ ${chalk32.dim("$")} mutagent providers delete <id> --force
8537
+ ${chalk32.dim("$")} mutagent providers delete <id> --force --json
8175
8538
 
8176
- ${chalk31.dim("Note: --force is required. The CLI is non-interactive — confirm with the user via your native flow, then pass --force.")}
8177
- ${chalk31.dim("Warning: API keys are AES-256-GCM encrypted and irrecoverable after deletion. Agents referencing this provider will lose their model config.")}
8539
+ ${chalk32.dim("Note: --force is required. The CLI is non-interactive — confirm with the user via your native flow, then pass --force.")}
8540
+ ${chalk32.dim("Warning: API keys are AES-256-GCM encrypted and irrecoverable after deletion. Agents referencing this provider will lose their model config.")}
8178
8541
  `).action(async (id, options) => {
8179
8542
  const isJson = getJsonFlag(parent);
8180
8543
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8264,12 +8627,12 @@ function validateProviderType(type) {
8264
8627
  function createProvidersCommand() {
8265
8628
  const providers = new Command14("providers").description("Manage LLM provider configurations (BYOK)").addHelpText("after", `
8266
8629
  Examples:
8267
- ${chalk32.dim("$")} mutagent providers list
8268
- ${chalk32.dim("$")} mutagent providers get <provider-id>
8269
- ${chalk32.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $KEY
8270
- ${chalk32.dim("$")} mutagent providers update <id> --name "New Name"
8271
- ${chalk32.dim("$")} mutagent providers delete <id> --force
8272
- ${chalk32.dim("$")} mutagent providers test <provider-id>
8630
+ ${chalk33.dim("$")} mutagent providers list
8631
+ ${chalk33.dim("$")} mutagent providers get <provider-id>
8632
+ ${chalk33.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $KEY
8633
+ ${chalk33.dim("$")} mutagent providers update <id> --name "New Name"
8634
+ ${chalk33.dim("$")} mutagent providers delete <id> --force
8635
+ ${chalk33.dim("$")} mutagent providers test <provider-id>
8273
8636
 
8274
8637
  Provider Types:
8275
8638
  openai, anthropic, google, azure, bedrock, cohere, mistral, groq, together, replicate, custom
@@ -8279,9 +8642,9 @@ Subcommands:
8279
8642
  `);
8280
8643
  providers.command("list").description("List all providers").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").option("-t, --type <type>", "Filter by provider type").addHelpText("after", `
8281
8644
  Examples:
8282
- ${chalk32.dim("$")} mutagent providers list
8283
- ${chalk32.dim("$")} mutagent providers list --type openai
8284
- ${chalk32.dim("$")} mutagent providers list --json
8645
+ ${chalk33.dim("$")} mutagent providers list
8646
+ ${chalk33.dim("$")} mutagent providers list --type openai
8647
+ ${chalk33.dim("$")} mutagent providers list --json
8285
8648
  `).action(async (options) => {
8286
8649
  const isJson = getJsonFlag(providers);
8287
8650
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8330,8 +8693,8 @@ Examples:
8330
8693
  });
8331
8694
  providers.command("get").description("Get provider details").argument("<id>", "Provider ID (from: mutagent providers list)").addHelpText("after", `
8332
8695
  Examples:
8333
- ${chalk32.dim("$")} mutagent providers get <provider-id>
8334
- ${chalk32.dim("$")} mutagent providers get <provider-id> --json
8696
+ ${chalk33.dim("$")} mutagent providers get <provider-id>
8697
+ ${chalk33.dim("$")} mutagent providers get <provider-id> --json
8335
8698
  `).action(async (id) => {
8336
8699
  const isJson = getJsonFlag(providers);
8337
8700
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8360,10 +8723,10 @@ Examples:
8360
8723
  });
8361
8724
  providers.command("test").description("Test provider connectivity").argument("<id>", "Provider ID (from: mutagent providers list)").addHelpText("after", `
8362
8725
  Examples:
8363
- ${chalk32.dim("$")} mutagent providers test <provider-id>
8364
- ${chalk32.dim("$")} mutagent providers test <provider-id> --json
8726
+ ${chalk33.dim("$")} mutagent providers test <provider-id>
8727
+ ${chalk33.dim("$")} mutagent providers test <provider-id> --json
8365
8728
 
8366
- ${chalk32.dim("Tests connectivity and lists available models for the provider.")}
8729
+ ${chalk33.dim("Tests connectivity and lists available models for the provider.")}
8367
8730
  `).action(async (id) => {
8368
8731
  const isJson = getJsonFlag(providers);
8369
8732
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8378,9 +8741,9 @@ ${chalk32.dim("Tests connectivity and lists available models for the provider.")
8378
8741
  } else {
8379
8742
  if (result.success) {
8380
8743
  output.success(`Provider test passed (${String(result.responseTimeMs)}ms)`);
8381
- console.log(chalk32.green(`Message: ${result.message}`));
8744
+ console.log(chalk33.green(`Message: ${result.message}`));
8382
8745
  if (result.availableModels && result.availableModels.length > 0) {
8383
- console.log(chalk32.bold(`
8746
+ console.log(chalk33.bold(`
8384
8747
  Available Models:`));
8385
8748
  result.availableModels.forEach((model) => {
8386
8749
  console.log(` - ${model}`);
@@ -8389,7 +8752,7 @@ Available Models:`));
8389
8752
  } else {
8390
8753
  output.error(`Provider test failed: ${result.message}`);
8391
8754
  if (result.error) {
8392
- console.log(chalk32.red(`Error: ${result.error}`));
8755
+ console.log(chalk33.red(`Error: ${result.error}`));
8393
8756
  }
8394
8757
  }
8395
8758
  }
@@ -8407,7 +8770,7 @@ Available Models:`));
8407
8770
  init_config();
8408
8771
  import { Command as Command15 } from "commander";
8409
8772
  import inquirer2 from "inquirer";
8410
- import chalk33 from "chalk";
8773
+ import chalk34 from "chalk";
8411
8774
  import { existsSync as existsSync11, mkdirSync as mkdirSync3, writeFileSync as writeFileSync4 } from "fs";
8412
8775
  import { execSync as execSync3 } from "child_process";
8413
8776
  import { join as join6 } from "path";
@@ -8530,13 +8893,13 @@ function writeRcConfig(config, cwd = process.cwd()) {
8530
8893
  function createInitCommand() {
8531
8894
  const init = new Command15("init").description("Initialize MutagenT in your project").option("--non-interactive", "Skip interactive prompts (defaults to CLI-only mode)").addHelpText("after", `
8532
8895
  Examples:
8533
- ${chalk33.dim("$")} mutagent init # Interactive setup wizard
8534
- ${chalk33.dim("$")} mutagent init --non-interactive # CLI-only mode (no prompts)
8896
+ ${chalk34.dim("$")} mutagent init # Interactive setup wizard
8897
+ ${chalk34.dim("$")} mutagent init --non-interactive # CLI-only mode (no prompts)
8535
8898
 
8536
8899
  Modes:
8537
- ${chalk33.bold("Full scaffold")} Install SDK + integration package, create config, setup tracing
8538
- ${chalk33.bold("CLI-only")} Verify auth + create .mutagentrc.json with workspace/endpoint
8539
- ${chalk33.bold("Skip")} Exit without changes
8900
+ ${chalk34.bold("Full scaffold")} Install SDK + integration package, create config, setup tracing
8901
+ ${chalk34.bold("CLI-only")} Verify auth + create .mutagentrc.json with workspace/endpoint
8902
+ ${chalk34.bold("Skip")} Exit without changes
8540
8903
  `).action(async (options) => {
8541
8904
  const isJson = getJsonFlag(init);
8542
8905
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8808,23 +9171,36 @@ Modes:
8808
9171
 
8809
9172
  // src/commands/explore.ts
8810
9173
  import { Command as Command16 } from "commander";
8811
- import chalk34 from "chalk";
9174
+ import chalk35 from "chalk";
8812
9175
  import { resolve as resolve3 } from "path";
8813
9176
  init_errors();
8814
9177
  function createExploreCommand() {
8815
9178
  const explore = new Command16("explore").description("Scan codebase for prompts, datasets, and MutagenT markers").option("-p, --path <dir>", "Directory to scan", ".").option("--depth <n>", "Max directory depth", "10").option("--include <glob>", "Include file pattern", "**/*.{ts,js,py,tsx,jsx}").option("--exclude <dirs>", "Comma-separated directories to exclude", "node_modules,dist,.git,build,.next,__pycache__,venv,.venv").option("--markers-only", "Only find existing MutagenT markers").addHelpText("after", `
8816
9179
  Examples:
8817
- ${chalk34.dim("$")} mutagent explore
8818
- ${chalk34.dim("$")} mutagent explore --path ./src
8819
- ${chalk34.dim("$")} mutagent explore --include "**/*.{ts,py}" --depth 5
8820
- ${chalk34.dim("$")} mutagent explore --markers-only
8821
- ${chalk34.dim("$")} mutagent explore --json
9180
+ ${chalk35.dim("$")} mutagent explore
9181
+ ${chalk35.dim("$")} mutagent explore --path ./src
9182
+ ${chalk35.dim("$")} mutagent explore --include "**/*.{ts,py}" --depth 5
9183
+ ${chalk35.dim("$")} mutagent explore --markers-only
9184
+ ${chalk35.dim("$")} mutagent explore --json
8822
9185
 
8823
9186
  Detection modes:
8824
- ${chalk34.dim("Heuristic")} Template variables ({{var}}), prompt constants, schema definitions
8825
- ${chalk34.dim("Marker")} MutagenT:START/END comment markers from previous uploads
9187
+ ${chalk35.dim("Heuristic")} Template variables ({var} or {{var}}), prompt constants, schema definitions
9188
+ ${chalk35.dim("Marker")} MutagenT:START/END comment markers from previous uploads
9189
+ ${chalk35.dim("Agents")} Multi-turn / tool-calling patterns (informational — WIP)
9190
+
9191
+ ${chalk35.bold("Template variable delimiters (inferred per-file):")}
9192
+ ${chalk35.dim("single")} ${chalk35.cyan("{variable}")} MutagenT platform canonical; LangChain PromptTemplate
9193
+ ${chalk35.dim("double")} ${chalk35.cyan("{{variable}}")} Handlebars / Mustache / LiquidJS / Jinja2 / LangChain ChatPromptTemplate
9194
+
9195
+ ${chalk35.dim("Inference is per-file: majority of bracket style wins, tie goes to single")}
9196
+ ${chalk35.dim('(platform canonical). Fenced code blocks are stripped first; {"json": ...} ')}
9197
+ ${chalk35.dim("literals are skipped via JSON-key lookahead. See the mutagent-cli skill at ")}
9198
+ ${chalk35.dim("concepts/prompt-variables.md for the full contract.")}
8826
9199
 
8827
- ${chalk34.dim("Results are saved to .mutagent/mutation-context.md for use by other commands.")}
9200
+ ${chalk35.dim("With --json, each discovered prompt carries a ")}${chalk35.cyan("delimiter")}${chalk35.dim(" field:")}
9201
+ ${chalk35.dim('{ "file": "src/prompts/summarize.ts", "line": 12, "reason": "template-variable", "delimiter": "single" }')}
9202
+
9203
+ ${chalk35.dim("Results are saved to .mutagent/mutation-context.md for use by other commands.")}
8828
9204
  `).action((options) => {
8829
9205
  const isJson = getJsonFlag(explore);
8830
9206
  const output = new OutputFormatter(isJson ? "json" : "table");
@@ -8842,7 +9218,7 @@ ${chalk34.dim("Results are saved to .mutagent/mutation-context.md for use by oth
8842
9218
  markersOnly
8843
9219
  };
8844
9220
  if (!isJson) {
8845
- console.log(chalk34.cyan(`
9221
+ console.log(chalk35.cyan(`
8846
9222
  Scanning ${scanPath}...
8847
9223
  `));
8848
9224
  }
@@ -8860,52 +9236,78 @@ Scanning ${scanPath}...
8860
9236
  prompts: result.prompts,
8861
9237
  datasets: result.datasets,
8862
9238
  markers: result.markers,
9239
+ agents: result.agents,
9240
+ agentsWip: result.agents.length > 0 ? {
9241
+ status: "early-access",
9242
+ message: "Agent detection is informational only. Agent optimization & evaluation are WIP.",
9243
+ earlyAccessUrl: "https://www.mutagent.io/agents-partnership"
9244
+ } : undefined,
8863
9245
  summary: {
8864
9246
  totalPrompts: result.prompts.length,
8865
9247
  totalDatasets: result.datasets.length,
8866
- totalMarkers: result.markers.length
9248
+ totalMarkers: result.markers.length,
9249
+ totalAgents: result.agents.length
8867
9250
  },
8868
9251
  contextFile: ".mutagent/mutation-context.md"
8869
9252
  });
8870
9253
  } else {
8871
- const totalFindings = result.prompts.length + result.datasets.length + result.markers.length;
9254
+ const totalFindings = result.prompts.length + result.datasets.length + result.markers.length + result.agents.length;
8872
9255
  if (totalFindings === 0) {
8873
9256
  output.info("No prompts, datasets, or markers found.");
8874
- console.log(chalk34.dim(`
9257
+ console.log(chalk35.dim(`
8875
9258
  Tip: Create a prompt with template variables like {{input}} to get started.`));
8876
9259
  return;
8877
9260
  }
8878
9261
  if (result.prompts.length > 0) {
8879
- console.log(chalk34.bold(` Prompts Found (${String(result.prompts.length)}):`));
9262
+ console.log(chalk35.bold(` Prompts Found (${String(result.prompts.length)}):`));
8880
9263
  console.log();
8881
9264
  for (const p of result.prompts) {
8882
- const confidenceTag = p.confidence === "high" ? chalk34.green("[high]") : p.confidence === "medium" ? chalk34.yellow("[medium]") : chalk34.dim("[low]");
8883
- const reasonTag = chalk34.dim(`[${p.reason}]`);
8884
- console.log(` ${confidenceTag} ${chalk34.green(p.file)}:${chalk34.yellow(String(p.line))} ${reasonTag}`);
8885
- console.log(` ${chalk34.dim(p.preview)}`);
9265
+ const confidenceTag = p.confidence === "high" ? chalk35.green("[high]") : p.confidence === "medium" ? chalk35.yellow("[medium]") : chalk35.dim("[low]");
9266
+ const reasonTag = chalk35.dim(`[${p.reason}]`);
9267
+ console.log(` ${confidenceTag} ${chalk35.green(p.file)}:${chalk35.yellow(String(p.line))} ${reasonTag}`);
9268
+ console.log(` ${chalk35.dim(p.preview)}`);
8886
9269
  }
8887
9270
  console.log();
8888
9271
  }
8889
9272
  if (result.datasets.length > 0) {
8890
- console.log(chalk34.bold(` Datasets Found (${String(result.datasets.length)}):`));
9273
+ console.log(chalk35.bold(` Datasets Found (${String(result.datasets.length)}):`));
8891
9274
  console.log();
8892
9275
  for (const d of result.datasets) {
8893
- console.log(` ${chalk34.green(d.file)} ${chalk34.dim(`(${String(d.items)} items)`)}`);
9276
+ console.log(` ${chalk35.green(d.file)} ${chalk35.dim(`(${String(d.items)} items)`)}`);
8894
9277
  }
8895
9278
  console.log();
8896
9279
  }
8897
9280
  if (result.markers.length > 0) {
8898
- console.log(chalk34.bold(` MutagenT Markers (${String(result.markers.length)}):`));
9281
+ console.log(chalk35.bold(` MutagenT Markers (${String(result.markers.length)}):`));
8899
9282
  console.log();
8900
9283
  for (const m of result.markers) {
8901
- const idPart = m.platformId ? chalk34.cyan(` id=${m.platformId}`) : "";
8902
- console.log(` ${chalk34.green(m.file)}:${chalk34.yellow(String(m.line))} ${chalk34.magenta(m.type)}${idPart}`);
9284
+ const idPart = m.platformId ? chalk35.cyan(` id=${m.platformId}`) : "";
9285
+ console.log(` ${chalk35.green(m.file)}:${chalk35.yellow(String(m.line))} ${chalk35.magenta(m.type)}${idPart}`);
9286
+ }
9287
+ console.log();
9288
+ }
9289
+ if (result.agents.length > 0) {
9290
+ console.log(chalk35.bold(` Agents Detected (${String(result.agents.length)}):`));
9291
+ console.log();
9292
+ for (const a of result.agents) {
9293
+ const confidenceTag = a.confidence === "high" ? chalk35.green("[high]") : a.confidence === "medium" ? chalk35.yellow("[medium]") : chalk35.dim("[low]");
9294
+ const reasonTag = chalk35.dim(`[${a.reason}]`);
9295
+ console.log(` ${confidenceTag} ${chalk35.green(a.file)}:${chalk35.yellow(String(a.line))} ${reasonTag}`);
9296
+ console.log(` ${chalk35.dim(a.preview)}`);
8903
9297
  }
8904
9298
  console.log();
9299
+ console.log(chalk35.yellow(" ┌─ Agents: Early Access / WIP ──────────────────────────────"));
9300
+ console.log(chalk35.yellow(" │ ") + chalk35.bold("Agent optimization & evaluation are actively in development."));
9301
+ console.log(chalk35.yellow(" │ ") + chalk35.dim("Detected patterns above are informational only."));
9302
+ console.log(chalk35.yellow(" │ ") + chalk35.dim("The Prompt path (single-shot + output schema) is production-ready."));
9303
+ console.log(chalk35.yellow(" │ ") + chalk35.dim("Join early access for the Agent path:"));
9304
+ console.log(chalk35.yellow(" │ ") + chalk35.cyan("https://www.mutagent.io/agents-partnership"));
9305
+ console.log(chalk35.yellow(" └────────────────────────────────────────────────────────────"));
9306
+ console.log();
8905
9307
  }
8906
- console.log(chalk34.dim(" ─────────────────────────────────"));
8907
- console.log(` ${chalk34.bold("Summary:")} ${String(result.prompts.length)} prompts, ${String(result.datasets.length)} datasets, ${String(result.markers.length)} markers`);
8908
- console.log(chalk34.dim(` Saved to .mutagent/mutation-context.md`));
9308
+ console.log(chalk35.dim(" ─────────────────────────────────"));
9309
+ console.log(` ${chalk35.bold("Summary:")} ${String(result.prompts.length)} prompts, ${String(result.datasets.length)} datasets, ${String(result.markers.length)} markers, ${String(result.agents.length)} agents`);
9310
+ console.log(chalk35.dim(` Saved to .mutagent/mutation-context.md`));
8909
9311
  console.log();
8910
9312
  }
8911
9313
  } catch (error) {
@@ -8917,181 +9319,1180 @@ Scanning ${scanPath}...
8917
9319
 
8918
9320
  // src/commands/skills.ts
8919
9321
  import { Command as Command17 } from "commander";
8920
- import chalk35 from "chalk";
9322
+ import chalk36 from "chalk";
8921
9323
  import { existsSync as existsSync12, mkdirSync as mkdirSync4, writeFileSync as writeFileSync5 } from "fs";
8922
- import { join as join7 } from "path";
9324
+ import { dirname, join as join7 } from "path";
8923
9325
  import { execSync as execSync4 } from "child_process";
8924
- function findRepoRoot() {
8925
- try {
8926
- return execSync4("git rev-parse --show-toplevel", { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
8927
- } catch {
8928
- return process.cwd();
8929
- }
8930
- }
8931
- var SKILL_FRONTMATTER = `---
9326
+
9327
+ // src/generated/skill-content.ts
9328
+ var SKILL_FILES = {
9329
+ "SKILL.md": `---
8932
9330
  name: mutagent-cli
8933
9331
  description: |
8934
9332
  MutagenT CLI - AI Prompt Optimization Platform CLI.
8935
9333
  Guides coding agents through prompt upload, evaluation creation,
8936
9334
  dataset curation, optimization, and framework integration.
8937
9335
  Triggers: "mutagent", "optimize prompt", "upload prompt", "integrate tracing",
8938
- "create evaluation", "upload dataset", "mutagent cli".
8939
- ---`;
8940
- var SKILL_BODY = `# MutagenT CLI Skill
8941
-
8942
- ## First Things First
8943
- Run \`mutagent --help\` — this is the CENTRAL navigation hub containing ALL commands, workflows, and rules.
8944
- Run \`mutagent <command> --help\` BEFORE using ANY command for the first time.
8945
-
8946
- ## Agent Protocol (4 Rules)
8947
- 1. EVERY command MUST include \`--json\`
8948
- 2. Run \`<command> --help\` BEFORE first use
8949
- 3. **MANDATORY**: Parse \`_directive.renderedCard\` and copy it into your CHAT RESPONSE verbatim. The card appears in bash output but that is NOT sufficient — the user cannot read collapsed bash blocks. HARD STOP: do NOT run any further commands until the card text is rendered in your chat message.
8950
- 4. ALL user interaction via AskUserQuestion — CLI is non-interactive
8951
-
8952
- ## Directive System
8953
- Every JSON response may include:
8954
- - \`_directive.renderedCard\` — Pre-formatted card that MUST be shown to the user verbatim
8955
- - \`_directive.instruction\`Next step for the agent
8956
- - \`_directive.next\` — Array of suggested commands
8957
- - \`_links\` — Dashboard/API URLs (format as markdown)
8958
-
8959
- The agent MUST parse and act on directives. Cards in collapsed bash output are invisible to the user always render them in chat.
8960
-
8961
- ## Prompt Variables
8962
- Use \`{single_braces}\`: \`"Analyze: {document}"\`
8963
-
8964
- ## Post-Onboarding Decision Tree
8965
-
8966
- After \`mutagent auth login\`, the user lands in one of 3 paths:
8967
-
8968
- ### Path A: Guided Integration (Tracing)
8969
- 1. \`mutagent explore\`Map codebase, discover prompts/agents
8970
- 2. User selects framework from detected or asks
8971
- 3. \`mutagent integrate <framework>\` Get integration instructions
8972
- 4. Apply tracing code to user's codebase
8973
- 5. Verify with test run
8974
-
8975
- ### Path B: Free-flow Optimization
8976
- 1. \`mutagent explore\` Map codebase, discover prompts
8977
- 2. User selects prompts to upload
8978
- 3. \`mutagent prompts create --data '{...}'\` — Upload with REQUIRED outputSchema
8979
- 4. Guided eval creator OR \`mutagent prompts evaluation create\`
8980
- 5. \`mutagent prompts dataset add\` Upload/curate datasets (named)
8981
- 6. \`mutagent usage --json\` View resource counts
8982
- 7. \`mutagent prompts optimize start\` — Run optimization
8983
- 8. Review scorecard → Apply/Reject optimized prompt
8984
-
8985
- ### Path C: Manual
8986
- User uses CLI commands directly. Run \`mutagent --help\`.
8987
-
8988
- ## Guided Evaluation Creation
8989
- Use \`--guided --json\` when creating evaluations (NEVER \`--guided\` alone it launches interactive prompts that agents can't use):
8990
- \\\`\\\`\\\`
8991
- mutagent prompts evaluation create <id> --guided --json
8992
- \\\`\\\`\\\`
8993
-
8994
- **When to use guided mode:**
8995
- The optimizer requires each criterion to have \`name\`, \`description\` (scoring rubric), and \`evaluationParameter\` (a field from the prompt's inputSchema or outputSchema). If the user's existing evaluation format doesn't match this shape — e.g. they have generic rubrics without per-field targeting, or criteria that don't map 1:1 to schema fields — use \`--guided\` to:
8996
- 1. Fetch the prompt's inputSchema + outputSchema fields
8997
- 2. Show which fields need coverage
8998
- 3. Return a criteria template with the exact JSON shape
8999
-
9000
- **Validation rules the CLI enforces:**
9001
- - Every criterion MUST have \`name\`, \`description\`, and \`evaluationParameter\`
9002
- - \`evaluationParameter\` MUST match an actual schema field name
9003
- - No duplicate \`evaluationParameter\` values each criterion targets a unique field
9004
- - ALL schema fields must be covered (missing fields = error)
9336
+ "create evaluation", "upload dataset", "explore prompts", "mutagent cli".
9337
+ ---
9338
+
9339
+ # MutagenT CLI Skill
9340
+
9341
+ > **Canonical source**: \`mutagent-cli/.claude/skills/mutagent-cli/SKILL.md\`
9342
+ > Packed into the CLI binary via \`scripts/sync-skill.ts\`. Installed to end-user
9343
+ > dev environments via \`mutagent skills install\`. Edit this file, not the installed copy.
9344
+
9345
+ ## SKILL vs CLI responsibility split
9346
+
9347
+ | Layer | Owner | Responsibility |
9348
+ |---|---|---|
9349
+ | **SKILL** (this file + subfiles) | here | journeys, routing, 5 rules, enforcement |
9350
+ | **CLI** | \`mutagent <cmd>\` | commands, flags, \`--json\`, \`_directive.*\`, \`_links\` |
9351
+ | Platform | api.mutagent.io | storage, optimization, eval execution, \`{variable}\` rendering |
9352
+
9353
+ **Rule**: SKILL never duplicates CLI flag lists always \`mutagent <cmd> --help\` for flags.
9354
+
9355
+ ---
9356
+
9357
+ ## 5 Core RulesNON-NEGOTIABLE
9358
+
9359
+ 1. **\`--json\` on EVERY command.** No exceptions. Agents use JSON mode exclusively.
9360
+ 2. **\`<command> --help\` BEFORE first use of any command.** The CLI is the source of truth for flags — this SKILL never inlines them.
9361
+ 3. **NEVER auto-generate eval criteria — collect from user.** Ask the user for each rubric field. See [concepts/eval-criteria.md](./concepts/eval-criteria.md) for the 3-tier format.
9362
+ 4. **Explore-before-modify.** Run \`mutagent explore --json\` before any write operation. Present findings, get user confirmation. Never mutate without discovery first.
9363
+ 5. **Cost transparency before \`optimize start\`.** Run \`mutagent usage --json\` and show the result to the user. Get explicit confirmation before any optimization job.
9364
+
9365
+ ---
9366
+
9367
+ ## Journey Routerroute by user intent
9368
+
9369
+ Match the user's first request. Load ONLY the matching subfile. Do NOT preload the whole set.
9370
+
9371
+ | User said / signal detected | Load subfile | Why |
9372
+ |---|---|---|
9373
+ | "trace", "observe", "integrate", "add framework" | [workflows/tracing.md](./workflows/tracing.md) | Non-destructive, fastest first-value path |
9374
+ | "optimize", "improve", "tune", "evaluate", "upload prompt" | [workflows/optimization.md](./workflows/optimization.md) | Full create→dataset→eval→optimize loop |
9375
+ | "explore", "scan", "find prompts", "what prompts", "discover" | [workflows/exploration.md](./workflows/exploration.md) | Read-only discovery + taxonomy |
9376
+ | \`AgentExecutor\`, \`StateGraph\`, \`createReactAgent\`, \`tool_calls\`, \`@tool\`, \`langgraph\`, \`crewai\`, \`autogen\`, \`openai/agents\`, multi-turn | [workflows/agents.md](./workflows/agents.md) | WIP path surface partnership link |
9377
+ | "how do variables work", "single vs double braces", delimiter | [concepts/prompt-variables.md](./concepts/prompt-variables.md) | Delimiter inference contract |
9378
+ | "what makes a good eval", "rubric", "evaluation criteria" | [concepts/eval-criteria.md](./concepts/eval-criteria.md) | INPUT MVC + OUTPUT Standards |
9379
+ | Unclear / first time | run \`mutagent explore --json\` first, then reroute | Discovery before action |
9380
+
9381
+ ---
9382
+
9383
+ ## Subfile Map
9384
+
9385
+ | File | WHEN to load | WHY | ENFORCEMENT |
9386
+ |---|---|---|---|
9387
+ | [workflows/tracing.md](./workflows/tracing.md) | User wants to add framework tracing / observability | Non-destructive append-only integration sequence | Must run explore first (Rule 4) |
9388
+ | [workflows/optimization.md](./workflows/optimization.md) | User wants to optimize or evaluate a prompt | Full loop: explore → upload → dataset → eval → optimize → apply | Must check usage before optimize (Rule 5); must collect rubrics from user (Rule 3) |
9389
+ | [workflows/exploration.md](./workflows/exploration.md) | User wants to scan codebase, identify prompts vs agents | Read-only discovery; output taxonomy to user | Run only; no writes |
9390
+ | [workflows/agents.md](./workflows/agents.md) | Multi-turn / tool-calling code detected | WIP — do NOT attempt optimizer, surface partnership link | Show WIP card to user verbatim |
9391
+ | [concepts/prompt-variables.md](./concepts/prompt-variables.md) | Any question about \`{var}\` vs \`{{var}}\`, delimiter inference | Brace convention + conversion rules | Load before \`prompts create\` in optimization workflow |
9392
+ | [concepts/eval-criteria.md](./concepts/eval-criteria.md) | Any question about rubric design, MVC, Output Standards | 3-tier rubric format — INPUT-param vs OUTPUT-param scope | Load before \`evaluation create --guided\` in optimization workflow |
9393
+
9394
+ ---
9395
+
9396
+ ## Output handling
9397
+
9398
+ After every CLI command:
9399
+ - **Show the command output to the user.** Command output appears in bash blocks that users may not see — always present the key results in your chat response.
9400
+ - **For evaluation create \`--guided\`**: the CLI provides per-field questions for each schema field. Ask the user each question in turn. Do not skip any field. Do not pre-fill answers.
9401
+ - **For \`optimize results\`**: present the before/after scorecard to the user and confirm whether to apply, view diff, or reject.
9402
+
9403
+ ---
9404
+
9405
+ ## Anti-patterns — NEVER do these
9406
+
9407
+ - Run any command without \`--json\`
9408
+ - Auto-generate eval criteria — always collect from the user
9409
+ - Skip any schema field when collecting evaluation rubrics
9410
+ - Skip \`mutagent explore --json\` before any write operation
9411
+ - Run \`optimize start\` without first showing \`usage --json\` to the user
9412
+ - Increase \`--max-iterations\` above 1 without explicit user consent (each iteration = LLM spend)
9413
+ - Run a multi-turn agent through the prompt optimizer
9414
+ - Skip showing command output results to the user
9415
+ - Inline CLI flags from memory — always read \`--help\` first
9416
+
9417
+ ---
9005
9418
 
9006
9419
  ## State Tracking
9007
- - \`.mutagent/mutation-context.md\` — Codebase index of discovered/uploaded prompts
9008
- - Check and update this file after explore, create, and dataset operations
9009
- - \`mutagent auth status\` — Auth + workspace state
9010
- - Comment markers (\`// MutagenT:START ... // MutagenT:END\`) in source files
9011
-
9012
- ## Post-Optimization
9013
- After \`optimize results\`: ALWAYS show the before/after diff to the user first. Then offer choices via AskUserQuestion: Apply / Reject.
9014
-
9015
- ## Evaluation Criteria Reminder
9016
- Every evaluation MUST specify criteria targeting either:
9017
- - Input variable fields (from inputSchema)
9018
- - Output fields (from outputSchema / structured output)
9019
-
9020
- ## Optimization Cost Control
9021
- - Default max-iterations is 1. NEVER increase without explicit user request.
9022
- - Each iteration incurs LLM costs. Starting >1 iterations without consent = protocol violation.
9023
- - If user wants more, confirm the number via AskUserQuestion first.
9024
-
9025
- ## Claude Code Hooks (Session Telemetry)
9026
- Optional — sends session activity to MutagenT traces API for observability.
9027
-
9028
- Install via \`.claude/settings.local.json\`:
9029
- \\\`\\\`\\\`json
9030
- {"hooks": {
9031
- "SessionStart": [{"matcher": "startup", "hooks": [{"type": "command", "command": "mutagent hooks claude-code session-start"}]}],
9032
- "Stop": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code session-end"}]}],
9033
- "PreToolUse": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code pre-tool-use"}]}],
9034
- "PostToolUse": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code post-tool-use"}]}]
9035
- }}
9036
- \\\`\\\`\\\`
9037
- Or run: \`mutagent init\` (option 1 installs skill + hooks together)`;
9038
- var SKILL_DIR = ".claude/skills/mutagent-cli";
9039
- var SKILL_FILE = "SKILL.md";
9040
- function createSkillsCommand() {
9041
- const skills = new Command17("skills").description("Manage MutagenT CLI skills for coding agents");
9042
- skills.command("install").description("Install MutagenT CLI skill for Claude Code").addHelpText("after", `
9043
- Examples:
9044
- ${chalk35.dim("$")} mutagent skills install
9045
9420
 
9046
- This creates a Claude Code skill at .claude/skills/mutagent-cli/SKILL.md
9047
- that teaches coding agents how to use the MutagenT CLI effectively.
9048
- `).action((_options, cmd) => {
9049
- const parentCmd = cmd.parent?.parent;
9050
- const isJson = parentCmd ? getJsonFlag(parentCmd) : false;
9051
- const output = new OutputFormatter(isJson ? "json" : "table");
9052
- const repoRoot = findRepoRoot();
9053
- const skillDir = join7(repoRoot, SKILL_DIR);
9054
- const skillPath = join7(skillDir, SKILL_FILE);
9055
- if (!existsSync12(skillDir)) {
9056
- mkdirSync4(skillDir, { recursive: true });
9057
- }
9058
- const content = `${SKILL_FRONTMATTER}
9421
+ - \`.mutagent/mutation-context.md\` codebase index of discovered/uploaded prompts. Update after explore, create, dataset ops.
9422
+ - \`mutagent auth status --json\` auth + workspace state.
9059
9423
 
9060
- ${SKILL_BODY}
9061
- `;
9062
- writeFileSync5(skillPath, content, "utf-8");
9063
- if (isJson) {
9064
- output.output({
9065
- installed: true,
9066
- path: skillPath,
9067
- name: "mutagent-cli"
9068
- });
9069
- } else {
9070
- output.success(`Installed MutagenT CLI skill`);
9071
- console.log(` ${chalk35.dim("Path:")} ${skillPath}`);
9072
- console.log("");
9073
- console.log(` ${chalk35.dim("This skill teaches coding agents how to use the MutagenT CLI.")}`);
9074
- console.log(` ${chalk35.dim("It will be automatically loaded by Claude Code when relevant triggers match.")}`);
9075
- }
9076
- });
9077
- return skills;
9078
- }
9424
+ ---
9079
9425
 
9080
- // src/commands/usage.ts
9081
- init_config();
9082
- import { Command as Command18 } from "commander";
9083
- import chalk36 from "chalk";
9084
- init_errors();
9085
- init_sdk_client();
9086
- var PROVIDERS_URL = "https://app.mutagent.io/settings/providers";
9087
- function createUsageCommand() {
9088
- const usage = new Command18("usage").description("Show resource counts (prompts, datasets, evaluations, optimizations, experiments)").addHelpText("after", `
9089
- Examples:
9090
- ${chalk36.dim("$")} mutagent usage
9091
- ${chalk36.dim("$")} mutagent usage --json
9092
- `);
9093
- usage.action(async () => {
9094
- const isJson = getJsonFlag(usage);
9426
+ ## Login (two paths)
9427
+
9428
+ - **CI / automated**: \`export MUTAGENT_API_KEY=mt_... && mutagent login --json\` — no browser, no prompts.
9429
+ - **Onboarding a user**: \`mutagent login --browser --json\` — CLI prints auth URL to stdout, polls 5 min. **Surface the URL verbatim to the user.** \`--non-interactive\` is NOT needed when \`--browser\` is set.
9430
+
9431
+ \`mutagent login\` is canonical. \`mutagent auth login\` is a back-compat alias. Both delegate to \`lib/auth-flow.ts\`. Decision record: [cli-design-principles.md](../../docs/cli-design-principles.md) → Login Unification.
9432
+
9433
+ ---
9434
+
9435
+ ## Extensibility
9436
+
9437
+ Add \`workflows/custom-<name>.md\` with frontmatter \`triggers: ["phrase"]\` auto-discovered by the decision tree fallback row. No rebuild needed.
9438
+ `,
9439
+ "concepts/eval-criteria.md": `---
9440
+ name: mutagent-cli-concepts-eval-criteria
9441
+ description: |
9442
+ Canonical source for MutagenT evaluation-criteria framing:
9443
+ INPUT-param criteria → Minimum Viable Context (MVC);
9444
+ OUTPUT-param criteria → Output Standards.
9445
+ 3-tier rubric discipline: grounded, observable, never vague.
9446
+ Includes current platform validation rules for criterion shape.
9447
+ Mirrored in mutagent/src/modules/prompts/prompt-evaluations/README.md.
9448
+ triggers:
9449
+ - "evaluation criteria"
9450
+ - "eval criteria"
9451
+ - "rubric"
9452
+ - "how do I evaluate"
9453
+ - "minimum viable context"
9454
+ - "output standards"
9455
+ - "evaluationParameter"
9456
+ - "guided eval"
9457
+ ---
9458
+
9459
+ # Concept — Guided Evaluation Criteria
9460
+
9461
+ > **Canonical source** for the INPUT vs OUTPUT framing. Mirrored in:
9462
+ > - \`mutagent-cli/src/commands/prompts/evaluation/guided-workflow.ts\` — kept in sync.
9463
+ > - \`mutagent/src/modules/prompts/prompt-evaluations/README.md\` as BE-side
9464
+ > reference — mirror, not a fork.
9465
+
9466
+ ## The golden rule
9467
+
9468
+ **Every evaluation criterion is scoped to EITHER the inputs OR the outputs of
9469
+ the prompt. Never both in one criterion.**
9470
+
9471
+ Mixing them produces vague criteria ("is the output good given the inputs?")
9472
+ that the optimizer cannot act on. When collecting rubrics from the user,
9473
+ explicitly frame each question around one of these two scopes.
9474
+
9475
+ ---
9476
+
9477
+ ## NEVER auto-generate criteria
9478
+
9479
+ This is Rule 3 of the [5 Core Rules](../SKILL.md). Reasons:
9480
+
9481
+ - Auto-generated rubrics are vague by default ("score based on accuracy")
9482
+ - The optimizer cannot act on vague criteria — it needs observable tiers
9483
+ - The user knows what "correct" means for their domain; the agent does not
9484
+ - Generic rubrics produce noisy scores that mislead the optimizer
9485
+
9486
+ **Always use AskUserQuestion to collect rubrics, one per variable/dimension.**
9487
+
9488
+ When \`evaluation create --guided --json\` runs, the CLI provides a list of
9489
+ fields that need rubrics. Ask the user about EVERY field in that list.
9490
+ Do not skip any field. Do not pre-fill answers. The user must provide each rubric.
9491
+
9492
+ ---
9493
+
9494
+ ## Input-param criteria → Minimum Viable Context (MVC)
9495
+
9496
+ **Scope**: the \`{variables}\` the prompt template consumes. Each \`{variable}\` is
9497
+ an **input param**. The criterion asks: *is the information required for the
9498
+ prompt to succeed actually present?*
9499
+
9500
+ ### 3-tier completeness scale (grounded, never vague)
9501
+
9502
+ - \`1.0\` — all required context present, no ambiguity
9503
+ - \`0.5\` — some context present, enough to attempt but likely partial/hedged answer
9504
+ - \`0.0\` — critical context missing, prompt cannot succeed regardless of model
9505
+
9506
+ ### Discipline rules
9507
+
9508
+ - **Grounded**: every tier must describe an observable property of the input
9509
+ data, not a feeling.
9510
+ - **Observable**: a human reader should assign a tier by looking at a single
9511
+ input row — no model output needed.
9512
+ - **Per-variable**: one criterion per \`{variable}\`. This localizes optimizer signal.
9513
+
9514
+ ### Before asking the user
9515
+
9516
+ Enumerate variables using the delimiter inferred by \`mutagent explore --json\`:
9517
+ - \`delimiter: "single"\` → \`{foo}\` — platform canonical
9518
+ - \`delimiter: "double"\` → \`{{foo}}\` — framework template; convert before upload
9519
+
9520
+ See [concepts/prompt-variables.md](./prompt-variables.md) for the full inference contract.
9521
+
9522
+ ### Example
9523
+
9524
+ For \`"Summarize {document} for {audience}"\`:
9525
+
9526
+ \`\`\`json
9527
+ [
9528
+ {
9529
+ "name": "document-present",
9530
+ "evaluationParameter": "document",
9531
+ "description": "1.0 = actual prose text >= 100 chars. 0.5 = short snippet or summary stub. 0.0 = filename, null, TODO, or empty."
9532
+ },
9533
+ {
9534
+ "name": "audience-concrete",
9535
+ "evaluationParameter": "audience",
9536
+ "description": "1.0 = concrete persona (e.g. 'junior Python devs'). 0.5 = broad category ('engineers'). 0.0 = empty, 'general', 'everyone'."
9537
+ }
9538
+ ]
9539
+ \`\`\`
9540
+
9541
+ Note the 1:1 mapping: one criterion per variable, \`evaluationParameter\` equal
9542
+ to the variable name.
9543
+
9544
+ ---
9545
+
9546
+ ## Output-param criteria → Output Standards
9547
+
9548
+ **Scope**: the model's response shape and content. Pick the dimensions relevant
9549
+ to the task and write one criterion per dimension.
9550
+
9551
+ ### Common dimensions
9552
+
9553
+ - **Content correctness** — right answer, right facts, right tone
9554
+ - **Structural correctness** — matches \`outputSchema\`, required fields, enums, length
9555
+ - **Groundedness** — facts in the output traceable to facts in the input
9556
+ - **Format compliance** — JSON validity, markdown shape, regex match
9557
+
9558
+ ### 3-tier correctness scale
9559
+
9560
+ Concrete pass/fail per tier. **Never** write "0-1 scale" or "score based on
9561
+ accuracy" — those are vague and the optimizer can't act on them.
9562
+
9563
+ ### Example (JSON-output summarizer)
9564
+
9565
+ \`\`\`json
9566
+ {
9567
+ "name": "summary-correctness",
9568
+ "evaluationParameter": "summary",
9569
+ "description": "1.0 = valid JSON, all 3 required fields, covers all key arguments. 0.5 = valid JSON, 1-2 arguments missing/hedged. 0.0 = invalid JSON OR fabricated facts."
9570
+ }
9571
+ \`\`\`
9572
+
9573
+ The \`evaluationParameter\` here is the output-schema field name, not an input
9574
+ variable. Same 1:1 discipline — one criterion per output dimension.
9575
+
9576
+ ---
9577
+
9578
+ ## Platform validation rules (current)
9579
+
9580
+ When creating an evaluation via \`mutagent prompts evaluation create -d '<json>'\`,
9581
+ each criterion must pass these platform-enforced checks:
9582
+
9583
+ | Field | Required | Validation |
9584
+ |---|---|---|
9585
+ | \`name\` | yes | slug-like, no spaces, \`[a-z0-9-_]\` |
9586
+ | \`description\` | yes | non-empty, ideally >= 20 chars with tier definitions |
9587
+ | \`evaluationParameter\` | yes | must match a variable name from the prompt OR an output field name |
9588
+
9589
+ **Common validation failures:**
9590
+ - \`evaluationParameter\` references a variable not in the prompt template → rejected
9591
+ - \`description\` is too short or vague → accepted by platform but produces poor scores
9592
+ - Multiple criteria with the same \`evaluationParameter\` → accepted but wasteful
9593
+
9594
+ ---
9595
+
9596
+ ## How to apply when creating an evaluation
9597
+
9598
+ 1. **Read the prompt template.** Enumerate \`{variables}\` (input params) +
9599
+ expected output shape (from \`outputSchema\` or the code's parse logic).
9600
+ 2. **Ask the user**: "Evaluate INPUTS (is context sufficient) or OUTPUTS
9601
+ (is response correct) first?" — let the user pick the scope.
9602
+ 3. **Collect criteria**: use AskUserQuestion to collect from user, never auto-generate — one per variable (INPUT) or per dimension (OUTPUT),
9603
+ always with a 3-tier rubric describing observable behavior.
9604
+ 4. **Map to platform shape**:
9605
+ \`\`\`typescript
9606
+ {
9607
+ name: string; // short, slug-like
9608
+ description: string; // the 3-tier rubric verbatim
9609
+ evaluationParameter: string; // the variable name OR output field
9610
+ }
9611
+ \`\`\`
9612
+ 5. **Upload** via \`mutagent prompts evaluation create <id> -d '<json>' --json\`.
9613
+
9614
+ The \`--guided\` flag walks the user through this flow interactively — use it
9615
+ when the user is new to the concept. Follow the CLI's next-step guidance in
9616
+ the output to collect rubrics in the correct order.
9617
+
9618
+ ---
9619
+
9620
+ ## Anti-patterns
9621
+
9622
+ - **Auto-generating criteria** — Rule 3: NEVER. Always collect from user.
9623
+ - **Mixing input and output in one criterion** — breaks signal; split into two.
9624
+ - **Vague rubrics** — "0.8 if mostly good" → rewrite with observable tiers.
9625
+ - **One criterion for many variables** — reduces signal, slows optimization.
9626
+ - **Scoring the model, not the data** — MVC scores the INPUT data quality.
9627
+
9628
+ ---
9629
+
9630
+ ## Cross-references
9631
+
9632
+ - [SKILL.md](../SKILL.md) → 5 rules (Rule 3: never auto-generate)
9633
+ - [workflows/optimization.md](../workflows/optimization.md) → steps 7-9 (where this concept is applied)
9634
+ - [concepts/prompt-variables.md](./prompt-variables.md) → delimiter inference (used in MVC step)
9635
+ - \`mutagent/src/modules/prompts/prompt-evaluations/README.md\` → BE mirror
9636
+ `,
9637
+ "concepts/prompt-variables.md": `---
9638
+ name: mutagent-cli-concepts-prompt-variables
9639
+ description: |
9640
+ Prompt template variable delimiter inference contract.
9641
+ Platform canonical is single-brace {variable}. Third-party frameworks vary
9642
+ (Handlebars / Mustache / Liquid / Jinja2 use double {{variable}}).
9643
+ \`mutagent explore\` infers the delimiter per file and surfaces it in
9644
+ \`--json\` output as \`delimiter: "single" | "double"\`.
9645
+ Includes conversion rules for upload and apply phases.
9646
+ triggers:
9647
+ - "prompt variables"
9648
+ - "template variables"
9649
+ - "single vs double brace"
9650
+ - "{variable}"
9651
+ - "{{variable}}"
9652
+ - "delimiter"
9653
+ - "inferPromptVariables"
9654
+ - "brace convention"
9655
+ - "convert variables"
9656
+ ---
9657
+
9658
+ # Concept — Prompt Variables
9659
+
9660
+ ## Platform canonical
9661
+
9662
+ **MutagenT platform uses single-brace \`{variable}\`.** The platform renders
9663
+ prompts by substituting \`{name}\` with the provided value at optimization /
9664
+ evaluation time.
9665
+
9666
+ ## Third-party framework variance
9667
+
9668
+ Real-world codebases use different delimiters depending on which prompt
9669
+ framework the user already has installed:
9670
+
9671
+ | Framework | Delimiter | Example |
9672
+ |---|---|---|
9673
+ | **MutagenT platform** (canonical) | single | \`{document}\` |
9674
+ | **LangChain** \`PromptTemplate\` | single | \`{document}\` |
9675
+ | **LangChain** \`ChatPromptTemplate\` + Mustache | double | \`{{document}}\` |
9676
+ | **Handlebars** | double | \`{{document}}\` |
9677
+ | **Mustache** | double | \`{{document}}\` |
9678
+ | **LiquidJS** | double | \`{{ document }}\` |
9679
+ | **Jinja2** (Python) | double | \`{{ document }}\` |
9680
+
9681
+ ---
9682
+
9683
+ ## Brace conversion — upload and apply
9684
+
9685
+ Getting this wrong breaks templates after optimization. Follow the two-phase rule:
9686
+
9687
+ ### Phase 1 — Upload (code → MutagenT)
9688
+
9689
+ If the code has \`{{double}}\` braces:
9690
+ 1. Warn the user: "Your template uses \`{{double}}\` braces (Handlebars/LangChain Mustache). MutagenT uses \`{single}\` braces. I'll convert before uploading."
9691
+ 2. Convert \`{{name}}\` → \`{name}\` in the prompt content passed to \`mutagent prompts create\`.
9692
+ 3. Record the original delimiter in \`.mutagent/mutation-context.md\` so the apply phase knows to convert back.
9693
+
9694
+ If the code has \`{single}\` braces: no conversion needed — upload as-is.
9695
+
9696
+ ### Phase 2 — Apply (MutagenT → code)
9697
+
9698
+ After optimization, the platform returns a prompt with \`{single}\` braces.
9699
+
9700
+ If the original codebase used \`{{double}}\` braces:
9701
+ 1. Convert \`{name}\` → \`{{name}}\` in the optimized prompt before writing to the source file.
9702
+ 2. Confirm with the user before saving.
9703
+
9704
+ If the original codebase used \`{single}\` braces: write the optimized prompt as-is.
9705
+
9706
+ **Summary table:**
9707
+
9708
+ | Code uses | Upload | Optimized output | Write back to code |
9709
+ |---|---|---|---|
9710
+ | \`{single}\` | as-is | \`{single}\` | as-is |
9711
+ | \`{{double}}\` | convert to \`{single}\` | \`{single}\` | convert back to \`{{double}}\` |
9712
+
9713
+ ---
9714
+
9715
+ ## Per-file inference — \`mutagent explore\`
9716
+
9717
+ The CLI's \`mutagent explore\` command calls \`inferPromptVariables()\` on every
9718
+ matching source file and **infers the delimiter per file** rather than
9719
+ globally. A single repository may contain both LangChain \`PromptTemplate\`
9720
+ (single) and Handlebars email templates (double) side by side.
9721
+
9722
+ ### Inference algorithm
9723
+
9724
+ 1. **Strip fenced markdown code blocks** first (\` \`\`\` ... \`\`\` \`). Avoids false
9725
+ positives from prompts that document JSON examples in fenced blocks.
9726
+ 2. **Count \`{{name}}\` matches** → \`doubleHits\`.
9727
+ 3. **Count \`{name}\` matches** that are NOT adjacent to \`{\` (not part of \`{{...}}\`)
9728
+ and NOT followed by \`"\` (JSON-key skip) → \`singleHits\`.
9729
+ 4. **Majority wins**: \`doubleHits > singleHits\` → \`double\`, else \`single\`.
9730
+ 5. **Tie-break**: singleHits === doubleHits (including the 0/0 case) →
9731
+ \`single\` (platform canonical).
9732
+
9733
+ ### Escaped-JSON caveat
9734
+
9735
+ Prompts like \`"Return {\\"status\\": \\"ok\\"}"\` — the \`{\` is followed by \`"\`, so
9736
+ the single-brace regex deliberately skips it. Never treat literal JSON keys as
9737
+ template variables.
9738
+
9739
+ ---
9740
+
9741
+ ## How to use the delimiter field
9742
+
9743
+ \`mutagent explore --json\` surfaces the inferred delimiter per discovered prompt:
9744
+
9745
+ \`\`\`json
9746
+ {
9747
+ "prompts": [
9748
+ {
9749
+ "file": "src/prompts/summarize.ts",
9750
+ "line": 12,
9751
+ "preview": "const prompt = \`Summarize {document} for {audience}\`;",
9752
+ "reason": "template-variable",
9753
+ "confidence": "high",
9754
+ "delimiter": "single"
9755
+ },
9756
+ {
9757
+ "file": "src/emails/welcome.hbs",
9758
+ "line": 3,
9759
+ "preview": "<p>Hello {{name}}, welcome to {{product}}!</p>",
9760
+ "reason": "template-variable",
9761
+ "confidence": "high",
9762
+ "delimiter": "double"
9763
+ }
9764
+ ]
9765
+ }
9766
+ \`\`\`
9767
+
9768
+ Use the delimiter field to:
9769
+ - Enumerate variables correctly (don't treat \`{{foo}}\` as two \`{foo}\` tokens).
9770
+ - Decide whether to convert before upload (Phase 1 above).
9771
+ - Drive the [concepts/eval-criteria.md](./eval-criteria.md) → MVC step (one criterion per variable).
9772
+
9773
+ ---
9774
+
9775
+ ## Edge cases
9776
+
9777
+ - **Empty prompt** — no variables → tie (0/0) → \`single\` (canonical).
9778
+ - **Mixed delimiters in one file** — majority wins, tied files default to \`single\`.
9779
+ Warn the user: their codebase probably has two prompt systems co-existing.
9780
+ - **Nested braces** \`{{{ foo }}}\` — Handlebars triple-brace (no-escape). Currently
9781
+ matched by the double regex as \`{{ foo }}\`; outer \`}\` ignored. Fine for inference.
9782
+
9783
+ ---
9784
+
9785
+ ## Cross-references
9786
+
9787
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
9788
+ - [workflows/optimization.md](../workflows/optimization.md) → step 3 (delimiter drives variable enumeration) and step 15 (apply conversion)
9789
+ - [concepts/eval-criteria.md](./eval-criteria.md) → MVC (Minimum Viable Context) — uses delimiter to enumerate input params
9790
+ - Source: \`mutagent-cli/src/lib/explorer.ts\` → \`inferPromptVariables()\` and \`DiscoveredPrompt.delimiter\`
9791
+ - Tests: \`mutagent-cli/src/__tests__/lib/explorer.test.ts\`
9792
+ `,
9793
+ "workflows/agents.md": `---
9794
+ name: mutagent-cli-workflows-agents
9795
+ description: |
9796
+ Agent path (multi-turn, tool-calling) — WORK IN PROGRESS.
9797
+ Agent optimization & evaluation are actively in development.
9798
+ This workflow short-circuits to a partnership link, offers read-only CRUD,
9799
+ and optionally extracts a sub-prompt back to the Optimization path.
9800
+ triggers:
9801
+ - "agent"
9802
+ - "langchain agent"
9803
+ - "langgraph"
9804
+ - "crewai"
9805
+ - "autogen"
9806
+ - "openai agents"
9807
+ - "tool calling"
9808
+ - "multi-turn"
9809
+ - "AgentExecutor"
9810
+ - "createReactAgent"
9811
+ - "StateGraph"
9812
+ ---
9813
+
9814
+ # Workflow — Agents (WIP)
9815
+
9816
+ > **This path is WORK IN PROGRESS.** Agent optimization and evaluation are
9817
+ > actively in development. Do NOT run a multi-turn agent through the prompt
9818
+ > optimizer — the platform will reject it, and scores would be meaningless for
9819
+ > a tool-calling loop.
9820
+
9821
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing.
9822
+
9823
+ ---
9824
+
9825
+ ## When this workflow applies
9826
+
9827
+ Load this file when the journey router in [SKILL.md](../SKILL.md) matched one of these
9828
+ signals in the user's code:
9829
+
9830
+ - \`AgentExecutor\`, \`createReactAgent\`, \`createToolCallingAgent\`, \`createStructuredChatAgent\`
9831
+ - \`from "openai/agents"\` or \`from "@openai/agents"\`
9832
+ - \`from "crewai"\`, \`from "autogen"\`, \`from "autogen_agentchat"\`
9833
+ - \`from "@langchain/langgraph"\`, \`StateGraph(\`
9834
+ - \`tool_calls\` / \`toolCalls\` property access
9835
+ - \`@tool\` decorator (Python)
9836
+ - \`tools: [...]\` array in an LLM call config
9837
+ - \`function_call\` / \`tool_choice\` fields
9838
+ - \`while\` loop + LLM call
9839
+
9840
+ \`mutagent explore --json\` flags these under \`agents[]\` (not \`prompts[]\`).
9841
+
9842
+ ---
9843
+
9844
+ ## Required card (show this verbatim to the user)
9845
+
9846
+ When the agent path is triggered, copy this card into your chat response
9847
+ verbatim — do NOT paraphrase, do NOT collapse into a bash block:
9848
+
9849
+ \`\`\`
9850
+ I see you have an Agent (multi-turn / tool-calling). Agent Optimization &
9851
+ Evaluations are actively in development in MutagenT. For early access and
9852
+ to partner with us on the roadmap:
9853
+ → https://www.mutagent.io/agents-partnership
9854
+ \`\`\`
9855
+
9856
+ ---
9857
+
9858
+ ## Sequence
9859
+
9860
+ \`\`\`
9861
+ 1. Run \`mutagent explore --json\` if you haven't already.
9862
+ → Confirm agents[] is non-empty.
9863
+ → Show command output to user.
9864
+
9865
+ 2. Show the WIP card above verbatim in your chat response.
9866
+
9867
+ 3. Use AskUserQuestion to explain that agent code cannot be directly optimized:
9868
+ "Your code looks like a multi-turn agent. I can't run it
9869
+ through the prompt optimizer yet. Would you like to:
9870
+ (a) Join early access → https://www.mutagent.io/agents-partnership
9871
+ (b) Inspect existing agents in MutagenT (read-only CRUD)
9872
+ (c) Extract a single sub-prompt inside the agent loop and optimize it"
9873
+
9874
+ 4. Branch:
9875
+ (a) → surface the URL verbatim. STOP.
9876
+ (b) → \`mutagent agents list --json\` ; \`mutagent agents get <id> --json\`
9877
+ → show results to user. STOP (no mutations available).
9878
+ (c) → extract one sub-prompt, then route to [workflows/optimization.md](./optimization.md)
9879
+ treating the sub-prompt as a standalone Prompt.
9880
+ \`\`\`
9881
+
9882
+ ---
9883
+
9884
+ ## Branch (c) — extracting a sub-prompt
9885
+
9886
+ Multi-turn agents often contain inner prompts that ARE suitable for the
9887
+ Optimization path:
9888
+
9889
+ - a planner prompt ("given this user goal, list the tools you'd call")
9890
+ - a summarizer prompt ("given these tool results, write the final answer")
9891
+ - a classifier prompt ("which tool should handle this input?")
9892
+
9893
+ Each of these is a single-shot prompt with a clear output schema. Extract ONE,
9894
+ treat it as a standalone Prompt.
9895
+
9896
+ When extracting:
9897
+ 1. Identify the exact string literal or template that becomes the sub-prompt.
9898
+ 2. Enumerate its \`{variables}\` per [concepts/prompt-variables.md](../concepts/prompt-variables.md).
9899
+ 3. Confirm with user: "I'll optimize the planner prompt only, not the full agent. Sound right?"
9900
+ 4. On confirmation → load [workflows/optimization.md](./optimization.md) from step 3 (prompts create).
9901
+
9902
+ Do NOT try to extract the whole agent loop at once.
9903
+
9904
+ ---
9905
+
9906
+ ## What NOT to do
9907
+
9908
+ - **Do not** call \`mutagent prompts optimize start\` on an agent file.
9909
+ - **Do not** upload an agent's full system prompt + tool definitions as a single "prompt".
9910
+ - **Do not** suggest "try it anyway" — the WIP status is deliberate.
9911
+ - **Do not** skip showing the WIP card to the user — they need the partnership URL.
9912
+
9913
+ ---
9914
+
9915
+ ## Cross-references
9916
+
9917
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
9918
+ - [workflows/exploration.md](./exploration.md) → where \`agents[]\` entries are first detected
9919
+ - [workflows/optimization.md](./optimization.md) → branch (c) destination
9920
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → \`{foo}\` vs \`{{foo}}\` for sub-prompt extraction
9921
+ - Partnership link: https://www.mutagent.io/agents-partnership
9922
+ `,
9923
+ "workflows/exploration.md": `---
9924
+ name: mutagent-cli-workflows-exploration
9925
+ description: |
9926
+ Read-only codebase scan workflow. Discovers prompts and agents, classifies
9927
+ each as optimization-eligible (prompt) or WIP (agent), and presents a
9928
+ structured taxonomy to the user before any write operations.
9929
+ triggers:
9930
+ - "explore"
9931
+ - "scan codebase"
9932
+ - "find prompts"
9933
+ - "what prompts do I have"
9934
+ - "discover prompts"
9935
+ - "what's in my codebase"
9936
+ ---
9937
+
9938
+ # Workflow — Exploration (Read-Only Discovery)
9939
+
9940
+ > **This workflow is read-only.** No writes, no uploads, no mutations.
9941
+ > Use it to understand what's in the codebase before deciding next steps.
9942
+
9943
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. Key reminders:
9944
+ - \`--json\` on every command
9945
+ - \`<command> --help\` before first use
9946
+ - This workflow ends with a user question — never auto-proceed to writes
9947
+
9948
+ ---
9949
+
9950
+ ## When this workflow applies
9951
+
9952
+ - User said "explore my codebase", "what prompts do I have", "find prompts", "scan"
9953
+ - Intent is unclear and you need to discover before acting
9954
+ - User wants to understand what's optimizable before committing to a path
9955
+
9956
+ ---
9957
+
9958
+ ## Prompt vs Agent — taxonomy
9959
+
9960
+ Before running explore, understand what the CLI will return:
9961
+
9962
+ **PROMPT (optimization-eligible)**
9963
+ - Single LLM call: input → LLM → output
9964
+ - Template with \`{variables}\` (single) or \`{{variables}}\` (double brace)
9965
+ - Can be evaluated with G-Eval (input/output pair)
9966
+ - Can be optimized by Metatuner
9967
+ - Appears in \`prompts[]\` in explore output
9968
+
9969
+ **AGENT (NOT directly optimizable)**
9970
+ - Multi-turn loop: input → LLM → tool → LLM → ... → output
9971
+ - Dynamic branching execution
9972
+ - Cannot be optimized like a prompt (no fixed output schema)
9973
+ - Requires agent-level evaluation (not yet available)
9974
+ - Appears in \`agents[]\` in explore output
9975
+
9976
+ ### Agent detection heuristics (what \`mutagent explore\` flags)
9977
+
9978
+ | Pattern | Framework | Classification |
9979
+ |---|---|---|
9980
+ | \`new StateGraph(\` / \`StateGraph.compile()\` | LangGraph | agent |
9981
+ | \`new Agent(\` / \`agent.execute()\` | Mastra / custom | agent |
9982
+ | \`AgentExecutor\` / \`createReactAgent\` / \`createToolCallingAgent\` | LangChain | agent |
9983
+ | \`from "openai/agents"\` / \`from "@openai/agents"\` | OpenAI Agents SDK | agent |
9984
+ | \`from "crewai"\` / \`from "autogen"\` | CrewAI / AutoGen | agent |
9985
+ | \`tools: [...]\` + LLM call in a loop | custom | agent |
9986
+ | \`while\` loop + LLM call | custom agent loop | agent |
9987
+ | \`@tool\` decorator (Python) | any | agent |
9988
+ | \`tool_calls\` / \`toolCalls\` property access | any | agent |
9989
+ | Single \`openai.chat()\` or template string | — | prompt |
9990
+ | String with \`{variable}\` / \`{{variable}}\` | — | prompt |
9991
+
9992
+ ---
9993
+
9994
+ ## Workflow steps
9995
+
9996
+ \`\`\`
9997
+ 1. mutagent explore --json
9998
+ → surfaces: prompts[], agents[], datasets[], markers[]
9999
+ → show command output to user
10000
+
10001
+ 2. Classify results:
10002
+ - prompts[] → optimization-eligible (single-shot, output schema)
10003
+ - agents[] → WIP (multi-turn/tool-calling) — route to [workflows/agents.md](./agents.md)
10004
+ - datasets[] → existing local data (uploadable in optimization workflow)
10005
+ - markers[] → already-uploaded items (show dashboard links)
10006
+
10007
+ 3. Note the \`delimiter\` field on each prompt entry:
10008
+ - "single" → {variable} — MutagenT native, no conversion needed
10009
+ - "double" → {{variable}} — framework template, conversion required on upload
10010
+ See [concepts/prompt-variables.md](../concepts/prompt-variables.md) for the conversion rules.
10011
+
10012
+ 4. Use AskUserQuestion to present findings and ask which prompts to upload:
10013
+ "Here's what I found in your codebase:
10014
+ - N prompt(s) found: [list files]
10015
+ - N agent(s) found: [list files] (WIP — not optimizable yet)
10016
+ - N dataset(s) found: [list]
10017
+ - N already-uploaded: [list]
10018
+ What would you like to do?"
10019
+
10020
+ 5. Route based on user answer:
10021
+ - "optimize this prompt" → load [workflows/optimization.md](./optimization.md)
10022
+ - "add tracing" → load [workflows/tracing.md](./tracing.md)
10023
+ - "tell me about the agent" → load [workflows/agents.md](./agents.md)
10024
+ - "nothing yet" → STOP (read-only complete)
10025
+ \`\`\`
10026
+
10027
+ ---
10028
+
10029
+ ## Output handling
10030
+
10031
+ After step 1, show the command output to the user before proceeding to classification. Do NOT proceed to step 2 until the user has seen the results.
10032
+
10033
+ ---
10034
+
10035
+ ## Brace convention note
10036
+
10037
+ \`mutagent explore --json\` surfaces \`delimiter: "single" | "double"\` per discovered prompt. Use this before deciding how to enumerate variables. See [concepts/prompt-variables.md](../concepts/prompt-variables.md) for the full inference contract and conversion rules.
10038
+
10039
+ ---
10040
+
10041
+ ## Common pitfalls
10042
+
10043
+ - Skipping the classification step → user gets a raw JSON dump instead of a next-action recommendation
10044
+ - Treating \`agents[]\` entries as optimization-eligible → they are NOT; route to [workflows/agents.md](./agents.md)
10045
+ - Auto-proceeding to writes after explore → always confirm with user first
10046
+ - Ignoring the \`delimiter\` field → wrong variable enumeration when uploading a double-brace prompt
10047
+
10048
+ ---
10049
+
10050
+ ## Cross-references
10051
+
10052
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
10053
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → \`{foo}\` vs \`{{foo}}\` inference + conversion
10054
+ - [workflows/optimization.md](./optimization.md) → next step after exploration (prompt path)
10055
+ - [workflows/tracing.md](./tracing.md) → next step after exploration (integration path)
10056
+ - [workflows/agents.md](./agents.md) → next step after exploration (agent path)
10057
+ `,
10058
+ "workflows/optimization.md": `---
10059
+ name: mutagent-cli-workflows-optimization
10060
+ description: |
10061
+ Full prompt optimization journey: explore → prompts create → dataset add →
10062
+ evaluation create (guided) → optimize start → watch/status → results → apply.
10063
+ Enforces 5 rules: --json always, --help before first use, user-collected
10064
+ eval criteria, explore-before-modify, cost transparency before optimize.
10065
+ triggers:
10066
+ - "optimize prompt"
10067
+ - "improve prompt"
10068
+ - "tune prompt"
10069
+ - "evaluate prompt"
10070
+ - "upload prompt"
10071
+ - "create evaluation"
10072
+ - "upload dataset"
10073
+ - "run optimizer"
10074
+ - "start optimization"
10075
+ ---
10076
+
10077
+ # Workflow — Optimization (Full Journey)
10078
+
10079
+ > **This is the full loop.** Expect 5-10 CLI calls and at least one long-running
10080
+ > optimizer job. Each step requires user confirmation. Never auto-run the full
10081
+ > chain without presenting findings at each gate.
10082
+
10083
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. All 5 rules apply here:
10084
+ - \`--json\` on every command (Rule 1)
10085
+ - \`--help\` before first use of any command (Rule 2)
10086
+ - **NEVER auto-generate eval criteria** — collect from user (Rule 3)
10087
+ - \`mutagent explore --json\` before any write (Rule 4)
10088
+ - \`mutagent usage --json\` before \`optimize start\` (Rule 5)
10089
+
10090
+ ---
10091
+
10092
+ ## When this workflow applies
10093
+
10094
+ - User said "optimize prompt", "improve prompt", "tune prompt"
10095
+ - User wants to upload a prompt and measure its quality
10096
+ - User wants to run the Metatuner optimizer
10097
+
10098
+ ---
10099
+
10100
+ ## Required pre-reads (load these before the relevant steps)
10101
+
10102
+ | Step | Pre-read | Why |
10103
+ |---|---|---|
10104
+ | Before \`prompts create\` | [concepts/prompt-variables.md](../concepts/prompt-variables.md) | Brace convention — single \`{var}\` vs double \`{{var}}\` affects how variables are parsed |
10105
+ | Before \`evaluation create --guided\` | [concepts/eval-criteria.md](../concepts/eval-criteria.md) | INPUT MVC + OUTPUT Standards — 3-tier rubric format |
10106
+
10107
+ ---
10108
+
10109
+ ## Directive chain
10110
+
10111
+ \`\`\`
10112
+ explore → prompts create → dataset add → evaluation create --guided
10113
+ → [Use AskUserQuestion to collect rubrics from the user for each field]
10114
+ → evaluation create -d '<json>'
10115
+ → usage check
10116
+ → [Use AskUserQuestion to confirm optimization cost with user]
10117
+ → optimize start
10118
+ → optimize status (poll)
10119
+ → optimize results
10120
+ → [Use AskUserQuestion to present scorecard: Apply / View Diff / Reject]
10121
+ → On Apply: Edit source file
10122
+ \`\`\`
10123
+
10124
+ ---
10125
+
10126
+ ## Full workflow steps
10127
+
10128
+ \`\`\`
10129
+ 1. mutagent explore --json
10130
+ → find candidate prompts in codebase
10131
+ → show command output to user
10132
+ → confirm with user: "Which prompt would you like to optimize?"
10133
+
10134
+ 2. mutagent prompts --help
10135
+ mutagent prompts create --help
10136
+ → read flags before using (Rule 2)
10137
+
10138
+ 3. Load [concepts/prompt-variables.md](../concepts/prompt-variables.md)
10139
+ → determine if prompt uses {single} or {{double}} braces
10140
+ → if double-brace: warn user about conversion requirement
10141
+
10142
+ 4. mutagent prompts create --name <name> [--system-file / --raw-file] --json
10143
+ → show command output to user
10144
+ → record promptId from response
10145
+
10146
+ 5. mutagent prompts dataset add --help
10147
+ → read flags before using
10148
+
10149
+ 6. mutagent prompts dataset add <promptId> -d '[...]' --name "<name>" --json
10150
+ → upload dataset rows (input/output pairs)
10151
+ → show command output to user
10152
+ → record datasetId
10153
+
10154
+ 7. Load [concepts/eval-criteria.md](../concepts/eval-criteria.md)
10155
+ → understand INPUT-param (MVC) vs OUTPUT-param (Standards) scope
10156
+
10157
+ 8. mutagent prompts evaluation create <promptId> --guided --json
10158
+ → the CLI provides a list of fields, each needing a rubric
10159
+ → follow the CLI's next-step guidance in the output
10160
+ → for EVERY field listed (INPUT scope first, then OUTPUT):
10161
+ - ask the user the provided question for that field
10162
+ - wait for user response
10163
+ - do NOT skip any field
10164
+ - do NOT auto-generate any answer
10165
+ → collect at minimum: one INPUT criterion per {variable}, one OUTPUT criterion
10166
+
10167
+ 9. mutagent prompts evaluation create <promptId> -d '<json>' --json
10168
+ → upload the criteria collected in step 8
10169
+ → show command output to user
10170
+ → record evaluationId
10171
+
10172
+ 10. mutagent usage --json
10173
+ → show usage/quota to user
10174
+ → confirm with user: "This optimization will use N iterations (~X min each).
10175
+ You have Y remaining. Proceed?"
10176
+ → STOP if user declines
10177
+
10178
+ 11. mutagent prompts optimize start <promptId> \\
10179
+ --dataset <datasetId> \\
10180
+ --evaluation <evaluationId> \\
10181
+ --max-iterations 1 \\
10182
+ --json
10183
+ → NEVER set --max-iterations > 1 without explicit user consent
10184
+ → record jobId from response
10185
+
10186
+ 12. mutagent prompts optimize status <jobId> --json
10187
+ → poll until status = "completed" or "failed"
10188
+ → show progress to user
10189
+
10190
+ 13. mutagent prompts optimize results <jobId> --json
10191
+ → ALWAYS show before/after scorecard to user
10192
+
10193
+ 14. Confirm with user: "Here's the before/after scorecard. What would you like to do?
10194
+ (a) Apply — update the prompt in your source file
10195
+ (b) View diff first
10196
+ (c) Reject — keep the original"
10197
+
10198
+ 15. On Apply (a): Edit the prompt in the user's source file
10199
+ → replace old prompt text with optimized version
10200
+ → if double-brace codebase: convert {variable} back to {{variable}}
10201
+ → confirm with user before saving
10202
+ \`\`\`
10203
+
10204
+ ---
10205
+
10206
+ ## Cost control
10207
+
10208
+ - Default \`--max-iterations 1\` is the only value you may use without explicit consent.
10209
+ - If user requests more: confirm the number with user → confirm the cost implication.
10210
+ - Each iteration = one full G-Eval run over the dataset × LLM calls. This costs real money.
10211
+
10212
+ ---
10213
+
10214
+ ## Apply / Reject rules
10215
+
10216
+ - **Apply**: edit the source file with the optimized prompt. If the codebase used \`{{double}}\` braces, convert the optimized \`{single}\` brace output back to \`{{double}}\` before writing. See [concepts/prompt-variables.md](../concepts/prompt-variables.md) → Conversion.
10217
+ - **Reject**: no file changes. Record the jobId in \`.mutagent/mutation-context.md\` for future reference.
10218
+ - **View diff**: show a unified diff of old vs new prompt text before asking again.
10219
+
10220
+ ---
10221
+
10222
+ ## Common pitfalls
10223
+
10224
+ - Running \`optimize start\` before \`evaluation create\` → optimizer has no scoring signal
10225
+ - Mixing INPUT and OUTPUT criteria in the same rubric → vague scores
10226
+ - Applying results without showing the before/after scorecard first
10227
+ - Forgetting to convert \`{single}\` back to \`{{double}}\` after apply in double-brace codebases
10228
+ - Starting with \`--max-iterations 3\` without consent
10229
+
10230
+ ---
10231
+
10232
+ ## Guided Dataset Creation
10233
+
10234
+ When no local dataset exists, use the guided mode to curate high-quality test data:
10235
+
10236
+ \`\`\`
10237
+ mutagent prompts dataset add <prompt-id> --guided --json
10238
+ \`\`\`
10239
+
10240
+ The CLI analyzes the prompt's inputSchema + outputSchema and returns:
10241
+ - **Suggested categories**: edge cases, hard cases, representative cases
10242
+ - **Per-field questions**: what values, what edge cases, what correct output looks like
10243
+ - **Template item**: showing the expected shape for each dataset entry
10244
+ - **Priority rule**: hard cases that expose prompt weaknesses > easy cases that always pass
10245
+
10246
+ Collect answers from the user, then construct 5-10 dataset items covering all categories.
10247
+ Ensure at least 2 hard/edge cases per category. Then upload:
10248
+
10249
+ \`\`\`
10250
+ mutagent prompts dataset add <prompt-id> -d '<constructed-json>' --name '<name>' --json
10251
+ \`\`\`
10252
+
10253
+ ---
10254
+
10255
+ ## Cross-references
10256
+
10257
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
10258
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → brace convention + conversion (critical for steps 3 and 15)
10259
+ - [concepts/eval-criteria.md](../concepts/eval-criteria.md) → INPUT MVC + OUTPUT Standards + 3-tier rubric (critical for steps 7-8)
10260
+ - [workflows/exploration.md](./exploration.md) → step 1 of this workflow
10261
+ - [workflows/tracing.md](./tracing.md) → parallel or follow-up path
10262
+ `,
10263
+ "workflows/tracing.md": `---
10264
+ name: mutagent-cli-workflows-tracing
10265
+ description: |
10266
+ Framework integration workflow. Adds MutagenT tracing/observability to the
10267
+ user's codebase. Non-destructive (append-only), fastest first-value path.
10268
+ Detects framework via \`mutagent explore\`, generates snippet via
10269
+ \`mutagent integrate\`, applies via Edit tool, verifies via traces list.
10270
+ triggers:
10271
+ - "add tracing"
10272
+ - "add observability"
10273
+ - "integrate framework"
10274
+ - "integrate mutagent"
10275
+ - "add mutagent to my code"
10276
+ - "instrument my prompts"
10277
+ - "trace my prompts"
10278
+ ---
10279
+
10280
+ # Workflow — Tracing (Framework Integration)
10281
+
10282
+ > **Scope**: read + append-only on user code. Never modify existing business
10283
+ > logic — only add tracing imports and decorators.
10284
+
10285
+ Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. Key reminders:
10286
+ - \`--json\` on every command
10287
+ - \`<command> --help\` before first use
10288
+ - Explore before modify (Rule 4)
10289
+ - Show command output to user after every mutation
10290
+
10291
+ ---
10292
+
10293
+ ## When this workflow applies
10294
+
10295
+ - User said "add tracing", "add observability", "integrate \\<framework\\>"
10296
+ - User wants to see their prompts captured in the MutagenT dashboard
10297
+ - Fastest path to first value — prefer this before suggesting optimization
10298
+
10299
+ ---
10300
+
10301
+ ## Supported frameworks
10302
+
10303
+ \`mutagent integrate --help\` is the authoritative list. Common entries:
10304
+
10305
+ | Framework | Signal in codebase |
10306
+ |---|---|
10307
+ | LangChain | \`from "langchain"\`, \`PromptTemplate\`, \`ChatPromptTemplate\` |
10308
+ | LangGraph | \`from "@langchain/langgraph"\`, \`StateGraph\` |
10309
+ | OpenAI SDK | \`import OpenAI\`, \`openai.chat.completions.create\` |
10310
+ | Vercel AI SDK | \`import { generateText }\` from \`"ai"\` |
10311
+ | Mastra | \`from "@mastra/core"\` |
10312
+ | Custom / raw | any string template with \`{variable}\` |
10313
+
10314
+ ---
10315
+
10316
+ ## Workflow steps
10317
+
10318
+ \`\`\`
10319
+ 1. mutagent explore --json
10320
+ → detect which framework is in use
10321
+ → show command output to user
10322
+
10323
+ 2. mutagent integrate --help
10324
+ → read the available frameworks and flags (ALWAYS before step 3)
10325
+
10326
+ 3. mutagent integrate <framework> --json
10327
+ → get the integration snippet
10328
+
10329
+ 4. Use AskUserQuestion to confirm before applying code changes: "I'll add the tracing snippet to <file>. Proceed?"
10330
+ → show the snippet preview before applying
10331
+
10332
+ 5. Apply snippet via Edit tool
10333
+ → code change happens here — append imports + decorators only
10334
+ → never touch existing business logic
10335
+
10336
+ 6. mutagent traces list --json
10337
+ → verify integration: check traces count > 0
10338
+ → show results and dashboard link to user
10339
+ \`\`\`
10340
+
10341
+ ---
10342
+
10343
+ ## Scope guard
10344
+
10345
+ This path is **READ + APPEND-ONLY** on the user's code:
10346
+ - ✓ Add import at top of file
10347
+ - ✓ Wrap existing function call with tracing decorator
10348
+ - ✗ Rename variables
10349
+ - ✗ Refactor logic
10350
+ - ✗ Remove existing code
10351
+
10352
+ If the integration snippet requires a significant rewrite, confirm scope with the user before proceeding.
10353
+
10354
+ ---
10355
+
10356
+ ## Post-integration state
10357
+
10358
+ After step 6:
10359
+ - Update \`.mutagent/mutation-context.md\` with the integration marker
10360
+ - Show the dashboard link from traces output so user can verify traces in UI
10361
+ - If user wants to optimize the traced prompt → route to [workflows/optimization.md](./optimization.md)
10362
+
10363
+ ---
10364
+
10365
+ ## Common pitfalls
10366
+
10367
+ - Skipping step 1 and guessing the framework → let \`explore\` detect it
10368
+ - Forgetting step 6 → user has no proof the integration works
10369
+ - Editing files outside the \`integrate --json\` snippet block
10370
+ - Not showing command results to the user after mutations
10371
+
10372
+ ---
10373
+
10374
+ ## Cross-references
10375
+
10376
+ - [SKILL.md](../SKILL.md) → 5 rules + journey router
10377
+ - [workflows/exploration.md](./exploration.md) → step 1 of this workflow
10378
+ - [workflows/optimization.md](./optimization.md) → natural next step after tracing
10379
+ - [concepts/prompt-variables.md](../concepts/prompt-variables.md) → variable inference for traced prompts
10380
+ `
10381
+ };
10382
// Convenience alias for the entry-point skill document (SKILL.md), or ""
// when it is absent from the generated bundle.
var SKILL_CONTENT = SKILL_FILES["SKILL.md"] ?? "";
10383
+
10384
+ // src/commands/skills.ts
10385
/**
 * Resolve the repository root via `git rev-parse --show-toplevel`.
 * Falls back to the current working directory when the command fails
 * (not inside a git repo, or git is unavailable).
 * @returns {string} absolute path to the repo root (or cwd fallback)
 */
function findRepoRoot() {
  let gitOutput;
  try {
    gitOutput = execSync4("git rev-parse --show-toplevel", {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"]
    });
  } catch {
    // Best-effort fallback: behave sensibly outside a git checkout.
    return process.cwd();
  }
  return gitOutput.trim();
}
10395
// Install destination for the skill bundle, relative to the repo root.
var SKILL_DIR = ".claude/skills/mutagent-cli";
// Entry-point file name: sorted first on install and required to exist
// (see assertSkillFilesValid).
var SKILL_ENTRY_FILE = "SKILL.md";
10397
/**
 * Guard against a broken build of the embedded skill bundle.
 * Verifies that SKILL_FILES is non-empty and that the entry document
 * (SKILL_ENTRY_FILE) exists, is a non-blank string, and begins with
 * YAML frontmatter. Throws an Error with repair instructions otherwise.
 * @throws {Error} when the generated skill content is missing or malformed
 */
function assertSkillFilesValid() {
  const bundleIsEmpty = Object.keys(SKILL_FILES).length === 0;
  if (bundleIsEmpty) {
    const rebuildHint =
      "SKILL_FILES is missing or empty. This indicates a broken build: " +
      "scripts/sync-skill.ts must regenerate src/generated/skill-content.ts " +
      "from .claude/skills/mutagent-cli/ before build/test. " +
      "Run `bun run sync-skill` to debug.";
    throw new Error(rebuildHint);
  }
  const entry = SKILL_FILES[SKILL_ENTRY_FILE];
  const entryIsBlank = typeof entry !== "string" || entry.trim().length === 0;
  if (entryIsBlank) {
    throw new Error(
      `SKILL_FILES["${SKILL_ENTRY_FILE}"] is missing or empty. ` +
        "Check .claude/skills/mutagent-cli/SKILL.md and run `bun run sync-skill`."
    );
  }
  // Skill documents must carry YAML frontmatter for the agent loader.
  if (!entry.startsWith("---")) {
    throw new Error(
      `SKILL_FILES["${SKILL_ENTRY_FILE}"] is malformed: expected YAML frontmatter ("---") at start.`
    );
  }
}
10409
/**
 * Return the embedded skill file map (relative path → markdown body),
 * validating the bundle first via assertSkillFilesValid().
 * @returns {Record<string, string>} the SKILL_FILES map
 * @throws {Error} when the generated bundle fails validation
 */
function getSkillFiles() {
  assertSkillFilesValid();
  const files = SKILL_FILES;
  return files;
}
10413
/**
 * Build the `mutagent skills` command group.
 * `skills install` writes the embedded skill bundle (SKILL_FILES) into
 * .claude/skills/mutagent-cli/ at the repo root so Claude Code can load it.
 * @returns {Command} the configured commander sub-command
 */
function createSkillsCommand() {
  const skills = new Command17("skills").description("Manage MutagenT CLI skills for coding agents");
  skills.command("install").description("Install MutagenT CLI skill for Claude Code").addHelpText("after", `
Examples:
  ${chalk36.dim("$")} mutagent skills install

This creates a Claude Code skill at .claude/skills/mutagent-cli/SKILL.md
that teaches coding agents how to use the MutagenT CLI effectively.
`).action((_options, cmd) => {
    // The global --json flag lives on the root command (two parents up).
    const parentCmd = cmd.parent?.parent;
    const isJson = parentCmd ? getJsonFlag(parentCmd) : false;
    const output = new OutputFormatter(isJson ? "json" : "table");
    const repoRoot = findRepoRoot();
    const skillDir = join7(repoRoot, SKILL_DIR);
    const files = getSkillFiles(); // throws if the embedded bundle is broken
    const writtenFiles = [];
    let totalBytes = 0;
    // SKILL.md first, everything else alphabetical — stable, readable listing.
    const sortedKeys = Object.keys(files).sort((a, b) => {
      if (a === SKILL_ENTRY_FILE)
        return -1;
      if (b === SKILL_ENTRY_FILE)
        return 1;
      return a.localeCompare(b);
    });
    for (const relPath of sortedKeys) {
      const destPath = join7(skillDir, relPath);
      const parentDir = dirname(destPath);
      if (!existsSync12(parentDir)) {
        mkdirSync4(parentDir, { recursive: true });
      }
      const raw = files[relPath] ?? "";
      // Ensure every written file ends with exactly one trailing newline.
      const finalContent = raw.endsWith("\n") ? raw : `${raw}\n`;
      writeFileSync5(destPath, finalContent, "utf-8");
      // FIX: report true UTF-8 byte counts. `String.prototype.length` counts
      // UTF-16 code units, which undercounts the skill content's multi-byte
      // characters (box-drawing chars, arrows, emoji).
      const byteCount = Buffer.byteLength(finalContent, "utf-8");
      writtenFiles.push({ path: destPath, bytes: byteCount });
      totalBytes += byteCount;
    }
    if (isJson) {
      output.output({
        installed: true,
        name: "mutagent-cli",
        skillDir,
        files: writtenFiles,
        totalBytes
      });
    } else {
      output.success(`Installed MutagenT CLI skill`);
      console.log(` ${chalk36.dim("Dir:")} ${skillDir}`);
      console.log("");
      // writtenFiles and sortedKeys are parallel arrays built in the same loop.
      for (let i = 0; i < writtenFiles.length; i++) {
        const entry = writtenFiles[i];
        if (!entry)
          continue;
        const relPath = sortedKeys[i] ?? "";
        const isLast = i === writtenFiles.length - 1;
        const prefix = isLast ? "└─" : "├─";
        console.log(` ${chalk36.dim(prefix)} ${relPath} ${chalk36.dim(`(${String(entry.bytes)} bytes)`)}`);
      }
      console.log("");
      console.log(` ${chalk36.dim(`${String(sortedKeys.length)} file(s), ${String(totalBytes)} bytes total.`)}`);
      console.log(` ${chalk36.dim("This skill teaches coding agents how to use the MutagenT CLI.")}`);
      console.log(` ${chalk36.dim("It will be automatically loaded by Claude Code when relevant triggers match.")}`);
    }
  });
  return skills;
}
10480
+
10481
+ // src/commands/usage.ts
10482
+ init_config();
10483
+ import { Command as Command18 } from "commander";
10484
+ import chalk37 from "chalk";
10485
+ init_errors();
10486
+ init_sdk_client();
10487
+ var PROVIDERS_URL = "https://app.mutagent.io/settings/providers";
10488
+ function createUsageCommand() {
10489
+ const usage = new Command18("usage").description("Show resource counts (prompts, datasets, evaluations, optimizations, experiments)").addHelpText("after", `
10490
+ Examples:
10491
+ ${chalk37.dim("$")} mutagent usage
10492
+ ${chalk37.dim("$")} mutagent usage --json
10493
+ `);
10494
+ usage.action(async () => {
10495
+ const isJson = getJsonFlag(usage);
9095
10496
  const output = new OutputFormatter(isJson ? "json" : "table");
9096
10497
  try {
9097
10498
  const apiKey = getApiKey();
@@ -9143,17 +10544,17 @@ Examples:
9143
10544
  });
9144
10545
  } else {
9145
10546
  console.log("");
9146
- console.log(chalk36.bold("\uD83D\uDCCA MutagenT Usage"));
9147
- console.log(chalk36.dim("─".repeat(45)));
10547
+ console.log(chalk37.bold("\uD83D\uDCCA MutagenT Usage"));
10548
+ console.log(chalk37.dim("─".repeat(45)));
9148
10549
  console.log("");
9149
- console.log(chalk36.bold("Resources:"));
9150
- console.log(` Prompts: ${chalk36.cyan(String(promptCount))}`);
9151
- console.log(` Datasets: ${chalk36.cyan(String(datasetCount))}`);
9152
- console.log(` Evaluations: ${chalk36.cyan(String(evaluationCount))}`);
9153
- console.log(` Optimizations: ${chalk36.cyan(String(optimizationCount))}`);
9154
- console.log(` Experiments: ${chalk36.cyan(String(experimentCount))}`);
10550
+ console.log(chalk37.bold("Resources:"));
10551
+ console.log(` Prompts: ${chalk37.cyan(String(promptCount))}`);
10552
+ console.log(` Datasets: ${chalk37.cyan(String(datasetCount))}`);
10553
+ console.log(` Evaluations: ${chalk37.cyan(String(evaluationCount))}`);
10554
+ console.log(` Optimizations: ${chalk37.cyan(String(optimizationCount))}`);
10555
+ console.log(` Experiments: ${chalk37.cyan(String(experimentCount))}`);
9155
10556
  console.log("");
9156
- console.log(` Providers: ${chalk36.underline(PROVIDERS_URL)}`);
10557
+ console.log(` Providers: ${chalk37.underline(PROVIDERS_URL)}`);
9157
10558
  console.log("");
9158
10559
  }
9159
10560
  } catch (error) {
@@ -9441,11 +10842,11 @@ Claude Code Session Telemetry:
9441
10842
 
9442
10843
  // src/commands/feedback.ts
9443
10844
  import { Command as Command20 } from "commander";
9444
- import chalk37 from "chalk";
10845
+ import chalk38 from "chalk";
9445
10846
  init_errors();
9446
10847
  init_config();
9447
10848
  import { readFileSync as readFileSync11 } from "fs";
9448
- import { join as join9, dirname } from "path";
10849
+ import { join as join9, dirname as dirname2 } from "path";
9449
10850
  import { fileURLToPath } from "url";
9450
10851
  var VALID_CATEGORIES = ["bug", "feature", "improvement", "praise"];
9451
10852
  function getCliVersion() {
@@ -9453,7 +10854,7 @@ function getCliVersion() {
9453
10854
  return process.env.CLI_VERSION;
9454
10855
  }
9455
10856
  try {
9456
- const __dirname2 = dirname(fileURLToPath(import.meta.url));
10857
+ const __dirname2 = dirname2(fileURLToPath(import.meta.url));
9457
10858
  const pkgPath = join9(__dirname2, "..", "..", "package.json");
9458
10859
  const pkg = JSON.parse(readFileSync11(pkgPath, "utf-8"));
9459
10860
  return pkg.version ?? "0.1.1";
@@ -9495,12 +10896,12 @@ async function postToServer(payload, endpoint, apiKey, workspaceId, organization
9495
10896
  }
9496
10897
  function createFeedbackCommand() {
9497
10898
  const feedback = new Command20("feedback").description("Send product feedback to MutagenT").addHelpText("after", `
9498
- ${chalk37.bold("Examples:")}
9499
- ${chalk37.cyan('mutagent feedback send -m "Great optimization results!"')}
9500
- ${chalk37.cyan('mutagent feedback send -m "CLI crashed on export" --category bug')}
9501
- ${chalk37.cyan('mutagent feedback send -m "Need batch operations" --category feature --json')}
10899
+ ${chalk38.bold("Examples:")}
10900
+ ${chalk38.cyan('mutagent feedback send -m "Great optimization results!"')}
10901
+ ${chalk38.cyan('mutagent feedback send -m "CLI crashed on export" --category bug')}
10902
+ ${chalk38.cyan('mutagent feedback send -m "Need batch operations" --category feature --json')}
9502
10903
 
9503
- ${chalk37.yellow("AI Agent (MANDATORY):")}
10904
+ ${chalk38.yellow("AI Agent (MANDATORY):")}
9504
10905
  ALWAYS use --json: mutagent feedback send -m "..." --category improvement --json
9505
10906
  Use this command to report bugs, request features, or share UX feedback.
9506
10907
  `).action(() => {
@@ -9511,18 +10912,18 @@ ${chalk37.yellow("AI Agent (MANDATORY):")}
9511
10912
  }
9512
10913
  function registerFeedbackSend(feedback) {
9513
10914
  feedback.command("send").description("Send feedback about the MutagenT platform").requiredOption("-m, --message <text>", "Feedback message").option("--category <type>", `Feedback category: ${VALID_CATEGORIES.join(", ")}`, "improvement").option("--session <id>", "Link feedback to a specific session").addHelpText("after", `
9514
- ${chalk37.bold("Examples:")}
9515
- ${chalk37.dim("$")} mutagent feedback send -m "The optimization UX could show progress better"
9516
- ${chalk37.dim("$")} mutagent feedback send -m "CLI errored on traces export" --category bug
9517
- ${chalk37.dim("$")} mutagent feedback send -m "Love the guided eval!" --category praise --json
9518
-
9519
- ${chalk37.bold("Categories:")}
9520
- ${chalk37.bold("bug")} Something is broken or not working as expected
9521
- ${chalk37.bold("feature")} Request a new capability
9522
- ${chalk37.bold("improvement")} Suggest a UX or workflow enhancement (default)
9523
- ${chalk37.bold("praise")} Share what you love about the platform
9524
-
9525
- ${chalk37.yellow("AI Agent (MANDATORY):")}
10915
+ ${chalk38.bold("Examples:")}
10916
+ ${chalk38.dim("$")} mutagent feedback send -m "The optimization UX could show progress better"
10917
+ ${chalk38.dim("$")} mutagent feedback send -m "CLI errored on traces export" --category bug
10918
+ ${chalk38.dim("$")} mutagent feedback send -m "Love the guided eval!" --category praise --json
10919
+
10920
+ ${chalk38.bold("Categories:")}
10921
+ ${chalk38.bold("bug")} Something is broken or not working as expected
10922
+ ${chalk38.bold("feature")} Request a new capability
10923
+ ${chalk38.bold("improvement")} Suggest a UX or workflow enhancement (default)
10924
+ ${chalk38.bold("praise")} Share what you love about the platform
10925
+
10926
+ ${chalk38.yellow("AI Agent (MANDATORY):")}
9526
10927
  ALWAYS use --json: mutagent feedback send -m "..." --json
9527
10928
  Auto-captured context (CLI version, platform, node version) is included automatically.
9528
10929
  `).action(async (options) => {
@@ -9579,7 +10980,7 @@ if (process.env.CLI_VERSION) {
9579
10980
  cliVersion = process.env.CLI_VERSION;
9580
10981
  } else {
9581
10982
  try {
9582
- const __dirname2 = dirname2(fileURLToPath2(import.meta.url));
10983
+ const __dirname2 = dirname3(fileURLToPath2(import.meta.url));
9583
10984
  const pkgPath = join10(__dirname2, "..", "..", "package.json");
9584
10985
  const pkg = JSON.parse(readFileSync12(pkgPath, "utf-8"));
9585
10986
  cliVersion = pkg.version ?? cliVersion;
@@ -9594,90 +10995,100 @@ program.name("mutagent").description(`MutagenT CLI - AI-native prompt optimizati
9594
10995
  showGlobalOptions: true
9595
10996
  });
9596
10997
  program.addHelpText("after", `
9597
- ${chalk38.yellow("Non-Interactive Mode (CI/CD & Coding Agents):")}
9598
- export MUTAGENT_API_KEY=mt_... ${chalk38.dim("or")} --api-key mt_...
9599
- --json ${chalk38.dim("for structured output")} --non-interactive ${chalk38.dim("to disable prompts")}
9600
-
9601
- ${chalk38.yellow("Command Navigation:")}
9602
- mutagent login ${chalk38.dim("Login (browser OAuth — recommended)")}
9603
- mutagent auth status ${chalk38.dim("Check auth + workspace")}
9604
- mutagent init ${chalk38.dim("Initialize project (.mutagentrc.json)")}
9605
- mutagent explore ${chalk38.dim("Discover prompts in codebase")}
9606
- mutagent workspaces list --json ${chalk38.dim("List workspaces (verify ID)")}
9607
- mutagent config set workspace <id> ${chalk38.dim("Set active workspace")}
9608
- mutagent usage --json ${chalk38.dim("Show account usage + provider status")}
9609
-
9610
- mutagent prompts create --help ${chalk38.dim("Upload prompt (read help first!)")}
9611
- mutagent prompts list --json ${chalk38.dim("List prompts")}
9612
- mutagent prompts get <id> --json ${chalk38.dim("Full prompt details + schemas")}
9613
-
9614
- mutagent prompts dataset add --help ${chalk38.dim("Upload dataset (read help first!)")}
9615
- mutagent prompts dataset list <id> ${chalk38.dim("List datasets")}
9616
-
9617
- mutagent prompts evaluation create --help ${chalk38.dim("Create eval (read help first!)")}
9618
- mutagent prompts evaluation create <id> --guided --json ${chalk38.dim("Guided eval workflow")}
9619
- mutagent prompts evaluation list <id> --json ${chalk38.dim("List evaluations")}
9620
-
9621
- mutagent prompts optimize start --help ${chalk38.dim("Run optimization (read help first!)")}
9622
- mutagent prompts optimize status <job-id> ${chalk38.dim("Poll progress")}
9623
- mutagent prompts optimize results <job-id> ${chalk38.dim("View scorecard")}
9624
-
9625
- mutagent feedback send -m "..." ${chalk38.dim("Send product feedback")}
9626
- mutagent feedback send -m "..." --category bug ${chalk38.dim("Report a bug")}
9627
-
9628
- mutagent integrate <framework> ${chalk38.dim("Framework integration guide")}
9629
- mutagent hooks --help ${chalk38.dim("Hook setup for Claude Code telemetry")}
9630
- mutagent playground run <id> --input '{...}' ${chalk38.dim("Quick test")}
9631
-
9632
- ${chalk38.yellow(" Workflow: Framework Integration (Tracing):")}
9633
- 1. mutagent explore ${chalk38.dim("← discover prompts/agents in codebase")}
9634
- 2. mutagent integrate <framework> ${chalk38.dim(" get integration instructions")}
9635
- 3. Apply tracing code to your codebase ${chalk38.dim("← follow the guide output")}
9636
- 4. mutagent traces list --json ${chalk38.dim("← verify traces are arriving")}
9637
-
9638
- ${chalk38.yellow(" Workflow: Evaluate Optimize:")}
9639
- 1. mutagent prompts create --help ${chalk38.dim("← read help")}
9640
- 2. mutagent prompts create ... --json ${chalk38.dim(" upload prompt with {variables} + inputSchema")}
9641
- 3. mutagent prompts dataset add --help ${chalk38.dim("← read help")}
9642
- 4. mutagent prompts dataset add <id> ... --json ${chalk38.dim("← upload dataset")}
9643
- 5. mutagent prompts evaluation create <id> --guided --json ${chalk38.dim("← guided eval")}
10998
+ ${AGENTS_WIP_BANNER}
10999
+
11000
+ ${chalk39.yellow("Non-Interactive Mode (CI/CD & Coding Agents):")}
11001
+ export MUTAGENT_API_KEY=mt_... ${chalk39.dim("or")} --api-key mt_...
11002
+ --json ${chalk39.dim("for structured output")} --non-interactive ${chalk39.dim("to disable prompts")}
11003
+
11004
+ ${chalk39.yellow("Command Navigation:")}
11005
+ mutagent login ${chalk39.dim("Login (browser OAuth — recommended)")}
11006
+ mutagent auth status ${chalk39.dim("Check auth + workspace")}
11007
+ mutagent init ${chalk39.dim("Initialize project (.mutagentrc.json)")}
11008
+ mutagent explore ${chalk39.dim("Discover prompts in codebase")}
11009
+ mutagent workspaces list --json ${chalk39.dim("List workspaces (verify ID)")}
11010
+ mutagent config set workspace <id> ${chalk39.dim("Set active workspace")}
11011
+ mutagent usage --json ${chalk39.dim("Show account usage + provider status")}
11012
+
11013
+ mutagent prompts create --help ${chalk39.dim("Upload prompt (read help first!)")}
11014
+ mutagent prompts list --json ${chalk39.dim("List prompts")}
11015
+ mutagent prompts get <id> --json ${chalk39.dim("Full prompt details + schemas")}
11016
+
11017
+ mutagent prompts dataset add --help ${chalk39.dim("Upload dataset (read help first!)")}
11018
+ mutagent prompts dataset list <id> ${chalk39.dim("List datasets")}
11019
+
11020
+ mutagent prompts evaluation create --help ${chalk39.dim("Create eval (read help first!)")}
11021
+ mutagent prompts evaluation create <id> --guided --json ${chalk39.dim("Guided eval workflow")}
11022
+ mutagent prompts evaluation list <id> --json ${chalk39.dim("List evaluations")}
11023
+
11024
+ mutagent prompts optimize start --help ${chalk39.dim("Run optimization (read help first!)")}
11025
+ mutagent prompts optimize status <job-id> ${chalk39.dim("Poll progress")}
11026
+ mutagent prompts optimize results <job-id> ${chalk39.dim("View scorecard")}
11027
+
11028
+ mutagent feedback send -m "..." ${chalk39.dim("Send product feedback")}
11029
+ mutagent feedback send -m "..." --category bug ${chalk39.dim("Report a bug")}
11030
+
11031
+ mutagent integrate <framework> ${chalk39.dim("Framework integration guide")}
11032
+ mutagent hooks --help ${chalk39.dim("Hook setup for Claude Code telemetry")}
11033
+ mutagent playground run <id> --input '{...}' ${chalk39.dim("Quick test")}
11034
+
11035
+ ${chalk39.yellow(" Workflow: Framework Integration (Tracing):")}
11036
+ 1. mutagent explore ${chalk39.dim("← discover prompts/agents in codebase")}
11037
+ 2. mutagent integrate <framework> ${chalk39.dim("← get integration instructions")}
11038
+ 3. Apply tracing code to your codebase ${chalk39.dim("← follow the guide output")}
11039
+ 4. mutagent traces list --json ${chalk39.dim(" verify traces are arriving")}
11040
+
11041
+ ${chalk39.yellow(" Workflow: Evaluate Optimize:")}
11042
+ 1. mutagent prompts create --help ${chalk39.dim("← read help")}
11043
+ 2. mutagent prompts create ... --json ${chalk39.dim("← upload prompt with {variables} + inputSchema")}
11044
+ 3. mutagent prompts dataset add --help ${chalk39.dim("← read help")}
11045
+ 4. mutagent prompts dataset add <id> ... --json ${chalk39.dim("← upload dataset")}
11046
+ 5. mutagent prompts evaluation create <id> --guided --json ${chalk39.dim("← guided eval")}
9644
11047
  6. mutagent prompts optimize start <id> --dataset <d> --evaluation <e> --json
9645
11048
 
9646
- ${chalk38.yellow("Post-Onboarding Decision Tree:")}
9647
- After ${chalk38.bold("mutagent auth login")}, users land in one of 3 paths:
9648
- ${chalk38.bold("Path A")} (Tracing): explore → integrate <framework> → apply tracing → verify
9649
- ${chalk38.bold("Path B")} (Optimization): explore → prompts create → dataset add → eval create → optimize
9650
- ${chalk38.bold("Path C")} (Manual): Use CLI commands directly — run mutagent <command> --help
11049
+ ${chalk39.yellow("Post-Onboarding Decision Tree:")}
11050
+ After ${chalk39.bold("mutagent auth login")}, users land in one of 3 paths:
11051
+ ${chalk39.bold("Path A")} (Tracing): explore → integrate <framework> → apply tracing → verify
11052
+ ${chalk39.bold("Path B")} (Optimization): explore → prompts create → dataset add → eval create → optimize
11053
+ ${chalk39.bold("Path C")} (Manual): Use CLI commands directly — run mutagent <command> --help
9651
11054
 
9652
- ${chalk38.yellow("Directive System:")}
11055
+ ${chalk39.yellow("Directive System:")}
9653
11056
  Every --json response may include:
9654
- ${chalk38.bold("_directive.renderedCard")} Pre-formatted card for the user ${chalk38.red("(MUST be shown in chat)")}
9655
- ${chalk38.bold("_directive.instruction")} Next step for the agent
9656
- ${chalk38.bold("_directive.next")} Array of suggested follow-up commands
9657
- ${chalk38.bold("_links")} Dashboard/API URLs (format as markdown links)
11057
+ ${chalk39.bold("_directive.renderedCard")} Pre-formatted card for the user ${chalk39.red("(MUST be shown in chat)")}
11058
+ ${chalk39.bold("_directive.instruction")} Next step for the agent
11059
+ ${chalk39.bold("_directive.next")} Array of suggested follow-up commands
11060
+ ${chalk39.bold("_links")} Dashboard/API URLs (format as markdown links)
9658
11061
 
9659
- ${chalk38.yellow("Evaluation Criteria Format:")}
9660
- Each criterion MUST have: ${chalk38.bold("name")}, ${chalk38.bold("description")} (scoring rubric), ${chalk38.bold("evaluationParameter")}
11062
+ ${chalk39.yellow("Evaluation Criteria Format:")}
11063
+ Each criterion MUST have: ${chalk39.bold("name")}, ${chalk39.bold("description")} (scoring rubric), ${chalk39.bold("evaluationParameter")}
9661
11064
  evaluationParameter MUST match an inputSchema or outputSchema field name
9662
11065
  No duplicate evaluationParameter values — each criterion targets a unique field
9663
11066
  ALL schema fields must be covered (missing fields = error)
9664
- Use ${chalk38.bold("--guided --json")} to generate criteria templates from prompt schemas
9665
-
9666
- ${chalk38.yellow("Optimization Cost Control:")}
9667
- Default max-iterations is 1. ${chalk38.red("NEVER increase without explicit user request.")}
11067
+ Use ${chalk39.bold("--guided --json")} to generate criteria templates from prompt schemas
11068
+
11069
+ ${chalk39.bold("Two framings — coding agents MUST distinguish:")}
11070
+ ${chalk39.bold(" INPUT fields Minimum Viable Context")}
11071
+ What info MUST be present in the input for the prompt to succeed.
11072
+ Rubric should include a ${chalk39.bold("completeness scale")} (e.g., 0 = missing critical context, 5 = complete).
11073
+ ${chalk39.dim("Input fields are typically `{variables}` in your prompt template.")}
11074
+ ${chalk39.bold("• OUTPUT fields → Output Standards")}
11075
+ Content correctness + structural correctness + a ${chalk39.bold("correctness scale")} (e.g., 0 = wrong, 5 = perfect).
11076
+
11077
+ ${chalk39.yellow("Optimization Cost Control:")}
11078
+ Default max-iterations is 1. ${chalk39.red("NEVER increase without explicit user request.")}
9668
11079
  Each iteration incurs LLM costs — confirm with user before starting >1.
9669
11080
 
9670
- ${chalk38.yellow("Post-Optimization:")}
9671
- After ${chalk38.bold("optimize results")}: ALWAYS show the before/after diff to the user first.
9672
- Then offer choices: ${chalk38.bold("Apply")} / ${chalk38.bold("Reject")}.
11081
+ ${chalk39.yellow("Post-Optimization:")}
11082
+ After ${chalk39.bold("optimize results")}: ALWAYS show the before/after diff to the user first.
11083
+ Then offer choices: ${chalk39.bold("Apply")} / ${chalk39.bold("Reject")}.
9673
11084
 
9674
- ${chalk38.yellow("State Tracking:")}
11085
+ ${chalk39.yellow("State Tracking:")}
9675
11086
  .mutagent/mutation-context.md — Codebase index of discovered/uploaded prompts
9676
11087
  Update after explore, create, and dataset operations
9677
11088
  mutagent auth status — Auth + workspace state
9678
11089
  Comment markers (// MutagenT:START ... // MutagenT:END) in source files
9679
11090
 
9680
- ${chalk38.yellow("AI Agent Rules (MANDATORY for coding agents):")}
11091
+ ${chalk39.yellow("AI Agent Rules (MANDATORY for coding agents):")}
9681
11092
  1. Login (two paths):
9682
11093
  - CI / fully automated: export MUTAGENT_API_KEY=mt_... then mutagent login --json
9683
11094
  - Helping a user onboard: mutagent login --browser --json
@@ -9688,15 +11099,15 @@ ${chalk38.yellow("AI Agent Rules (MANDATORY for coding agents):")}
9688
11099
  3. Run <command> --help BEFORE first use of any command
9689
11100
  4. Use --guided --json for evaluation creation (NEVER --guided alone)
9690
11101
  5. Parse _directive.renderedCard and copy it into your CHAT RESPONSE verbatim
9691
- ${chalk38.red("HARD STOP")}: do NOT run further commands until the card is rendered in chat
11102
+ ${chalk39.red("HARD STOP")}: do NOT run further commands until the card is rendered in chat
9692
11103
  6. After mutagent init, verify workspace: mutagent workspaces list --json
9693
11104
  7. Use {single_braces} for template variables in prompts
9694
11105
  8. Collect evaluation criteria from the user — NEVER auto-generate
9695
11106
  9. ALL user interaction via AskUserQuestion — CLI is non-interactive
9696
11107
  ${!hasCredentials() ? `
9697
- ` + chalk38.yellow(" Warning: Not authenticated. Run: mutagent login") + `
11108
+ ` + chalk39.yellow(" Warning: Not authenticated. Run: mutagent login") + `
9698
11109
  ` : ""}${!hasRcConfig() ? `
9699
- ` + chalk38.green(" Get started: mutagent init") + `
11110
+ ` + chalk39.green(" Get started: mutagent init") + `
9700
11111
  ` : ""}`);
9701
11112
  var rawArgs = process.argv.slice(2);
9702
11113
  if (rawArgs.includes("-v") || rawArgs.includes("--version")) {
@@ -9737,5 +11148,5 @@ program.addCommand(createHooksCommand());
9737
11148
  program.addCommand(createFeedbackCommand());
9738
11149
  program.parse();
9739
11150
 
9740
- //# debugId=C3949CD3A9488F4264756E2164756E21
11151
+ //# debugId=4EA6EB9078B51CC264756E2164756E21
9741
11152
  //# sourceMappingURL=cli.js.map