@mutagent/cli 0.1.116 → 0.1.118
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/cli.js +1985 -574
- package/dist/bin/cli.js.map +15 -12
- package/package.json +13 -8
package/dist/bin/cli.js
CHANGED
|
@@ -1241,9 +1241,9 @@ var init_sdk_client = __esm(() => {
|
|
|
1241
1241
|
|
|
1242
1242
|
// src/bin/cli.ts
|
|
1243
1243
|
import { Command as Command21 } from "commander";
|
|
1244
|
-
import
|
|
1244
|
+
import chalk39 from "chalk";
|
|
1245
1245
|
import { readFileSync as readFileSync12 } from "fs";
|
|
1246
|
-
import { join as join10, dirname as
|
|
1246
|
+
import { join as join10, dirname as dirname3 } from "path";
|
|
1247
1247
|
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
1248
1248
|
|
|
1249
1249
|
// src/commands/auth.ts
|
|
@@ -1683,12 +1683,61 @@ import { resolve as resolve2 } from "path";
|
|
|
1683
1683
|
// src/lib/explorer.ts
|
|
1684
1684
|
import { readdirSync, readFileSync as readFileSync3, statSync } from "fs";
|
|
1685
1685
|
import { join as join3, relative, extname, basename } from "path";
|
|
1686
|
-
var
|
|
1686
|
+
var DOUBLE_VAR_PATTERN = /\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}/g;
|
|
1687
|
+
var SINGLE_VAR_PATTERN = /(?<![\\{])\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}(?!\})/g;
|
|
1688
|
+
var FENCED_CODE_PATTERN = /```[\s\S]*?```/g;
|
|
1687
1689
|
var PROMPT_NAME_PATTERN = /(?:const|let|var|export)\s+\w*(?:prompt|system|agent|instruction|template)\w*\s*=/i;
|
|
1688
1690
|
var SCHEMA_PATTERN = /["']?(?:inputSchema|outputSchema|properties|required)["']?\s*[=:]/;
|
|
1689
1691
|
var ZOD_PROMPT_SCHEMA_PATTERN = /(?:const|let|var|export)\s+\w*(?:prompt|output|input|response|schema|message)\w*\s*=\s*z\.object\s*\(/i;
|
|
1690
1692
|
var PYDANTIC_PATTERN = /class\s+\w+\s*\(\s*(?:BaseModel|BaseSettings)\s*\)/;
|
|
1691
1693
|
var MARKER_START_PATTERN = /MutagenT:START\s+(\w+)(?:\s+id=(\S+))?/;
|
|
1694
|
+
var AGENT_HIGH_PATTERNS = [
|
|
1695
|
+
{ pattern: /\bAgentExecutor\b/, reason: "langchain-agent-executor" },
|
|
1696
|
+
{ pattern: /\bcreate(?:React|OpenAIFunctions|Tool(?:Calling)?|Structured(?:Chat)?)Agent\b/, reason: "langchain-create-agent" },
|
|
1697
|
+
{ pattern: /\bnew\s+Agent\s*\(/, reason: "agent-constructor" },
|
|
1698
|
+
{ pattern: /from\s+['"]openai\/agents['"]/, reason: "openai-agents-sdk" },
|
|
1699
|
+
{ pattern: /from\s+['"]@openai\/agents['"]/, reason: "openai-agents-sdk" },
|
|
1700
|
+
{ pattern: /from\s+['"]crewai['"]/, reason: "crewai-import" },
|
|
1701
|
+
{ pattern: /from\s+['"]autogen(?:_agentchat)?['"]/, reason: "autogen-import" },
|
|
1702
|
+
{ pattern: /from\s+['"]@?langgraph['"]/, reason: "langgraph-import" },
|
|
1703
|
+
{ pattern: /from\s+['"]@langchain\/langgraph['"]/, reason: "langgraph-import" },
|
|
1704
|
+
{ pattern: /\bStateGraph\s*\(/, reason: "langgraph-stategraph" }
|
|
1705
|
+
];
|
|
1706
|
+
var AGENT_MEDIUM_PATTERNS = [
|
|
1707
|
+
{ pattern: /\btool_calls\b/, reason: "tool-calls" },
|
|
1708
|
+
{ pattern: /\btoolCalls\b/, reason: "tool-calls" },
|
|
1709
|
+
{ pattern: /^\s*tools\s*[:=]\s*\[/, reason: "tools-array" },
|
|
1710
|
+
{ pattern: /@tool\b/, reason: "tool-decorator" },
|
|
1711
|
+
{ pattern: /\bfunction_call\b/, reason: "function-call" },
|
|
1712
|
+
{ pattern: /\btool_choice\b/, reason: "tool-choice" }
|
|
1713
|
+
];
|
|
1714
|
+
/**
 * Extracts template variable names from prompt text and reports which brace
 * style the text predominantly uses.
 *
 * Fenced code blocks are removed before matching. Names are collected once
 * for the double-brace pattern and once for the single-brace pattern; the
 * style with strictly more matches wins, and a tie (including no matches at
 * all) resolves to "single".
 *
 * @param {string} content - Raw prompt or source text to inspect.
 * @returns {{ variables: string[], delimiter: "double" | "single" }}
 *   Unique variable names of the winning style in first-seen order, plus the
 *   winning style.
 */
function inferPromptVariables(content) {
  const withoutFences = content.replace(FENCED_CODE_PATTERN, "");

  // A fresh global RegExp is built per scan so no lastIndex state is shared
  // with the module-level pattern objects.
  const collectNames = (pattern) => {
    const names = [];
    const matcher = new RegExp(pattern.source, "g");
    for (let hit = matcher.exec(withoutFences); hit !== null; hit = matcher.exec(withoutFences)) {
      if (hit[1]) {
        names.push(hit[1]);
      }
    }
    return names;
  };

  const doubleNames = collectNames(DOUBLE_VAR_PATTERN);
  const singleNames = collectNames(SINGLE_VAR_PATTERN);
  const preferDouble = doubleNames.length > singleNames.length;

  // A Set keeps insertion order, so this de-duplicates while preserving the
  // order in which names first appeared.
  const variables = Array.from(new Set(preferDouble ? doubleNames : singleNames));
  return { variables, delimiter: preferDouble ? "double" : "single" };
}
|
|
1692
1741
|
function walkDir(dir, extensions, excludeDirs, maxDepth, currentDepth = 0) {
|
|
1693
1742
|
if (currentDepth >= maxDepth)
|
|
1694
1743
|
return [];
|
|
@@ -1728,20 +1777,23 @@ function scanForPrompts(filePath, relativePath) {
|
|
|
1728
1777
|
} catch {
|
|
1729
1778
|
return results;
|
|
1730
1779
|
}
|
|
1780
|
+
const fileDelimiter = inferPromptVariables(content).delimiter;
|
|
1731
1781
|
const lines = content.split(`
|
|
1732
1782
|
`);
|
|
1733
1783
|
for (let i = 0;i < lines.length; i++) {
|
|
1734
1784
|
const line = lines[i];
|
|
1735
1785
|
if (!line)
|
|
1736
1786
|
continue;
|
|
1737
|
-
|
|
1787
|
+
const inferred = inferPromptVariables(line);
|
|
1788
|
+
if (inferred.variables.length > 0) {
|
|
1738
1789
|
const preview = line.trim().substring(0, 80);
|
|
1739
1790
|
results.push({
|
|
1740
1791
|
file: relativePath,
|
|
1741
1792
|
line: i + 1,
|
|
1742
1793
|
preview,
|
|
1743
1794
|
reason: "template-variable",
|
|
1744
|
-
confidence: "high"
|
|
1795
|
+
confidence: "high",
|
|
1796
|
+
delimiter: fileDelimiter
|
|
1745
1797
|
});
|
|
1746
1798
|
}
|
|
1747
1799
|
if (PROMPT_NAME_PATTERN.test(line)) {
|
|
@@ -1752,7 +1804,8 @@ function scanForPrompts(filePath, relativePath) {
|
|
|
1752
1804
|
line: i + 1,
|
|
1753
1805
|
preview,
|
|
1754
1806
|
reason: "prompt-constant",
|
|
1755
|
-
confidence: "medium"
|
|
1807
|
+
confidence: "medium",
|
|
1808
|
+
delimiter: fileDelimiter
|
|
1756
1809
|
});
|
|
1757
1810
|
}
|
|
1758
1811
|
}
|
|
@@ -1764,7 +1817,8 @@ function scanForPrompts(filePath, relativePath) {
|
|
|
1764
1817
|
line: i + 1,
|
|
1765
1818
|
preview,
|
|
1766
1819
|
reason: "zod-schema",
|
|
1767
|
-
confidence: "low"
|
|
1820
|
+
confidence: "low",
|
|
1821
|
+
delimiter: fileDelimiter
|
|
1768
1822
|
});
|
|
1769
1823
|
}
|
|
1770
1824
|
}
|
|
@@ -1776,8 +1830,55 @@ function scanForPrompts(filePath, relativePath) {
|
|
|
1776
1830
|
line: i + 1,
|
|
1777
1831
|
preview,
|
|
1778
1832
|
reason: "pydantic-model",
|
|
1833
|
+
confidence: "medium",
|
|
1834
|
+
delimiter: fileDelimiter
|
|
1835
|
+
});
|
|
1836
|
+
}
|
|
1837
|
+
}
|
|
1838
|
+
}
|
|
1839
|
+
return results;
|
|
1840
|
+
}
|
|
1841
|
+
function scanForAgents(filePath, relativePath) {
|
|
1842
|
+
const results = [];
|
|
1843
|
+
let content;
|
|
1844
|
+
try {
|
|
1845
|
+
content = readFileSync3(filePath, "utf-8");
|
|
1846
|
+
} catch {
|
|
1847
|
+
return results;
|
|
1848
|
+
}
|
|
1849
|
+
const lines = content.split(`
|
|
1850
|
+
`);
|
|
1851
|
+
const seen = new Set;
|
|
1852
|
+
for (let i = 0;i < lines.length; i++) {
|
|
1853
|
+
const line = lines[i];
|
|
1854
|
+
if (!line)
|
|
1855
|
+
continue;
|
|
1856
|
+
for (const { pattern, reason } of AGENT_HIGH_PATTERNS) {
|
|
1857
|
+
if (pattern.test(line) && !seen.has(i)) {
|
|
1858
|
+
results.push({
|
|
1859
|
+
file: relativePath,
|
|
1860
|
+
line: i + 1,
|
|
1861
|
+
preview: line.trim().substring(0, 80),
|
|
1862
|
+
reason,
|
|
1863
|
+
confidence: "high"
|
|
1864
|
+
});
|
|
1865
|
+
seen.add(i);
|
|
1866
|
+
break;
|
|
1867
|
+
}
|
|
1868
|
+
}
|
|
1869
|
+
if (seen.has(i))
|
|
1870
|
+
continue;
|
|
1871
|
+
for (const { pattern, reason } of AGENT_MEDIUM_PATTERNS) {
|
|
1872
|
+
if (pattern.test(line)) {
|
|
1873
|
+
results.push({
|
|
1874
|
+
file: relativePath,
|
|
1875
|
+
line: i + 1,
|
|
1876
|
+
preview: line.trim().substring(0, 80),
|
|
1877
|
+
reason,
|
|
1779
1878
|
confidence: "medium"
|
|
1780
1879
|
});
|
|
1880
|
+
seen.add(i);
|
|
1881
|
+
break;
|
|
1781
1882
|
}
|
|
1782
1883
|
}
|
|
1783
1884
|
}
|
|
@@ -1833,7 +1934,8 @@ function scanJsonForSchemas(filePath, relativePath) {
|
|
|
1833
1934
|
line: 1,
|
|
1834
1935
|
preview: preview.substring(0, 80),
|
|
1835
1936
|
reason: "json-schema",
|
|
1836
|
-
confidence: "medium"
|
|
1937
|
+
confidence: "medium",
|
|
1938
|
+
delimiter: "single"
|
|
1837
1939
|
});
|
|
1838
1940
|
return results;
|
|
1839
1941
|
}
|
|
@@ -1890,12 +1992,14 @@ function exploreCodebase(options) {
|
|
|
1890
1992
|
const prompts = [];
|
|
1891
1993
|
const datasets = [];
|
|
1892
1994
|
const markers = [];
|
|
1995
|
+
const agents = [];
|
|
1893
1996
|
const sourceFiles = walkDir(rootPath, options.extensions, options.excludeDirs, options.depth);
|
|
1894
1997
|
for (const filePath of sourceFiles) {
|
|
1895
1998
|
const relativePath = relative(rootPath, filePath);
|
|
1896
1999
|
markers.push(...scanForMarkers(filePath, relativePath));
|
|
1897
2000
|
if (!options.markersOnly) {
|
|
1898
2001
|
prompts.push(...scanForPrompts(filePath, relativePath));
|
|
2002
|
+
agents.push(...scanForAgents(filePath, relativePath));
|
|
1899
2003
|
}
|
|
1900
2004
|
}
|
|
1901
2005
|
if (!options.markersOnly) {
|
|
@@ -1906,7 +2010,7 @@ function exploreCodebase(options) {
|
|
|
1906
2010
|
}
|
|
1907
2011
|
datasets.push(...scanForDatasets(rootPath, rootPath, options.excludeDirs, options.depth));
|
|
1908
2012
|
}
|
|
1909
|
-
return { prompts, datasets, markers };
|
|
2013
|
+
return { prompts, datasets, markers, agents };
|
|
1910
2014
|
}
|
|
1911
2015
|
function parseExtensions(includeGlob) {
|
|
1912
2016
|
const braceMatch = /\{([^}]+)\}/.exec(includeGlob);
|
|
@@ -3539,6 +3643,317 @@ init_sdk_client();
|
|
|
3539
3643
|
import { Command as Command3 } from "commander";
|
|
3540
3644
|
import chalk8 from "chalk";
|
|
3541
3645
|
init_errors();
|
|
3646
|
+
|
|
3647
|
+
// src/commands/prompts/guided-dataset.ts
|
|
3648
|
+
init_sdk_client();
|
|
3649
|
+
|
|
3650
|
+
// src/commands/prompts/guided-workflow.ts
|
|
3651
|
+
init_sdk_client();
|
|
3652
|
+
/**
 * Builds the static guidance payload returned when guided evaluation
 * creation is requested without a prompt id.
 *
 * @returns {object} A fresh object with `mode`, `message`, the ordered
 *   `workflow` command list, a `formatReference` showing the criteria shape,
 *   and a `_directive` with the follow-up instruction and commands.
 */
function buildGenericEvalWorkflow() {
  const commandSequence = [
    "1. mutagent explore --json — scan codebase for prompts",
    "2. mutagent prompts create ... --json — upload the target prompt",
    "3. Re-run: mutagent prompts evaluation create <prompt-id> --guided --json"
  ];

  // Placeholder criterion illustrating the fields each criterion carries.
  const criterionShape = {
    name: "<criterion_name>",
    description: "<rubric: what correct vs incorrect looks like>",
    evaluationParameter: "<must match a field in inputSchema or outputSchema>"
  };

  const directive = {
    instruction: "Ask the user which prompt they want to evaluate. If unclear, run mutagent explore --json first.",
    next: ["mutagent explore --json", "mutagent prompts list --json"]
  };

  return {
    mode: "generic",
    message: "No prompt-id provided. Follow this sequence to create an evaluation:",
    workflow: commandSequence,
    formatReference: { evalConfig: { criteria: [criterionShape] } },
    _directive: directive
  };
}
|
|
3676
|
+
/**
 * Looks for other prompts whose names overlap with the selected prompt's name
 * so the caller can ask the user to confirm the selection.
 *
 * Two names "overlap" when, compared case-insensitively, either one contains
 * the other. Empty or non-string names never count as overlapping: an empty
 * string is a substring of every string, so without this guard a single
 * unnamed prompt (or an unnamed selection) would match everything.
 *
 * This is a best-effort check: any failure while listing prompts yields
 * `null` rather than an error.
 *
 * @param {string} promptId - Id of the prompt the user selected.
 * @param {string} promptName - Name of the prompt the user selected.
 * @returns {Promise<object|null>} A payload with `message`, `selected`,
 *   `similar` and `askUserQuestions`, or `null` when nothing similar exists
 *   or the lookup failed.
 */
async function checkPromptDisambiguation(promptId, promptName) {
  try {
    const client = await getSDKClient();
    const allPrompts = await client.listPrompts();
    const targetName = typeof promptName === "string" ? promptName.toLowerCase() : "";
    if (targetName === "") {
      return null;
    }
    // Normalise both ids to strings so the selected prompt is excluded even
    // when the caller passes a numeric id.
    const selectedId = String(promptId);
    const similar = allPrompts.filter((p) => {
      if (String(p.id) === selectedId) {
        return false;
      }
      // Skip prompts without a usable name instead of letting one malformed
      // entry throw and discard the whole result.
      if (typeof p.name !== "string" || p.name === "") {
        return false;
      }
      const candidate = p.name.toLowerCase();
      return candidate.includes(targetName) || targetName.includes(candidate);
    });
    if (similar.length === 0) {
      return null;
    }
    return {
      message: `Found ${String(similar.length)} prompt(s) with similar names. Confirm this is the right one:`,
      selected: { id: promptId, name: promptName },
      similar: similar.map((p) => ({ id: String(p.id), name: p.name })),
      askUserQuestions: [{
        question: `You selected "${promptName}" (ID: ${promptId}). Is this correct, or did you mean one of these?`,
        options: [
          `${promptName} (ID: ${promptId}) — SELECTED`,
          ...similar.map((p) => `${p.name} (ID: ${String(p.id)})`)
        ]
      }]
    };
  } catch {
    // Deliberately best-effort: disambiguation is optional extra context.
    return null;
  }
}
|
|
3700
|
+
/**
 * Builds the guided "create an evaluation" payload for a single prompt.
 *
 * Fetches the prompt, derives the field lists from its inputSchema and
 * outputSchema `properties`, and produces one question per field together
 * with workflow steps, a criteria template, an example command and an agent
 * directive. If the first dataset of the prompt has items, the first item is
 * used to attach an example value to each question: input fields read from
 * the item's `input`, output fields from its `expectedOutput`.
 *
 * @param {string} promptId - Id of the prompt to build the workflow for.
 * @returns {Promise<object>} The workflow payload; a `disambiguation` key is
 *   added when similarly named prompts are found.
 * @throws Propagates errors from obtaining the SDK client or fetching the
 *   prompt. Dataset lookup failures are ignored.
 */
async function buildGuidedWorkflow(promptId) {
  const client = await getSDKClient();
  const prompt = await client.getPrompt(promptId);
  const truncate = (s, max) => s.length > max ? s.slice(0, max) + "..." : s;
  const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
  const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
  const inputFields = Object.keys(inputProperties);
  const outputFields = Object.keys(outputProperties);
  const allFields = [
    ...inputFields.map((f) => ({ field: f, source: "inputSchema", fieldSchema: inputProperties[f] })),
    ...outputFields.map((f) => ({ field: f, source: "outputSchema", fieldSchema: outputProperties[f] }))
  ];

  // Keep the input and expected-output halves of the sample item separate.
  // Collapsing them into one object (preferring expectedOutput) meant input
  // fields were looked up in the expected output and never got an example.
  let exampleInput = null;
  let exampleOutput = null;
  try {
    const datasets = await client.listDatasets(promptId);
    const dsId = datasets[0]?.id;
    const items = dsId != null ? await client.listDatasetItems(String(dsId)) : [];
    const firstItem = items[0];
    exampleInput = firstItem?.input ?? null;
    exampleOutput = firstItem?.expectedOutput ?? null;
  } catch {
    // Best effort: example values are optional context for the questions.
  }

  const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
    const isInput = source === "inputSchema";
    const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
    const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
    const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
    const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
    const exampleSource = isInput ? exampleInput : exampleOutput;
    return {
      question,
      header,
      // The hint key is only present for input fields.
      ...hint != null ? { hint } : {},
      options: [
        {
          label: "Define rubric",
          description: defineDesc
        },
        {
          label: "See suggestion",
          description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
        }
      ],
      multiSelect: false,
      context: {
        fieldType: fieldSchema?.type ?? "unknown",
        fieldDescription: fieldSchema?.description ?? null,
        fieldSource: source,
        promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
        exampleValue: exampleSource?.[field] ?? null
      }
    };
  });

  // One entry per output line; joined with "\n" below.
  const instructionLines = [
    `MANDATORY ORDERS — READ CAREFULLY:`,
    `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.`,
    `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.`,
    `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"`,
    ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.`,
    `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"`,
    `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.`,
    `6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.`,
    ``,
    `7. EVAL CRITERIA FRAMING — INPUT vs OUTPUT scoping (mirror of SKILL.md → "Guided Eval Criteria"):`,
    ` INPUT fields → Minimum Viable Context (MVC).`,
    ` Ask: "Is the data required for the prompt to succeed actually present in this {variable}?"`,
    ` Rubric uses a COMPLETENESS scale grounded in observable presence of required data:`,
    ` 1.0 = all required context present, no ambiguity`,
    ` 0.5 = some context present, enough to attempt but likely partial/hedged answer`,
    ` 0.0 = critical context missing, prompt cannot succeed regardless of model`,
    ` OUTPUT fields → Output Standards.`,
    ` Ask: "Does the response meet content correctness + structural correctness?"`,
    ` Rubric uses a CORRECTNESS scale with concrete pass/fail tiers, e.g.:`,
    ` 1.0 = valid shape, all required fields, faithful content`,
    ` 0.5 = valid shape, 1-2 fields hedged or partially correct`,
    ` 0.0 = invalid shape OR fabricated facts`,
    ` NEVER use vague rubrics like "0-1 scale" or "score based on accuracy".`,
    ` Canonical source: .claude/skills/mutagent-cli/SKILL.md → "Guided Eval Criteria".`,
    " Note: template variables use single-brace `{variable}` (platform canonical). `mutagent explore`",
    " infers single vs double per-file via inferPromptVariables()."
  ];

  const result = {
    prompt: { id: promptId, name: prompt.name },
    inputSchema: { fields: inputFields },
    outputSchema: { fields: outputFields },
    workflow: {
      description: "Follow these steps to create an evaluation for this prompt:",
      steps: [
        { step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
        { step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
        { step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
        { step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
        { step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
      ],
      criteriaTemplate: {
        evalConfig: {
          criteria: [{
            name: "<criterion_name>",
            description: "<correctness rubric - describe what correct vs incorrect looks like>",
            evaluationParameter: `<one of: ${allFields.length > 0 ? allFields.map((f) => f.field).join(", ") : "no fields detected - check prompt schemas"}>`
          }]
        }
      },
      // Example uses only the first output field (if any) to stay short.
      exampleCommand: `mutagent prompts evaluation create ${promptId} --name "My Evaluation" -d '${JSON.stringify({
        evalConfig: {
          criteria: outputFields.slice(0, 1).map((f) => ({
            name: `${f} accuracy`,
            description: `Correct if ${f} matches expected structure and content; incorrect if missing, malformed, or off-topic`,
            evaluationParameter: f
          }))
        }
      })}'`
    },
    askUserQuestions,
    _directive: {
      display: "workflow_guide",
      instruction: instructionLines.join("\n"),
      aiAgentDecisionTree: {
        step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
        step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
        step3: `After collecting all criteria, construct the --data JSON and run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '<json>'`
      }
    },
    _meta: { inputFields, outputFields, allFields }
  };

  const disambiguation = await checkPromptDisambiguation(promptId, prompt.name);
  if (disambiguation) {
    result.disambiguation = disambiguation;
  }
  return result;
}
|
|
3830
|
+
|
|
3831
|
+
// src/commands/prompts/guided-dataset.ts
|
|
3832
|
+
/**
 * Builds the static guidance payload returned when guided dataset creation
 * is requested without a prompt id.
 *
 * @returns {object} A fresh object with `mode`, `message`, the ordered
 *   `workflow` command list, a `formatReference` showing the dataset item
 *   shape, `suggestedCategories`, and a `_directive` with the follow-up
 *   instruction and commands.
 */
function buildGenericDatasetWorkflow() {
  const commandSequence = [
    "1. mutagent explore --json — scan codebase for prompts",
    "2. mutagent prompts create ... --json — upload the target prompt",
    "3. Re-run: mutagent prompts dataset add <prompt-id> --guided --json"
  ];

  // Placeholder item illustrating the input / expectedOutput pairing.
  const itemShape = {
    input: { "<inputSchema_field>": "<value>" },
    expectedOutput: { "<outputSchema_field>": "<expected_value>" }
  };

  const category = (name, description, priority) => ({ name, description, priority });
  const categories = [
    category("Edge Cases", "Boundary inputs, empty fields, malformed data", "high"),
    category("Hard Cases", "Ambiguous inputs, adversarial examples", "high"),
    category("Representative Cases", "Typical production inputs", "medium")
  ];

  const directive = {
    instruction: "Ask the user which prompt they want to create a dataset for. If unclear, run mutagent explore --json first.",
    next: ["mutagent explore --json", "mutagent prompts list --json"]
  };

  return {
    mode: "generic",
    message: "No prompt-id provided. Follow this sequence to create a dataset:",
    workflow: commandSequence,
    formatReference: { item: itemShape },
    suggestedCategories: categories,
    _directive: directive
  };
}
|
|
3858
|
+
/**
 * Builds the guided "create a dataset" payload for an already-fetched prompt.
 *
 * Field names are taken from the `properties` of the prompt's inputSchema and
 * outputSchema. The payload contains a template dataset item, two general
 * questions plus one question per schema field, suggested test categories,
 * step-by-step guidance and an agent directive.
 *
 * The template item falls back to a generic placeholder independently for
 * each side, so a prompt with only one populated schema never yields an empty
 * `input` or `expectedOutput` template.
 *
 * @param {object} prompt - Prompt record; reads `id`, `name`, `inputSchema`
 *   and `outputSchema`.
 * @returns {object} The guided dataset workflow payload.
 */
function buildGuidedDatasetWorkflow(prompt) {
  const promptId = String(prompt.id);
  const outputProperties = prompt.outputSchema && typeof prompt.outputSchema === "object" ? prompt.outputSchema.properties ?? {} : {};
  const inputProperties = prompt.inputSchema && typeof prompt.inputSchema === "object" ? prompt.inputSchema.properties ?? {} : {};
  const inputFields = Object.keys(inputProperties);
  const outputFields = Object.keys(outputProperties);

  // Object.fromEntries defines own properties, so a field literally named
  // "__proto__" is kept instead of being swallowed by the prototype setter.
  const templateInput = Object.fromEntries(inputFields.map((f) => [f, "<value>"]));
  const templateOutput = Object.fromEntries(outputFields.map((f) => [f, "<expected>"]));

  const askUserQuestions = [
    {
      field: "_general",
      source: "inputSchema",
      question: "What are the hardest inputs for this prompt? Describe scenarios that typically cause failures or ambiguous outputs."
    },
    {
      field: "_edge_cases",
      source: "inputSchema",
      question: "What edge cases have caused failures in production? Include boundary inputs, empty fields, and malformed data."
    },
    ...inputFields.map((f) => ({
      field: f,
      source: "inputSchema",
      question: `What values should "${f}" have in test cases? Include edge cases (empty, unicode, very long, malformed).`
    })),
    ...outputFields.map((f) => ({
      field: f,
      source: "outputSchema",
      question: `What should correct "${f}" look like? Describe the format, tone, completeness criteria, and examples of incorrect outputs.`
    }))
  ];

  const suggestedCategories = [
    {
      name: "Edge Cases",
      description: "Boundary inputs, empty fields, malformed data",
      priority: "high"
    },
    {
      name: "Hard Cases",
      description: "Ambiguous inputs, adversarial examples, domain traps",
      priority: "high"
    },
    {
      name: "Representative Cases",
      description: "Typical production inputs",
      priority: "medium"
    }
  ];

  return {
    promptId,
    promptName: prompt.name,
    schemaFields: {
      input: inputFields,
      output: outputFields
    },
    suggestedCategories,
    templateItem: {
      // Decide per side: one populated schema must not blank out the other.
      input: inputFields.length > 0 ? templateInput : { "<field>": "<value>" },
      expectedOutput: outputFields.length > 0 ? templateOutput : { "<field>": "<expected>" }
    },
    askUserQuestions,
    guidance: {
      minItems: 5,
      priorityRule: "Hard cases that expose prompt weaknesses > easy cases that always pass",
      steps: [
        "Collect answers to each question from the user",
        "Generate 5-10 dataset items covering all categories",
        "Ensure at least 2 hard/edge cases per category",
        'Format as JSON array: [{"input": {...}, "expectedOutput": {...}}, ...]',
        `Run: mutagent prompts dataset add ${promptId} -d '<json>' --name '<name>' --json`
      ]
    },
    _directive: {
      instruction: "Collect domain-specific dataset items from the user. Ask about each schema field. Prioritize hard cases over easy cases. Then construct the dataset JSON and upload.",
      next: [
        `mutagent prompts dataset add ${promptId} -d '<constructed-json>' --name '<name>' --json`
      ]
    }
  };
}
|
|
3945
|
+
/**
 * Fetches a prompt by id and returns its guided dataset workflow payload,
 * adding a `disambiguation` entry when similarly named prompts are found.
 *
 * @param {string} promptId - Id of the prompt to fetch.
 * @returns {Promise<object>} The guided dataset workflow payload.
 */
async function fetchAndBuildGuidedDatasetWorkflow(promptId) {
  const sdk = await getSDKClient();
  const fetchedPrompt = await sdk.getPrompt(promptId);
  const workflow = buildGuidedDatasetWorkflow(fetchedPrompt);
  const lookalikes = await checkPromptDisambiguation(promptId, fetchedPrompt.name);
  if (!lookalikes) {
    return workflow;
  }
  workflow.disambiguation = lookalikes;
  return workflow;
}
|
|
3955
|
+
|
|
3956
|
+
// src/commands/prompts/datasets.ts
|
|
3542
3957
|
function registerDatasetCommands(prompts) {
|
|
3543
3958
|
const dataset = new Command3("dataset").description("Manage datasets for prompts").addHelpText("after", `
|
|
3544
3959
|
Examples:
|
|
@@ -3638,11 +4053,19 @@ Verify the dataset ID exists, or list datasets for a prompt to find valid IDs.`)
|
|
|
3638
4053
|
}
|
|
3639
4054
|
}
|
|
3640
4055
|
});
|
|
3641
|
-
dataset.command("add").description("Add dataset to a prompt").argument("
|
|
4056
|
+
dataset.command("add").description("Add dataset to a prompt").argument("[prompt-id]", "Prompt ID (from: mutagent prompts list) — optional when --guided is set").option("-d, --data <json>", "Inline JSON array of dataset items").option("-n, --name <name>", "Dataset name").option("--guided", "Guided mode — analyze prompt schema and suggest dataset categories. prompt-id is optional when --guided is set.").addHelpText("after", `
|
|
3642
4057
|
Examples:
|
|
4058
|
+
${chalk8.dim("$")} mutagent prompts dataset add --guided --json # no prompt-id: returns generic workflow + upload instructions
|
|
4059
|
+
${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> --guided --json # recommended: schema-aware category suggestions
|
|
3643
4060
|
${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> -d '[{"input":{"text":"hello"},"expectedOutput":{"result":"world"}}]'
|
|
3644
4061
|
${chalk8.dim("$")} mutagent prompts dataset add <prompt-id> -d '[{"input":{"text":"hello"},"expectedOutput":{"result":"world"}}]' --name "My Dataset"
|
|
3645
4062
|
|
|
4063
|
+
Guided mode (--guided):
|
|
4064
|
+
Fetches the prompt's inputSchema + outputSchema and returns structured JSON
|
|
4065
|
+
that guides a coding agent to collect domain-specific dataset items.
|
|
4066
|
+
The agent uses the guidance to ask the user questions, then constructs
|
|
4067
|
+
the dataset JSON and calls: mutagent prompts dataset add <id> -d '<json>' --name '<name>' --json
|
|
4068
|
+
|
|
3646
4069
|
Inline data format (-d):
|
|
3647
4070
|
JSON array of objects, e.g.:
|
|
3648
4071
|
${chalk8.dim('[{"input": {"text": "hello"}, "expectedOutput": {"result": "world"}}]')}
|
|
@@ -3657,11 +4080,26 @@ ${chalk8.yellow("AI Agent (MANDATORY):")}
|
|
|
3657
4080
|
expectedOutput is REQUIRED for evaluation scoring.
|
|
3658
4081
|
Check schemas: mutagent prompts get <prompt-id> --json
|
|
3659
4082
|
|
|
3660
|
-
${chalk8.red("Required: --data must be provided.")}
|
|
4083
|
+
${chalk8.red("Required: --data or --guided must be provided.")}
|
|
3661
4084
|
`).action(async (promptId, options) => {
|
|
3662
|
-
const isJson = getJsonFlag(prompts);
|
|
4085
|
+
const isJson = options.guided ? true : getJsonFlag(prompts);
|
|
3663
4086
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
3664
4087
|
try {
|
|
4088
|
+
if (options.guided && !promptId) {
|
|
4089
|
+
const result = buildGenericDatasetWorkflow();
|
|
4090
|
+
output.output(result);
|
|
4091
|
+
return;
|
|
4092
|
+
}
|
|
4093
|
+
if (options.guided) {
|
|
4094
|
+
const guidedResult = await fetchAndBuildGuidedDatasetWorkflow(promptId);
|
|
4095
|
+
output.output(guidedResult);
|
|
4096
|
+
return;
|
|
4097
|
+
}
|
|
4098
|
+
if (!promptId) {
|
|
4099
|
+
throw new MutagentError("MISSING_ARGUMENTS", "prompt-id is required", `Run: mutagent prompts dataset add --help
|
|
4100
|
+
` + `Usage: mutagent prompts dataset add <prompt-id> [options]
|
|
4101
|
+
` + "With --guided: mutagent prompts dataset add --guided --json (returns generic workflow)");
|
|
4102
|
+
}
|
|
3665
4103
|
if (!options.data) {
|
|
3666
4104
|
throw new MutagentError("MISSING_ARGUMENTS", "-d/--data is required", `Run: mutagent prompts dataset add --help
|
|
3667
4105
|
` + "Use -d '[{...}]' to provide inline JSON data");
|
|
@@ -3704,20 +4142,21 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
|
|
|
3704
4142
|
if (!datasetName) {
|
|
3705
4143
|
throw new MutagentError("MISSING_ARGUMENTS", "Dataset name is required", `Run: mutagent prompts dataset add --help
|
|
3706
4144
|
` + `[Agent: Ask the user for a dataset name via AskUserQuestion, then pass --name]
|
|
3707
|
-
` + `Use --name <name>, e.g., mutagent prompts dataset add ${promptId} --name "my-dataset" -d '[...]'`);
|
|
4145
|
+
` + `Use --name <name>, e.g., mutagent prompts dataset add ${promptId ?? "<prompt-id>"} --name "my-dataset" -d '[...]'`);
|
|
3708
4146
|
}
|
|
4147
|
+
const resolvedPromptId = promptId;
|
|
3709
4148
|
const client = await getSDKClient();
|
|
3710
|
-
const datasetResult = await client.addDataset(
|
|
4149
|
+
const datasetResult = await client.addDataset(resolvedPromptId, content, datasetName);
|
|
3711
4150
|
if (isJson) {
|
|
3712
4151
|
let rsState;
|
|
3713
4152
|
try {
|
|
3714
|
-
const evals = await client.listEvaluations(
|
|
4153
|
+
const evals = await client.listEvaluations(resolvedPromptId);
|
|
3715
4154
|
rsState = { evaluations: evals.length };
|
|
3716
4155
|
} catch {}
|
|
3717
|
-
const directive = datasetAddedDirective(
|
|
4156
|
+
const directive = datasetAddedDirective(resolvedPromptId, datasetResult.id, datasetResult.name, datasetResult.itemCount, rsState);
|
|
3718
4157
|
output.output({
|
|
3719
4158
|
...datasetResult,
|
|
3720
|
-
_links: datasetLinks(
|
|
4159
|
+
_links: datasetLinks(resolvedPromptId, datasetResult.id),
|
|
3721
4160
|
_directive: directive
|
|
3722
4161
|
});
|
|
3723
4162
|
echoDirectiveToStderr(directive);
|
|
@@ -3725,7 +4164,7 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
|
|
|
3725
4164
|
for (const w of warnings) {
|
|
3726
4165
|
output.warn(w);
|
|
3727
4166
|
}
|
|
3728
|
-
output.success(`Added dataset "${datasetResult.name}" to prompt: ${
|
|
4167
|
+
output.success(`Added dataset "${datasetResult.name}" to prompt: ${resolvedPromptId} (id: ${String(datasetResult.id)})`);
|
|
3729
4168
|
if (datasetResult.itemCount !== undefined && datasetResult.itemCount > 0) {
|
|
3730
4169
|
output.info(`Items uploaded: ${String(datasetResult.itemCount)}`);
|
|
3731
4170
|
}
|
|
@@ -3736,14 +4175,14 @@ Each item must have: {"input": {...}, "expectedOutput": {...}}`);
|
|
|
3736
4175
|
resourceType: "Dataset",
|
|
3737
4176
|
id: datasetResult.id,
|
|
3738
4177
|
name: datasetResult.name,
|
|
3739
|
-
dashboardUrl: datasetLink(
|
|
3740
|
-
apiPath: `/api/prompts/${
|
|
4178
|
+
dashboardUrl: datasetLink(resolvedPromptId, datasetResult.id),
|
|
4179
|
+
apiPath: `/api/prompts/${resolvedPromptId}/datasets/${String(datasetResult.id)}`
|
|
3741
4180
|
});
|
|
3742
4181
|
console.log(hints);
|
|
3743
4182
|
}
|
|
3744
4183
|
updateMutationContext((ctx) => {
|
|
3745
4184
|
ctx.addDiscoveredDataset("inline-data", datasetResult.name, datasetResult.itemCount ?? 0);
|
|
3746
|
-
ctx.markDatasetUploaded("inline-data", String(datasetResult.id),
|
|
4185
|
+
ctx.markDatasetUploaded("inline-data", String(datasetResult.id), resolvedPromptId);
|
|
3747
4186
|
});
|
|
3748
4187
|
} catch (error) {
|
|
3749
4188
|
handleError(error, isJson);
|
|
@@ -3883,144 +4322,33 @@ async function resolveNumericPromptId(client, promptGroupId) {
|
|
|
3883
4322
|
return sorted[0]?.id ?? null;
|
|
3884
4323
|
}
|
|
3885
4324
|
|
|
3886
|
-
// src/
|
|
3887
|
-
|
|
3888
|
-
|
|
3889
|
-
|
|
3890
|
-
|
|
3891
|
-
|
|
3892
|
-
|
|
3893
|
-
|
|
3894
|
-
|
|
3895
|
-
|
|
3896
|
-
|
|
3897
|
-
|
|
3898
|
-
|
|
3899
|
-
|
|
3900
|
-
|
|
3901
|
-
|
|
3902
|
-
|
|
3903
|
-
|
|
3904
|
-
|
|
3905
|
-
|
|
3906
|
-
|
|
3907
|
-
|
|
3908
|
-
|
|
3909
|
-
|
|
3910
|
-
|
|
3911
|
-
|
|
3912
|
-
|
|
3913
|
-
} catch {}
|
|
3914
|
-
const askUserQuestions = allFields.map(({ field, source, fieldSchema }) => {
|
|
3915
|
-
const isInput = source === "inputSchema";
|
|
3916
|
-
const question = isInput ? `Define the Minimum Viable Context for "${field}". What data MUST be present in this input for the prompt to produce a correct output? Describe what constitutes complete vs incomplete input, and WHY this field matters.` : `Define what correct "${field}" looks like. What structure, content, or qualities make it good vs bad? Give concrete examples of good and bad outputs.`;
|
|
3917
|
-
const hint = isInput ? `Input fields define what data the prompt NEEDS to work correctly. Without defining minimum viable context, the optimizer cannot detect whether failures come from bad input or bad prompt logic.` : null;
|
|
3918
|
-
const header = isInput ? `${field} [INPUT]` : `${field} [OUTPUT]`;
|
|
3919
|
-
const defineDesc = isInput ? `Describe what data MUST be present in "${field}" and WHY the prompt needs it. Focus on completeness and minimum viable context.` : `Describe what a correct vs incorrect "${field}" looks like. Focus on structure, content quality, and concrete examples — not numeric scores.`;
|
|
3920
|
-
return {
|
|
3921
|
-
question,
|
|
3922
|
-
header,
|
|
3923
|
-
...hint != null ? { hint } : {},
|
|
3924
|
-
options: [
|
|
3925
|
-
{
|
|
3926
|
-
label: "Define rubric",
|
|
3927
|
-
description: defineDesc
|
|
3928
|
-
},
|
|
3929
|
-
{
|
|
3930
|
-
label: "See suggestion",
|
|
3931
|
-
description: `Get a suggested rubric based on the prompt and schema definition for "${field}". You can refine it.`
|
|
3932
|
-
}
|
|
3933
|
-
],
|
|
3934
|
-
multiSelect: false,
|
|
3935
|
-
context: {
|
|
3936
|
-
fieldType: fieldSchema?.type ?? "unknown",
|
|
3937
|
-
fieldDescription: fieldSchema?.description ?? null,
|
|
3938
|
-
fieldSource: source,
|
|
3939
|
-
promptExcerpt: truncate(prompt.humanPrompt ?? prompt.systemPrompt ?? prompt.rawPrompt ?? "", 200),
|
|
3940
|
-
exampleValue: datasetExample?.[field] ?? null
|
|
3941
|
-
}
|
|
3942
|
-
};
|
|
3943
|
-
});
|
|
3944
|
-
return {
|
|
3945
|
-
prompt: { id: promptId, name: prompt.name },
|
|
3946
|
-
inputSchema: { fields: inputFields },
|
|
3947
|
-
outputSchema: { fields: outputFields },
|
|
3948
|
-
workflow: {
|
|
3949
|
-
description: "Follow these steps to create an evaluation for this prompt:",
|
|
3950
|
-
steps: [
|
|
3951
|
-
{ step: 1, action: "Review prompt schemas and context", detail: "Understand what the prompt does and what each field means." },
|
|
3952
|
-
{ step: 2, action: "Define correctness criteria for EVERY field", detail: `For EACH of the ${String(allFields.length)} fields in askUserQuestions, use AskUserQuestion to collect a rubric. INPUT fields (${String(inputFields.length)}): collect minimum viable context definitions — what data MUST exist. OUTPUT fields (${String(outputFields.length)}): collect correctness criteria — what correct vs incorrect looks like. Do NOT skip any field. Use the context hints to suggest rubrics when asked.` },
|
|
3953
|
-
{ step: 3, action: "Build criteria JSON", detail: "Map each rubric to: { name: field, description: rubric, evaluationParameter: field }" },
|
|
3954
|
-
{ step: 4, action: "Ask for evaluation name", detail: "Ask user what to name this evaluation" },
|
|
3955
|
-
{ step: 5, action: "Create the evaluation", detail: `mutagent prompts evaluation create ${promptId} --name "<name>" -d '<json>' --json` }
|
|
3956
|
-
],
|
|
3957
|
-
criteriaTemplate: {
|
|
3958
|
-
evalConfig: {
|
|
3959
|
-
criteria: [{
|
|
3960
|
-
name: "<criterion_name>",
|
|
3961
|
-
description: "<correctness rubric - describe what correct vs incorrect looks like>",
|
|
3962
|
-
evaluationParameter: `<one of: ${allFields.length > 0 ? allFields.map((f) => f.field).join(", ") : "no fields detected - check prompt schemas"}>`
|
|
3963
|
-
}]
|
|
3964
|
-
}
|
|
3965
|
-
},
|
|
3966
|
-
exampleCommand: `mutagent prompts evaluation create ${promptId} --name "My Evaluation" -d '${JSON.stringify({
|
|
3967
|
-
evalConfig: {
|
|
3968
|
-
criteria: outputFields.slice(0, 1).map((f) => ({
|
|
3969
|
-
name: `${f} accuracy`,
|
|
3970
|
-
description: `Correct if ${f} matches expected structure and content; incorrect if missing, malformed, or off-topic`,
|
|
3971
|
-
evaluationParameter: f
|
|
3972
|
-
}))
|
|
3973
|
-
}
|
|
3974
|
-
})}'`
|
|
3975
|
-
},
|
|
3976
|
-
askUserQuestions,
|
|
3977
|
-
_directive: {
|
|
3978
|
-
display: "workflow_guide",
|
|
3979
|
-
instruction: `MANDATORY ORDERS — READ CAREFULLY:
|
|
3980
|
-
` + `1. You MUST ask the user about EVERY SINGLE field listed in askUserQuestions. There are ${String(allFields.length)} fields total.
|
|
3981
|
-
` + `2. Do NOT skip ANY field. Do NOT decide that a field "doesn't need" a definition.
|
|
3982
|
-
` + `3. INPUT fields (${String(inputFields.length)}): These define MINIMUM VIABLE CONTEXT — the data that MUST exist in the input for the prompt to work. Ask: "What data MUST be present in this field and WHY does the prompt need it?"
|
|
3983
|
-
` + ` WHY THIS MATTERS: Without minimum viable context definitions, the optimizer cannot distinguish between failures caused by incomplete input vs failures caused by bad prompt logic.
|
|
3984
|
-
` + `4. OUTPUT fields (${String(outputFields.length)}): These define correctness criteria. Ask: "What does correct vs incorrect for this field look like?"
|
|
3985
|
-
` + `5. After collecting ALL ${String(allFields.length)} definitions, construct the --data JSON with criteria for EVERY field.
|
|
3986
|
-
` + "6. VIOLATION: Skipping ANY field or telling the user a field doesn't need a definition is a protocol violation.",
|
|
3987
|
-
aiAgentDecisionTree: {
|
|
3988
|
-
step1: "Check if criteria already exist in the user's code. If criteria match expected shape (name + description + evaluationParameter targeting schema fields), use --data directly.",
|
|
3989
|
-
step2: "If criteria are missing or malformed, use the askUserQuestions payloads to collect them via AskUserQuestion.",
|
|
3990
|
-
step3: `After collecting all criteria, construct the --data JSON and run: mutagent prompts evaluation create <prompt-id> --name "<name>" -d '<json>'`
|
|
3991
|
-
}
|
|
3992
|
-
},
|
|
3993
|
-
_meta: { inputFields, outputFields, allFields }
|
|
3994
|
-
};
|
|
3995
|
-
}
|
|
3996
|
-
|
|
3997
|
-
// src/lib/adapters/eval-criteria.ts
|
|
3998
|
-
function cliCriterionToCanonical(c) {
|
|
3999
|
-
return {
|
|
4000
|
-
criteria: c.description,
|
|
4001
|
-
evaluationParameter: c.evaluationParameter,
|
|
4002
|
-
name: c.name
|
|
4003
|
-
};
|
|
4004
|
-
}
|
|
4005
|
-
function canonicalCriterionToCli(c) {
|
|
4006
|
-
return {
|
|
4007
|
-
name: c.name ?? c.id,
|
|
4008
|
-
description: c.criteria,
|
|
4009
|
-
evaluationParameter: c.evaluationParameter
|
|
4010
|
-
};
|
|
4011
|
-
}
|
|
4012
|
-
function cliCriteriaArrayToCanonical(arr) {
|
|
4013
|
-
if (!Array.isArray(arr))
|
|
4014
|
-
return [];
|
|
4015
|
-
return arr.map(cliCriterionToCanonical);
|
|
4016
|
-
}
|
|
4017
|
-
function canonicalCriteriaArrayToCli(arr) {
|
|
4018
|
-
if (!Array.isArray(arr))
|
|
4019
|
-
return [];
|
|
4020
|
-
return arr.map(canonicalCriterionToCli);
|
|
4021
|
-
}
|
|
4022
|
-
|
|
4023
|
-
// src/commands/prompts/evaluations-run.ts
|
|
4325
|
+
// src/lib/adapters/eval-criteria.ts
|
|
4326
|
+
function cliCriterionToCanonical(c) {
|
|
4327
|
+
return {
|
|
4328
|
+
criteria: c.description,
|
|
4329
|
+
evaluationParameter: c.evaluationParameter,
|
|
4330
|
+
name: c.name
|
|
4331
|
+
};
|
|
4332
|
+
}
|
|
4333
|
+
function canonicalCriterionToCli(c) {
|
|
4334
|
+
return {
|
|
4335
|
+
name: c.name ?? c.id,
|
|
4336
|
+
description: c.criteria,
|
|
4337
|
+
evaluationParameter: c.evaluationParameter
|
|
4338
|
+
};
|
|
4339
|
+
}
|
|
4340
|
+
function cliCriteriaArrayToCanonical(arr) {
|
|
4341
|
+
if (!Array.isArray(arr))
|
|
4342
|
+
return [];
|
|
4343
|
+
return arr.map(cliCriterionToCanonical);
|
|
4344
|
+
}
|
|
4345
|
+
function canonicalCriteriaArrayToCli(arr) {
|
|
4346
|
+
if (!Array.isArray(arr))
|
|
4347
|
+
return [];
|
|
4348
|
+
return arr.map(canonicalCriterionToCli);
|
|
4349
|
+
}
|
|
4350
|
+
|
|
4351
|
+
// src/commands/prompts/evaluations-run.ts
|
|
4024
4352
|
init_sdk_client();
|
|
4025
4353
|
import chalk9 from "chalk";
|
|
4026
4354
|
init_errors();
|
|
@@ -4213,8 +4541,9 @@ Examples:
|
|
|
4213
4541
|
handleError(error, isJson);
|
|
4214
4542
|
}
|
|
4215
4543
|
});
|
|
4216
|
-
evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("
|
|
4544
|
+
evaluation.command("create").description("Create an evaluation configuration for a prompt").argument("[prompt-id]", "Prompt ID (from: mutagent prompts list) — optional when --guided is set").option("-d, --data <json>", "Evaluation as JSON string (for pre-validated criteria only)").option("-n, --name <name>", "Evaluation name (required unless --guided)").option("--description <text>", "Evaluation description").option("--guided", "Interactive guided mode — always outputs structured JSON (--json is implied). prompt-id is optional when --guided is set.").addHelpText("after", `
|
|
4217
4545
|
Examples:
|
|
4546
|
+
${chalk10.dim("$")} mutagent prompts evaluation create --guided --json ${chalk10.dim("# no prompt-id: returns generic workflow + upload instructions")}
|
|
4218
4547
|
${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --guided ${chalk10.dim("# recommended: shows workflow guide + schema fields")}
|
|
4219
4548
|
${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --guided --json ${chalk10.dim("# structured workflow for AI agents")}
|
|
4220
4549
|
${chalk10.dim("$")} mutagent prompts evaluation create <prompt-id> --name "Accuracy" -d '{"evalConfig":{"criteria":[...]}}' ${chalk10.dim("# power user")}
|
|
@@ -4251,6 +4580,16 @@ ${chalk10.dim("Get prompt IDs: mutagent prompts list")}
|
|
|
4251
4580
|
}
|
|
4252
4581
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
4253
4582
|
try {
|
|
4583
|
+
if (options.guided && !promptId) {
|
|
4584
|
+
const result = buildGenericEvalWorkflow();
|
|
4585
|
+
output.output(result);
|
|
4586
|
+
return;
|
|
4587
|
+
}
|
|
4588
|
+
if (!promptId) {
|
|
4589
|
+
throw new MutagentError("MISSING_ARGUMENTS", "prompt-id is required", `Run: mutagent prompts evaluation create --help
|
|
4590
|
+
` + `Usage: mutagent prompts evaluation create <prompt-id> [options]
|
|
4591
|
+
` + "With --guided: mutagent prompts evaluation create --guided --json (returns generic workflow)");
|
|
4592
|
+
}
|
|
4254
4593
|
const renderGuidedWorkflow = (wf) => {
|
|
4255
4594
|
const { inputFields, outputFields, allFields } = wf._meta;
|
|
4256
4595
|
if (isJson) {
|
|
@@ -7238,18 +7577,32 @@ AI AGENT: Execute these steps using Bash for install, Write/Edit for code change
|
|
|
7238
7577
|
|
|
7239
7578
|
// src/commands/agents/index.ts
|
|
7240
7579
|
import { Command as Command10 } from "commander";
|
|
7241
|
-
import
|
|
7580
|
+
import chalk26 from "chalk";
|
|
7242
7581
|
|
|
7243
7582
|
// src/commands/agents/agents-crud.ts
|
|
7244
7583
|
init_sdk_client();
|
|
7245
|
-
import
|
|
7584
|
+
import chalk25 from "chalk";
|
|
7246
7585
|
init_errors();
|
|
7586
|
+
|
|
7587
|
+
// src/lib/agents-wip-banner.ts
|
|
7588
|
+
import chalk24 from "chalk";
|
|
7589
|
+
var AGENTS_WIP_BANNER = `${chalk24.yellow("Agents: Early Access / WIP")}
|
|
7590
|
+
${chalk24.bold("Agent optimization & evaluation are actively in development.")}
|
|
7591
|
+
${chalk24.dim("The Prompt path (single-shot LLM calls with output schemas) is production-ready.")}
|
|
7592
|
+
${chalk24.dim("For Agents (multi-turn / tool-calling), join early access:")}
|
|
7593
|
+
${chalk24.cyan("https://www.mutagent.io/agents-partnership")}
|
|
7594
|
+
${chalk24.dim("`mutagent agents ...` CRUD commands exist but optimization/eval are NOT ready.")}`;
|
|
7595
|
+
var AGENTS_WIP_BANNER_SHORT = `${chalk24.yellow("[Agents WIP]")} ${chalk24.dim("CRUD only — optimization & evaluation in development.")} ${chalk24.cyan("https://www.mutagent.io/agents-partnership")}`;
|
|
7596
|
+
|
|
7597
|
+
// src/commands/agents/agents-crud.ts
|
|
7247
7598
|
function registerAgentsCrud(agents) {
|
|
7248
7599
|
agents.command("list").description("List all agents").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").option("-n, --name <name>", "Filter by name").option("-s, --status <status>", "Filter by status (active, paused, archived)").addHelpText("after", `
|
|
7600
|
+
${AGENTS_WIP_BANNER_SHORT}
|
|
7601
|
+
|
|
7249
7602
|
Examples:
|
|
7250
|
-
${
|
|
7251
|
-
${
|
|
7252
|
-
${
|
|
7603
|
+
${chalk25.dim("$")} mutagent agents list
|
|
7604
|
+
${chalk25.dim("$")} mutagent agents list --status active
|
|
7605
|
+
${chalk25.dim("$")} mutagent agents list --name "reviewer" --json
|
|
7253
7606
|
`).action(async (options) => {
|
|
7254
7607
|
const isJson = getJsonFlag(agents);
|
|
7255
7608
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7298,9 +7651,11 @@ Examples:
|
|
|
7298
7651
|
}
|
|
7299
7652
|
});
|
|
7300
7653
|
agents.command("get").description("Get agent details").argument("<id>", "Agent ID").addHelpText("after", `
|
|
7654
|
+
${AGENTS_WIP_BANNER_SHORT}
|
|
7655
|
+
|
|
7301
7656
|
Examples:
|
|
7302
|
-
${
|
|
7303
|
-
${
|
|
7657
|
+
${chalk25.dim("$")} mutagent agents get <agent-id>
|
|
7658
|
+
${chalk25.dim("$")} mutagent agents get <agent-id> --json
|
|
7304
7659
|
`).action(async (id) => {
|
|
7305
7660
|
const isJson = getJsonFlag(agents);
|
|
7306
7661
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7330,11 +7685,11 @@ Examples:
|
|
|
7330
7685
|
};
|
|
7331
7686
|
output.output(formatted);
|
|
7332
7687
|
if (agent.systemPrompt) {
|
|
7333
|
-
console.log(
|
|
7688
|
+
console.log(chalk25.bold(`
|
|
7334
7689
|
System Prompt:`));
|
|
7335
|
-
console.log(
|
|
7690
|
+
console.log(chalk25.gray("─".repeat(60)));
|
|
7336
7691
|
console.log(agent.systemPrompt);
|
|
7337
|
-
console.log(
|
|
7692
|
+
console.log(chalk25.gray("─".repeat(60)));
|
|
7338
7693
|
}
|
|
7339
7694
|
}
|
|
7340
7695
|
} catch (error) {
|
|
@@ -7342,18 +7697,20 @@ System Prompt:`));
|
|
|
7342
7697
|
}
|
|
7343
7698
|
});
|
|
7344
7699
|
agents.command("create").description("Create a new agent").option("-d, --data <json>", "Agent as JSON string (recommended for CI/scripts/agents)").option("-n, --name <name>", "Agent name").option("-s, --slug <slug>", "Agent slug (URL-friendly identifier)").option("-p, --system-prompt <prompt>", "System prompt").option("-m, --model <model>", "Model (claude-sonnet-4-5, claude-opus-4-5, claude-haiku-4-5)").option("--description <desc>", "Agent description").addHelpText("after", `
|
|
7700
|
+
${AGENTS_WIP_BANNER_SHORT}
|
|
7701
|
+
|
|
7345
7702
|
Examples:
|
|
7346
|
-
${
|
|
7347
|
-
${
|
|
7703
|
+
${chalk25.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
|
|
7704
|
+
${chalk25.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are a code reviewer..."}'
|
|
7348
7705
|
|
|
7349
7706
|
Expected JSON (--data):
|
|
7350
|
-
${
|
|
7707
|
+
${chalk25.dim('{"name":"<name>","slug":"<slug>","systemPrompt":"<system prompt>","model":"<model-id>","description":"<description>"}')}
|
|
7351
7708
|
|
|
7352
7709
|
Input Methods (pick one, priority order):
|
|
7353
|
-
--name/--slug/... Individual flags ${
|
|
7710
|
+
--name/--slug/... Individual flags ${chalk25.green("(recommended)")}
|
|
7354
7711
|
-d, --data Inline JSON object (CI/scripts/agents)
|
|
7355
7712
|
|
|
7356
|
-
${
|
|
7713
|
+
${chalk25.red("Required: name, slug, systemPrompt.")} ${chalk25.dim("CLI flags override --data fields.")}
|
|
7357
7714
|
`).action(async (options) => {
|
|
7358
7715
|
const isJson = getJsonFlag(agents);
|
|
7359
7716
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7398,16 +7755,18 @@ ${chalk24.red("Required: name, slug, systemPrompt.")} ${chalk24.dim("CLI flags o
|
|
|
7398
7755
|
}
|
|
7399
7756
|
});
|
|
7400
7757
|
agents.command("update").description("Update an agent").argument("<id>", "Agent ID").option("-d, --data <json>", "Agent updates as JSON string (CI/scripts/agents)").option("-n, --name <name>", "New name").option("-p, --system-prompt <prompt>", "New system prompt").option("-m, --model <model>", "New model").option("--description <desc>", "New description").option("-s, --status <status>", "New status (active, paused, archived)").addHelpText("after", `
|
|
7758
|
+
${AGENTS_WIP_BANNER_SHORT}
|
|
7759
|
+
|
|
7401
7760
|
Examples:
|
|
7402
|
-
${
|
|
7403
|
-
${
|
|
7404
|
-
${
|
|
7761
|
+
${chalk25.dim("$")} mutagent agents update <id> --name "New Name"
|
|
7762
|
+
${chalk25.dim("$")} mutagent agents update <id> --system-prompt "Updated prompt" --status active
|
|
7763
|
+
${chalk25.dim("$")} mutagent agents update <id> -d '{"name":"New Name","systemPrompt":"Updated prompt"}'
|
|
7405
7764
|
|
|
7406
7765
|
Input Methods (pick one, priority order):
|
|
7407
|
-
--name/--system-prompt/... Individual flags ${
|
|
7766
|
+
--name/--system-prompt/... Individual flags ${chalk25.green("(recommended)")}
|
|
7408
7767
|
-d, --data Inline JSON object (CI/scripts/agents)
|
|
7409
7768
|
|
|
7410
|
-
${
|
|
7769
|
+
${chalk25.dim("CLI flags override --data fields.")}
|
|
7411
7770
|
`).action(async (id, options) => {
|
|
7412
7771
|
const isJson = getJsonFlag(agents);
|
|
7413
7772
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7454,12 +7813,14 @@ ${chalk24.dim("CLI flags override --data fields.")}
|
|
|
7454
7813
|
}
|
|
7455
7814
|
});
|
|
7456
7815
|
agents.command("delete").description("Delete an agent").argument("<id>", "Agent ID").option("--force", "Skip confirmation").addHelpText("after", `
|
|
7816
|
+
${AGENTS_WIP_BANNER_SHORT}
|
|
7817
|
+
|
|
7457
7818
|
Examples:
|
|
7458
|
-
${
|
|
7459
|
-
${
|
|
7460
|
-
${
|
|
7819
|
+
${chalk25.dim("$")} mutagent agents delete <id>
|
|
7820
|
+
${chalk25.dim("$")} mutagent agents delete <id> --force
|
|
7821
|
+
${chalk25.dim("$")} mutagent agents delete <id> --force --json
|
|
7461
7822
|
|
|
7462
|
-
${
|
|
7823
|
+
${chalk25.dim("Tip: Use --force to skip confirmation (required for non-interactive/CI usage).")}
|
|
7463
7824
|
`).action(async (id, options) => {
|
|
7464
7825
|
const isJson = getJsonFlag(agents);
|
|
7465
7826
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7488,14 +7849,16 @@ ${chalk24.dim("Tip: Use --force to skip confirmation (required for non-interacti
|
|
|
7488
7849
|
|
|
7489
7850
|
// src/commands/agents/index.ts
|
|
7490
7851
|
function createAgentsCommand() {
|
|
7491
|
-
const agents = new Command10("agents").description("Manage AI agents").addHelpText("after", `
|
|
7852
|
+
const agents = new Command10("agents").description("Manage AI agents (CRUD only — optimization & evaluation are WIP)").addHelpText("after", `
|
|
7853
|
+
${AGENTS_WIP_BANNER}
|
|
7854
|
+
|
|
7492
7855
|
Examples:
|
|
7493
|
-
${
|
|
7494
|
-
${
|
|
7495
|
-
${
|
|
7496
|
-
${
|
|
7497
|
-
${
|
|
7498
|
-
${
|
|
7856
|
+
${chalk26.dim("$")} mutagent agents list
|
|
7857
|
+
${chalk26.dim("$")} mutagent agents get <agent-id>
|
|
7858
|
+
${chalk26.dim("$")} mutagent agents create --name "Code Reviewer" --slug code-reviewer --system-prompt "You are a code reviewer..."
|
|
7859
|
+
${chalk26.dim("$")} mutagent agents create -d '{"name":"Code Reviewer","slug":"code-reviewer","systemPrompt":"You are..."}'
|
|
7860
|
+
${chalk26.dim("$")} mutagent agents update <agent-id> --name "Updated Name"
|
|
7861
|
+
${chalk26.dim("$")} mutagent agents delete <agent-id> --force
|
|
7499
7862
|
|
|
7500
7863
|
Subcommands:
|
|
7501
7864
|
list, get, create, update, delete
|
|
@@ -7507,22 +7870,22 @@ Subcommands:
|
|
|
7507
7870
|
// src/commands/config.ts
|
|
7508
7871
|
init_config();
|
|
7509
7872
|
import { Command as Command11 } from "commander";
|
|
7510
|
-
import
|
|
7873
|
+
import chalk27 from "chalk";
|
|
7511
7874
|
init_errors();
|
|
7512
7875
|
init_sdk_client();
|
|
7513
7876
|
var VALID_CONFIG_KEYS = ["apiKey", "endpoint", "format", "timeout", "defaultWorkspace", "defaultOrganization"];
|
|
7514
7877
|
function createConfigCommand() {
|
|
7515
7878
|
const config = new Command11("config").description("Manage CLI configuration").addHelpText("after", `
|
|
7516
7879
|
Examples:
|
|
7517
|
-
${
|
|
7518
|
-
${
|
|
7519
|
-
${
|
|
7520
|
-
${
|
|
7880
|
+
${chalk27.dim("$")} mutagent config list
|
|
7881
|
+
${chalk27.dim("$")} mutagent config get endpoint
|
|
7882
|
+
${chalk27.dim("$")} mutagent config set workspace <workspace-id>
|
|
7883
|
+
${chalk27.dim("$")} mutagent config set org <org-id>
|
|
7521
7884
|
`);
|
|
7522
7885
|
config.command("list").description("List all configuration").addHelpText("after", `
|
|
7523
7886
|
Examples:
|
|
7524
|
-
${
|
|
7525
|
-
${
|
|
7887
|
+
${chalk27.dim("$")} mutagent config list
|
|
7888
|
+
${chalk27.dim("$")} mutagent config list --json
|
|
7526
7889
|
`).action(() => {
|
|
7527
7890
|
const isJson = getJsonFlag(config);
|
|
7528
7891
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7535,11 +7898,11 @@ Examples:
|
|
|
7535
7898
|
});
|
|
7536
7899
|
config.command("get").description("Get configuration value").argument("<key>", "Configuration key (apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization)").addHelpText("after", `
|
|
7537
7900
|
Examples:
|
|
7538
|
-
${
|
|
7539
|
-
${
|
|
7540
|
-
${
|
|
7901
|
+
${chalk27.dim("$")} mutagent config get endpoint
|
|
7902
|
+
${chalk27.dim("$")} mutagent config get defaultWorkspace
|
|
7903
|
+
${chalk27.dim("$")} mutagent config get apiKey --json
|
|
7541
7904
|
|
|
7542
|
-
${
|
|
7905
|
+
${chalk27.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaultOrganization")}
|
|
7543
7906
|
`).action((key) => {
|
|
7544
7907
|
const isJson = getJsonFlag(config);
|
|
7545
7908
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7566,9 +7929,9 @@ ${chalk26.dim("Keys: apiKey, endpoint, format, timeout, defaultWorkspace, defaul
|
|
|
7566
7929
|
});
|
|
7567
7930
|
set.command("workspace").description("Set default workspace ID").argument("<id>", "Workspace ID to set as default").addHelpText("after", `
|
|
7568
7931
|
Examples:
|
|
7569
|
-
${
|
|
7932
|
+
${chalk27.dim("$")} mutagent config set workspace <workspace-id>
|
|
7570
7933
|
|
|
7571
|
-
${
|
|
7934
|
+
${chalk27.dim("Persists workspace ID so you don't need to pass headers on every request.")}
|
|
7572
7935
|
`).action((id) => {
|
|
7573
7936
|
const isJson = getJsonFlag(config);
|
|
7574
7937
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7582,9 +7945,9 @@ ${chalk26.dim("Persists workspace ID so you don't need to pass headers on every
|
|
|
7582
7945
|
});
|
|
7583
7946
|
set.command("org").description("Set default organization ID").argument("<id>", "Organization ID to set as default").addHelpText("after", `
|
|
7584
7947
|
Examples:
|
|
7585
|
-
${
|
|
7948
|
+
${chalk27.dim("$")} mutagent config set org <org-id>
|
|
7586
7949
|
|
|
7587
|
-
${
|
|
7950
|
+
${chalk27.dim("Persists organization ID for org-scoped API keys.")}
|
|
7588
7951
|
`).action((id) => {
|
|
7589
7952
|
const isJson = getJsonFlag(config);
|
|
7590
7953
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7603,7 +7966,7 @@ ${chalk26.dim("Persists organization ID for org-scoped API keys.")}
|
|
|
7603
7966
|
// src/commands/playground.ts
|
|
7604
7967
|
init_sdk_client();
|
|
7605
7968
|
import { Command as Command12 } from "commander";
|
|
7606
|
-
import
|
|
7969
|
+
import chalk28 from "chalk";
|
|
7607
7970
|
init_errors();
|
|
7608
7971
|
function parseSSELine(line3) {
|
|
7609
7972
|
if (!line3 || line3.startsWith(":")) {
|
|
@@ -7630,11 +7993,11 @@ function parsePromptStreamEvent(data) {
|
|
|
7630
7993
|
function createPlaygroundCommand() {
|
|
7631
7994
|
const playground = new Command12("playground").description("Execute and test prompts interactively").addHelpText("after", `
|
|
7632
7995
|
Examples:
|
|
7633
|
-
${
|
|
7634
|
-
${
|
|
7635
|
-
${
|
|
7636
|
-
${
|
|
7637
|
-
${
|
|
7996
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
|
|
7997
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
|
|
7998
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> -i '{}' --model gpt-4-turbo
|
|
7999
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
|
|
8000
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --messages '[{"role":"user","content":"Hi"}]'
|
|
7638
8001
|
|
|
7639
8002
|
Input Format:
|
|
7640
8003
|
The input must be a valid JSON object matching the prompt's input schema.
|
|
@@ -7650,16 +8013,16 @@ Streaming:
|
|
|
7650
8013
|
`);
|
|
7651
8014
|
playground.command("run").description("Execute a prompt with input variables").argument("<prompt-id>", "Prompt ID to execute (from: mutagent prompts list)").option("-i, --input <json>", "Input variables as JSON").option("-s, --stream", "Stream the response").option("-m, --model <model>", "Override model").option("--system <text>", "Set system prompt text").option("--human <text>", "Set human/user message text").option("--messages <json>", "Pass full messages array as JSON string").addHelpText("after", `
|
|
7652
8015
|
Examples:
|
|
7653
|
-
${
|
|
7654
|
-
${
|
|
7655
|
-
${
|
|
7656
|
-
${
|
|
8016
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{"name": "John"}'
|
|
8017
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --stream
|
|
8018
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --system "You are helpful" --human "Hello"
|
|
8019
|
+
${chalk28.dim("$")} mutagent playground run <prompt-id> --input '{}' --model gpt-4-turbo --json
|
|
7657
8020
|
|
|
7658
8021
|
Input Methods (pick one, priority order):
|
|
7659
|
-
--system/--human Quick system + user message ${
|
|
8022
|
+
--system/--human Quick system + user message ${chalk28.green("(recommended)")}
|
|
7660
8023
|
--input '{"key":"value"}' Inline JSON variables
|
|
7661
8024
|
--messages '[...]' Full messages array
|
|
7662
|
-
${
|
|
8025
|
+
${chalk28.dim(`Hint: Test before evaluating: mutagent playground run <id> --input '{"key":"value"}'`)}
|
|
7663
8026
|
`).action(async (promptId, options) => {
|
|
7664
8027
|
const isJson = getJsonFlag(playground);
|
|
7665
8028
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7681,21 +8044,21 @@ ${chalk27.dim(`Hint: Test before evaluating: mutagent playground run <id> --inpu
|
|
|
7681
8044
|
}
|
|
7682
8045
|
});
|
|
7683
8046
|
} else {
|
|
7684
|
-
console.log(
|
|
8047
|
+
console.log(chalk28.bold(`
|
|
7685
8048
|
Execution Result:`));
|
|
7686
|
-
console.log(
|
|
7687
|
-
console.log(
|
|
8049
|
+
console.log(chalk28.gray("─".repeat(50)));
|
|
8050
|
+
console.log(chalk28.cyan("Output:"));
|
|
7688
8051
|
console.log(result.output);
|
|
7689
|
-
console.log(
|
|
7690
|
-
console.log(
|
|
7691
|
-
console.log(
|
|
8052
|
+
console.log(chalk28.gray("─".repeat(50)));
|
|
8053
|
+
console.log(chalk28.dim(`Model: ${result.model}`));
|
|
8054
|
+
console.log(chalk28.dim(`Execution Time: ${String(result.executionTimeMs)}ms`));
|
|
7692
8055
|
if (result.tokens) {
|
|
7693
|
-
console.log(
|
|
8056
|
+
console.log(chalk28.dim(`Tokens: ${String(result.tokens.prompt)} prompt + ${String(result.tokens.completion)} completion = ${String(result.tokens.total)} total`));
|
|
7694
8057
|
}
|
|
7695
8058
|
if (result.cost !== undefined) {
|
|
7696
|
-
console.log(
|
|
8059
|
+
console.log(chalk28.dim(`Cost: $${result.cost.toFixed(6)}`));
|
|
7697
8060
|
}
|
|
7698
|
-
console.log(
|
|
8061
|
+
console.log(chalk28.dim(`Playground: ${playgroundLink(promptId)}`));
|
|
7699
8062
|
console.log();
|
|
7700
8063
|
}
|
|
7701
8064
|
}
|
|
@@ -7784,9 +8147,9 @@ async function executeStreaming(client, promptId, input, model, isJson, output)
|
|
|
7784
8147
|
const decoder = new TextDecoder;
|
|
7785
8148
|
let buffer = "";
|
|
7786
8149
|
if (!isJson) {
|
|
7787
|
-
console.log(
|
|
8150
|
+
console.log(chalk28.bold(`
|
|
7788
8151
|
Streaming Output:`));
|
|
7789
|
-
console.log(
|
|
8152
|
+
console.log(chalk28.gray("─".repeat(50)));
|
|
7790
8153
|
}
|
|
7791
8154
|
try {
|
|
7792
8155
|
for (;; ) {
|
|
@@ -7825,15 +8188,15 @@ Streaming Output:`));
|
|
|
7825
8188
|
console.log(JSON.stringify({ type: "complete", result: event.result }));
|
|
7826
8189
|
} else {
|
|
7827
8190
|
console.log();
|
|
7828
|
-
console.log(
|
|
8191
|
+
console.log(chalk28.gray("─".repeat(50)));
|
|
7829
8192
|
if (event.result) {
|
|
7830
|
-
console.log(
|
|
7831
|
-
console.log(
|
|
8193
|
+
console.log(chalk28.dim(`Model: ${event.result.model}`));
|
|
8194
|
+
console.log(chalk28.dim(`Execution Time: ${String(event.result.executionTimeMs)}ms`));
|
|
7832
8195
|
if (event.result.tokens) {
|
|
7833
|
-
console.log(
|
|
8196
|
+
console.log(chalk28.dim(`Tokens: ${String(event.result.tokens.prompt)} prompt + ${String(event.result.tokens.completion)} completion = ${String(event.result.tokens.total)} total`));
|
|
7834
8197
|
}
|
|
7835
8198
|
if (event.result.cost !== undefined) {
|
|
7836
|
-
console.log(
|
|
8199
|
+
console.log(chalk28.dim(`Cost: $${event.result.cost.toFixed(6)}`));
|
|
7837
8200
|
}
|
|
7838
8201
|
}
|
|
7839
8202
|
console.log();
|
|
@@ -7858,13 +8221,13 @@ Streaming Output:`));
|
|
|
7858
8221
|
// src/commands/workspaces.ts
|
|
7859
8222
|
init_sdk_client();
|
|
7860
8223
|
import { Command as Command13 } from "commander";
|
|
7861
|
-
import
|
|
8224
|
+
import chalk29 from "chalk";
|
|
7862
8225
|
init_errors();
|
|
7863
8226
|
function createWorkspacesCommand() {
|
|
7864
8227
|
const workspaces = new Command13("workspaces").description("View workspaces (read-only)").addHelpText("after", `
|
|
7865
8228
|
Examples:
|
|
7866
|
-
${
|
|
7867
|
-
${
|
|
8229
|
+
${chalk29.dim("$")} mutagent workspaces list
|
|
8230
|
+
${chalk29.dim("$")} mutagent workspaces get <workspace-id>
|
|
7868
8231
|
|
|
7869
8232
|
Subcommands:
|
|
7870
8233
|
list, get
|
|
@@ -7873,8 +8236,8 @@ Note: Workspace management (create, update, delete) is available in the Admin Pa
|
|
|
7873
8236
|
`);
|
|
7874
8237
|
workspaces.command("list").description("List all workspaces").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").addHelpText("after", `
|
|
7875
8238
|
Examples:
|
|
7876
|
-
${
|
|
7877
|
-
${
|
|
8239
|
+
${chalk29.dim("$")} mutagent workspaces list
|
|
8240
|
+
${chalk29.dim("$")} mutagent workspaces list --limit 10 --json
|
|
7878
8241
|
`).action(async (options) => {
|
|
7879
8242
|
const isJson = getJsonFlag(workspaces);
|
|
7880
8243
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7914,8 +8277,8 @@ Examples:
|
|
|
7914
8277
|
});
|
|
7915
8278
|
workspaces.command("get").description("Get workspace details").argument("<id>", "Workspace ID").addHelpText("after", `
|
|
7916
8279
|
Examples:
|
|
7917
|
-
${
|
|
7918
|
-
${
|
|
8280
|
+
${chalk29.dim("$")} mutagent workspaces get <workspace-id>
|
|
8281
|
+
${chalk29.dim("$")} mutagent workspaces get <workspace-id> --json
|
|
7919
8282
|
`).action(async (id) => {
|
|
7920
8283
|
const isJson = getJsonFlag(workspaces);
|
|
7921
8284
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -7948,12 +8311,12 @@ Examples:
|
|
|
7948
8311
|
// src/commands/providers/index.ts
|
|
7949
8312
|
init_sdk_client();
|
|
7950
8313
|
import { Command as Command14 } from "commander";
|
|
7951
|
-
import
|
|
8314
|
+
import chalk33 from "chalk";
|
|
7952
8315
|
init_errors();
|
|
7953
8316
|
|
|
7954
8317
|
// src/commands/providers/add.ts
|
|
7955
8318
|
init_sdk_client();
|
|
7956
|
-
import
|
|
8319
|
+
import chalk30 from "chalk";
|
|
7957
8320
|
init_errors();
|
|
7958
8321
|
function resolveScope(scopeFlag, client) {
|
|
7959
8322
|
const scope = scopeFlag ?? "workspace";
|
|
@@ -7976,10 +8339,10 @@ function resolveScope(scopeFlag, client) {
|
|
|
7976
8339
|
function registerAddCommand(parent) {
|
|
7977
8340
|
parent.command("add").description("Add a new provider configuration").requiredOption("-p, --provider <type>", "Provider type (openai, anthropic, google, ...)").requiredOption("-n, --name <name>", "Display name for this provider").requiredOption("-k, --api-key <key>", "API key for the provider").option("-s, --scope <scope>", "Scope: workspace (default), org, user", "workspace").option("--base-url <url>", "Custom base URL for the provider API").option("--set-default", "Set as default provider for this scope").addHelpText("after", `
|
|
7978
8341
|
Examples:
|
|
7979
|
-
${
|
|
7980
|
-
${
|
|
7981
|
-
${
|
|
7982
|
-
${
|
|
8342
|
+
${chalk30.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $OPENAI_API_KEY
|
|
8343
|
+
${chalk30.dim("$")} mutagent providers add --provider anthropic --name "Team Claude" --api-key $KEY --scope org
|
|
8344
|
+
${chalk30.dim("$")} mutagent providers add --provider openai --name "Personal" --api-key $KEY --scope user --set-default
|
|
8345
|
+
${chalk30.dim("$")} mutagent providers add --provider custom --name "Ollama" --api-key none --base-url http://localhost:11434 --json
|
|
7983
8346
|
|
|
7984
8347
|
Scope Resolution:
|
|
7985
8348
|
workspace (default) Uses your configured workspace
|
|
@@ -8025,10 +8388,10 @@ The API key is encrypted server-side and never returned in plain text.
|
|
|
8025
8388
|
console.log(` Scope: ${options.scope ?? "workspace"}`);
|
|
8026
8389
|
console.log(` URL: ${providerLink(created.id)}`);
|
|
8027
8390
|
if (options.setDefault) {
|
|
8028
|
-
console.log(
|
|
8391
|
+
console.log(chalk30.green(" Set as default provider"));
|
|
8029
8392
|
}
|
|
8030
8393
|
console.log("");
|
|
8031
|
-
console.log(
|
|
8394
|
+
console.log(chalk30.dim("API key is encrypted server-side. Use `providers get` to see masked key."));
|
|
8032
8395
|
}
|
|
8033
8396
|
} catch (error) {
|
|
8034
8397
|
handleError(error, isJson);
|
|
@@ -8066,15 +8429,15 @@ function buildProviderCreatedDirective(provider, scope) {
|
|
|
8066
8429
|
|
|
8067
8430
|
// src/commands/providers/update.ts
|
|
8068
8431
|
init_sdk_client();
|
|
8069
|
-
import
|
|
8432
|
+
import chalk31 from "chalk";
|
|
8070
8433
|
init_errors();
|
|
8071
8434
|
function registerUpdateCommand(parent) {
|
|
8072
8435
|
parent.command("update").description("Update an existing provider configuration").argument("<id>", "Provider ID (from: mutagent providers list)").option("-n, --name <name>", "Updated display name").option("-k, --api-key <key>", "Updated API key (will be re-encrypted)").option("--active <bool>", "Activate or deactivate (true|false)").option("--set-default", "Set as default provider for its scope").option("--base-url <url>", 'Updated base URL (use "" to clear)').addHelpText("after", `
|
|
8073
8436
|
Examples:
|
|
8074
|
-
${
|
|
8075
|
-
${
|
|
8076
|
-
${
|
|
8077
|
-
${
|
|
8437
|
+
${chalk31.dim("$")} mutagent providers update <id> --name "New Name"
|
|
8438
|
+
${chalk31.dim("$")} mutagent providers update <id> --api-key $NEW_KEY --json
|
|
8439
|
+
${chalk31.dim("$")} mutagent providers update <id> --active false
|
|
8440
|
+
${chalk31.dim("$")} mutagent providers update <id> --set-default --json
|
|
8078
8441
|
|
|
8079
8442
|
PATCH semantics — only provided fields are updated.
|
|
8080
8443
|
`).action(async (id, options) => {
|
|
@@ -8125,7 +8488,7 @@ PATCH semantics — only provided fields are updated.
|
|
|
8125
8488
|
console.log(` ID: ${String(updated.id ?? id)}`);
|
|
8126
8489
|
console.log(` URL: ${providerLink(updated.id ?? id)}`);
|
|
8127
8490
|
if (options.apiKey) {
|
|
8128
|
-
console.log(
|
|
8491
|
+
console.log(chalk31.dim(" API key re-encrypted server-side."));
|
|
8129
8492
|
}
|
|
8130
8493
|
}
|
|
8131
8494
|
} catch (error) {
|
|
@@ -8164,17 +8527,17 @@ function buildProviderUpdatedDirective(provider, requestId) {
|
|
|
8164
8527
|
|
|
8165
8528
|
// src/commands/providers/delete.ts
|
|
8166
8529
|
init_sdk_client();
|
|
8167
|
-
import
|
|
8530
|
+
import chalk32 from "chalk";
|
|
8168
8531
|
init_errors();
|
|
8169
8532
|
function registerDeleteCommand(parent) {
|
|
8170
8533
|
parent.command("delete").description("Delete a provider configuration").argument("<id>", "Provider ID (from: mutagent providers list)").option("-f, --force", "Skip confirmation prompt").addHelpText("after", `
|
|
8171
8534
|
Examples:
|
|
8172
|
-
${
|
|
8173
|
-
${
|
|
8174
|
-
${
|
|
8535
|
+
${chalk32.dim("$")} mutagent providers delete <id>
|
|
8536
|
+
${chalk32.dim("$")} mutagent providers delete <id> --force
|
|
8537
|
+
${chalk32.dim("$")} mutagent providers delete <id> --force --json
|
|
8175
8538
|
|
|
8176
|
-
${
|
|
8177
|
-
${
|
|
8539
|
+
${chalk32.dim("Note: --force is required. The CLI is non-interactive — confirm with the user via your native flow, then pass --force.")}
|
|
8540
|
+
${chalk32.dim("Warning: API keys are AES-256-GCM encrypted and irrecoverable after deletion. Agents referencing this provider will lose their model config.")}
|
|
8178
8541
|
`).action(async (id, options) => {
|
|
8179
8542
|
const isJson = getJsonFlag(parent);
|
|
8180
8543
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -8264,12 +8627,12 @@ function validateProviderType(type) {
|
|
|
8264
8627
|
function createProvidersCommand() {
|
|
8265
8628
|
const providers = new Command14("providers").description("Manage LLM provider configurations (BYOK)").addHelpText("after", `
|
|
8266
8629
|
Examples:
|
|
8267
|
-
${
|
|
8268
|
-
${
|
|
8269
|
-
${
|
|
8270
|
-
${
|
|
8271
|
-
${
|
|
8272
|
-
${
|
|
8630
|
+
${chalk33.dim("$")} mutagent providers list
|
|
8631
|
+
${chalk33.dim("$")} mutagent providers get <provider-id>
|
|
8632
|
+
${chalk33.dim("$")} mutagent providers add --provider openai --name "My OpenAI" --api-key $KEY
|
|
8633
|
+
${chalk33.dim("$")} mutagent providers update <id> --name "New Name"
|
|
8634
|
+
${chalk33.dim("$")} mutagent providers delete <id> --force
|
|
8635
|
+
${chalk33.dim("$")} mutagent providers test <provider-id>
|
|
8273
8636
|
|
|
8274
8637
|
Provider Types:
|
|
8275
8638
|
openai, anthropic, google, azure, bedrock, cohere, mistral, groq, together, replicate, custom
|
|
@@ -8279,9 +8642,9 @@ Subcommands:
|
|
|
8279
8642
|
`);
|
|
8280
8643
|
providers.command("list").description("List all providers").option("-l, --limit <n>", "Limit results", "50").option("-o, --offset <n>", "Offset for pagination").option("-t, --type <type>", "Filter by provider type").addHelpText("after", `
|
|
8281
8644
|
Examples:
|
|
8282
|
-
${
|
|
8283
|
-
${
|
|
8284
|
-
${
|
|
8645
|
+
${chalk33.dim("$")} mutagent providers list
|
|
8646
|
+
${chalk33.dim("$")} mutagent providers list --type openai
|
|
8647
|
+
${chalk33.dim("$")} mutagent providers list --json
|
|
8285
8648
|
`).action(async (options) => {
|
|
8286
8649
|
const isJson = getJsonFlag(providers);
|
|
8287
8650
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -8330,8 +8693,8 @@ Examples:
|
|
|
8330
8693
|
});
|
|
8331
8694
|
providers.command("get").description("Get provider details").argument("<id>", "Provider ID (from: mutagent providers list)").addHelpText("after", `
|
|
8332
8695
|
Examples:
|
|
8333
|
-
${
|
|
8334
|
-
${
|
|
8696
|
+
${chalk33.dim("$")} mutagent providers get <provider-id>
|
|
8697
|
+
${chalk33.dim("$")} mutagent providers get <provider-id> --json
|
|
8335
8698
|
`).action(async (id) => {
|
|
8336
8699
|
const isJson = getJsonFlag(providers);
|
|
8337
8700
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -8360,10 +8723,10 @@ Examples:
|
|
|
8360
8723
|
});
|
|
8361
8724
|
providers.command("test").description("Test provider connectivity").argument("<id>", "Provider ID (from: mutagent providers list)").addHelpText("after", `
|
|
8362
8725
|
Examples:
|
|
8363
|
-
${
|
|
8364
|
-
${
|
|
8726
|
+
${chalk33.dim("$")} mutagent providers test <provider-id>
|
|
8727
|
+
${chalk33.dim("$")} mutagent providers test <provider-id> --json
|
|
8365
8728
|
|
|
8366
|
-
${
|
|
8729
|
+
${chalk33.dim("Tests connectivity and lists available models for the provider.")}
|
|
8367
8730
|
`).action(async (id) => {
|
|
8368
8731
|
const isJson = getJsonFlag(providers);
|
|
8369
8732
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -8378,9 +8741,9 @@ ${chalk32.dim("Tests connectivity and lists available models for the provider.")
|
|
|
8378
8741
|
} else {
|
|
8379
8742
|
if (result.success) {
|
|
8380
8743
|
output.success(`Provider test passed (${String(result.responseTimeMs)}ms)`);
|
|
8381
|
-
console.log(
|
|
8744
|
+
console.log(chalk33.green(`Message: ${result.message}`));
|
|
8382
8745
|
if (result.availableModels && result.availableModels.length > 0) {
|
|
8383
|
-
console.log(
|
|
8746
|
+
console.log(chalk33.bold(`
|
|
8384
8747
|
Available Models:`));
|
|
8385
8748
|
result.availableModels.forEach((model) => {
|
|
8386
8749
|
console.log(` - ${model}`);
|
|
@@ -8389,7 +8752,7 @@ Available Models:`));
|
|
|
8389
8752
|
} else {
|
|
8390
8753
|
output.error(`Provider test failed: ${result.message}`);
|
|
8391
8754
|
if (result.error) {
|
|
8392
|
-
console.log(
|
|
8755
|
+
console.log(chalk33.red(`Error: ${result.error}`));
|
|
8393
8756
|
}
|
|
8394
8757
|
}
|
|
8395
8758
|
}
|
|
@@ -8407,7 +8770,7 @@ Available Models:`));
|
|
|
8407
8770
|
init_config();
|
|
8408
8771
|
import { Command as Command15 } from "commander";
|
|
8409
8772
|
import inquirer2 from "inquirer";
|
|
8410
|
-
import
|
|
8773
|
+
import chalk34 from "chalk";
|
|
8411
8774
|
import { existsSync as existsSync11, mkdirSync as mkdirSync3, writeFileSync as writeFileSync4 } from "fs";
|
|
8412
8775
|
import { execSync as execSync3 } from "child_process";
|
|
8413
8776
|
import { join as join6 } from "path";
|
|
@@ -8530,13 +8893,13 @@ function writeRcConfig(config, cwd = process.cwd()) {
|
|
|
8530
8893
|
function createInitCommand() {
|
|
8531
8894
|
const init = new Command15("init").description("Initialize MutagenT in your project").option("--non-interactive", "Skip interactive prompts (defaults to CLI-only mode)").addHelpText("after", `
|
|
8532
8895
|
Examples:
|
|
8533
|
-
${
|
|
8534
|
-
${
|
|
8896
|
+
${chalk34.dim("$")} mutagent init # Interactive setup wizard
|
|
8897
|
+
${chalk34.dim("$")} mutagent init --non-interactive # CLI-only mode (no prompts)
|
|
8535
8898
|
|
|
8536
8899
|
Modes:
|
|
8537
|
-
${
|
|
8538
|
-
${
|
|
8539
|
-
${
|
|
8900
|
+
${chalk34.bold("Full scaffold")} Install SDK + integration package, create config, setup tracing
|
|
8901
|
+
${chalk34.bold("CLI-only")} Verify auth + create .mutagentrc.json with workspace/endpoint
|
|
8902
|
+
${chalk34.bold("Skip")} Exit without changes
|
|
8540
8903
|
`).action(async (options) => {
|
|
8541
8904
|
const isJson = getJsonFlag(init);
|
|
8542
8905
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -8808,23 +9171,36 @@ Modes:
|
|
|
8808
9171
|
|
|
8809
9172
|
// src/commands/explore.ts
|
|
8810
9173
|
import { Command as Command16 } from "commander";
|
|
8811
|
-
import
|
|
9174
|
+
import chalk35 from "chalk";
|
|
8812
9175
|
import { resolve as resolve3 } from "path";
|
|
8813
9176
|
init_errors();
|
|
8814
9177
|
function createExploreCommand() {
|
|
8815
9178
|
const explore = new Command16("explore").description("Scan codebase for prompts, datasets, and MutagenT markers").option("-p, --path <dir>", "Directory to scan", ".").option("--depth <n>", "Max directory depth", "10").option("--include <glob>", "Include file pattern", "**/*.{ts,js,py,tsx,jsx}").option("--exclude <dirs>", "Comma-separated directories to exclude", "node_modules,dist,.git,build,.next,__pycache__,venv,.venv").option("--markers-only", "Only find existing MutagenT markers").addHelpText("after", `
|
|
8816
9179
|
Examples:
|
|
8817
|
-
${
|
|
8818
|
-
${
|
|
8819
|
-
${
|
|
8820
|
-
${
|
|
8821
|
-
${
|
|
9180
|
+
${chalk35.dim("$")} mutagent explore
|
|
9181
|
+
${chalk35.dim("$")} mutagent explore --path ./src
|
|
9182
|
+
${chalk35.dim("$")} mutagent explore --include "**/*.{ts,py}" --depth 5
|
|
9183
|
+
${chalk35.dim("$")} mutagent explore --markers-only
|
|
9184
|
+
${chalk35.dim("$")} mutagent explore --json
|
|
8822
9185
|
|
|
8823
9186
|
Detection modes:
|
|
8824
|
-
${
|
|
8825
|
-
${
|
|
9187
|
+
${chalk35.dim("Heuristic")} Template variables ({var} or {{var}}), prompt constants, schema definitions
|
|
9188
|
+
${chalk35.dim("Marker")} MutagenT:START/END comment markers from previous uploads
|
|
9189
|
+
${chalk35.dim("Agents")} Multi-turn / tool-calling patterns (informational — WIP)
|
|
9190
|
+
|
|
9191
|
+
${chalk35.bold("Template variable delimiters (inferred per-file):")}
|
|
9192
|
+
${chalk35.dim("single")} ${chalk35.cyan("{variable}")} MutagenT platform canonical; LangChain PromptTemplate
|
|
9193
|
+
${chalk35.dim("double")} ${chalk35.cyan("{{variable}}")} Handlebars / Mustache / LiquidJS / Jinja2 / LangChain ChatPromptTemplate
|
|
9194
|
+
|
|
9195
|
+
${chalk35.dim("Inference is per-file: majority of bracket style wins, tie goes to single")}
|
|
9196
|
+
${chalk35.dim('(platform canonical). Fenced code blocks are stripped first; {"json": ...} ')}
|
|
9197
|
+
${chalk35.dim("literals are skipped via JSON-key lookahead. See the mutagent-cli skill at ")}
|
|
9198
|
+
${chalk35.dim("concepts/prompt-variables.md for the full contract.")}
|
|
8826
9199
|
|
|
8827
|
-
${
|
|
9200
|
+
${chalk35.dim("With --json, each discovered prompt carries a ")}${chalk35.cyan("delimiter")}${chalk35.dim(" field:")}
|
|
9201
|
+
${chalk35.dim('{ "file": "src/prompts/summarize.ts", "line": 12, "reason": "template-variable", "delimiter": "single" }')}
|
|
9202
|
+
|
|
9203
|
+
${chalk35.dim("Results are saved to .mutagent/mutation-context.md for use by other commands.")}
|
|
8828
9204
|
`).action((options) => {
|
|
8829
9205
|
const isJson = getJsonFlag(explore);
|
|
8830
9206
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
@@ -8842,7 +9218,7 @@ ${chalk34.dim("Results are saved to .mutagent/mutation-context.md for use by oth
|
|
|
8842
9218
|
markersOnly
|
|
8843
9219
|
};
|
|
8844
9220
|
if (!isJson) {
|
|
8845
|
-
console.log(
|
|
9221
|
+
console.log(chalk35.cyan(`
|
|
8846
9222
|
Scanning ${scanPath}...
|
|
8847
9223
|
`));
|
|
8848
9224
|
}
|
|
@@ -8860,52 +9236,78 @@ Scanning ${scanPath}...
|
|
|
8860
9236
|
prompts: result.prompts,
|
|
8861
9237
|
datasets: result.datasets,
|
|
8862
9238
|
markers: result.markers,
|
|
9239
|
+
agents: result.agents,
|
|
9240
|
+
agentsWip: result.agents.length > 0 ? {
|
|
9241
|
+
status: "early-access",
|
|
9242
|
+
message: "Agent detection is informational only. Agent optimization & evaluation are WIP.",
|
|
9243
|
+
earlyAccessUrl: "https://www.mutagent.io/agents-partnership"
|
|
9244
|
+
} : undefined,
|
|
8863
9245
|
summary: {
|
|
8864
9246
|
totalPrompts: result.prompts.length,
|
|
8865
9247
|
totalDatasets: result.datasets.length,
|
|
8866
|
-
totalMarkers: result.markers.length
|
|
9248
|
+
totalMarkers: result.markers.length,
|
|
9249
|
+
totalAgents: result.agents.length
|
|
8867
9250
|
},
|
|
8868
9251
|
contextFile: ".mutagent/mutation-context.md"
|
|
8869
9252
|
});
|
|
8870
9253
|
} else {
|
|
8871
|
-
const totalFindings = result.prompts.length + result.datasets.length + result.markers.length;
|
|
9254
|
+
const totalFindings = result.prompts.length + result.datasets.length + result.markers.length + result.agents.length;
|
|
8872
9255
|
if (totalFindings === 0) {
|
|
8873
9256
|
output.info("No prompts, datasets, or markers found.");
|
|
8874
|
-
console.log(
|
|
9257
|
+
console.log(chalk35.dim(`
|
|
8875
9258
|
Tip: Create a prompt with template variables like {{input}} to get started.`));
|
|
8876
9259
|
return;
|
|
8877
9260
|
}
|
|
8878
9261
|
if (result.prompts.length > 0) {
|
|
8879
|
-
console.log(
|
|
9262
|
+
console.log(chalk35.bold(` Prompts Found (${String(result.prompts.length)}):`));
|
|
8880
9263
|
console.log();
|
|
8881
9264
|
for (const p of result.prompts) {
|
|
8882
|
-
const confidenceTag = p.confidence === "high" ?
|
|
8883
|
-
const reasonTag =
|
|
8884
|
-
console.log(` ${confidenceTag} ${
|
|
8885
|
-
console.log(` ${
|
|
9265
|
+
const confidenceTag = p.confidence === "high" ? chalk35.green("[high]") : p.confidence === "medium" ? chalk35.yellow("[medium]") : chalk35.dim("[low]");
|
|
9266
|
+
const reasonTag = chalk35.dim(`[${p.reason}]`);
|
|
9267
|
+
console.log(` ${confidenceTag} ${chalk35.green(p.file)}:${chalk35.yellow(String(p.line))} ${reasonTag}`);
|
|
9268
|
+
console.log(` ${chalk35.dim(p.preview)}`);
|
|
8886
9269
|
}
|
|
8887
9270
|
console.log();
|
|
8888
9271
|
}
|
|
8889
9272
|
if (result.datasets.length > 0) {
|
|
8890
|
-
console.log(
|
|
9273
|
+
console.log(chalk35.bold(` Datasets Found (${String(result.datasets.length)}):`));
|
|
8891
9274
|
console.log();
|
|
8892
9275
|
for (const d of result.datasets) {
|
|
8893
|
-
console.log(` ${
|
|
9276
|
+
console.log(` ${chalk35.green(d.file)} ${chalk35.dim(`(${String(d.items)} items)`)}`);
|
|
8894
9277
|
}
|
|
8895
9278
|
console.log();
|
|
8896
9279
|
}
|
|
8897
9280
|
if (result.markers.length > 0) {
|
|
8898
|
-
console.log(
|
|
9281
|
+
console.log(chalk35.bold(` MutagenT Markers (${String(result.markers.length)}):`));
|
|
8899
9282
|
console.log();
|
|
8900
9283
|
for (const m of result.markers) {
|
|
8901
|
-
const idPart = m.platformId ?
|
|
8902
|
-
console.log(` ${
|
|
9284
|
+
const idPart = m.platformId ? chalk35.cyan(` id=${m.platformId}`) : "";
|
|
9285
|
+
console.log(` ${chalk35.green(m.file)}:${chalk35.yellow(String(m.line))} ${chalk35.magenta(m.type)}${idPart}`);
|
|
9286
|
+
}
|
|
9287
|
+
console.log();
|
|
9288
|
+
}
|
|
9289
|
+
if (result.agents.length > 0) {
|
|
9290
|
+
console.log(chalk35.bold(` Agents Detected (${String(result.agents.length)}):`));
|
|
9291
|
+
console.log();
|
|
9292
|
+
for (const a of result.agents) {
|
|
9293
|
+
const confidenceTag = a.confidence === "high" ? chalk35.green("[high]") : a.confidence === "medium" ? chalk35.yellow("[medium]") : chalk35.dim("[low]");
|
|
9294
|
+
const reasonTag = chalk35.dim(`[${a.reason}]`);
|
|
9295
|
+
console.log(` ${confidenceTag} ${chalk35.green(a.file)}:${chalk35.yellow(String(a.line))} ${reasonTag}`);
|
|
9296
|
+
console.log(` ${chalk35.dim(a.preview)}`);
|
|
8903
9297
|
}
|
|
8904
9298
|
console.log();
|
|
9299
|
+
console.log(chalk35.yellow(" ┌─ Agents: Early Access / WIP ──────────────────────────────"));
|
|
9300
|
+
console.log(chalk35.yellow(" │ ") + chalk35.bold("Agent optimization & evaluation are actively in development."));
|
|
9301
|
+
console.log(chalk35.yellow(" │ ") + chalk35.dim("Detected patterns above are informational only."));
|
|
9302
|
+
console.log(chalk35.yellow(" │ ") + chalk35.dim("The Prompt path (single-shot + output schema) is production-ready."));
|
|
9303
|
+
console.log(chalk35.yellow(" │ ") + chalk35.dim("Join early access for the Agent path:"));
|
|
9304
|
+
console.log(chalk35.yellow(" │ ") + chalk35.cyan("https://www.mutagent.io/agents-partnership"));
|
|
9305
|
+
console.log(chalk35.yellow(" └────────────────────────────────────────────────────────────"));
|
|
9306
|
+
console.log();
|
|
8905
9307
|
}
|
|
8906
|
-
console.log(
|
|
8907
|
-
console.log(` ${
|
|
8908
|
-
console.log(
|
|
9308
|
+
console.log(chalk35.dim(" ─────────────────────────────────"));
|
|
9309
|
+
console.log(` ${chalk35.bold("Summary:")} ${String(result.prompts.length)} prompts, ${String(result.datasets.length)} datasets, ${String(result.markers.length)} markers, ${String(result.agents.length)} agents`);
|
|
9310
|
+
console.log(chalk35.dim(` Saved to .mutagent/mutation-context.md`));
|
|
8909
9311
|
console.log();
|
|
8910
9312
|
}
|
|
8911
9313
|
} catch (error) {
|
|
@@ -8917,181 +9319,1180 @@ Scanning ${scanPath}...
|
|
|
8917
9319
|
|
|
8918
9320
|
// src/commands/skills.ts
|
|
8919
9321
|
import { Command as Command17 } from "commander";
|
|
8920
|
-
import
|
|
9322
|
+
import chalk36 from "chalk";
|
|
8921
9323
|
import { existsSync as existsSync12, mkdirSync as mkdirSync4, writeFileSync as writeFileSync5 } from "fs";
|
|
8922
|
-
import { join as join7 } from "path";
|
|
9324
|
+
import { dirname, join as join7 } from "path";
|
|
8923
9325
|
import { execSync as execSync4 } from "child_process";
|
|
8924
|
-
|
|
8925
|
-
|
|
8926
|
-
|
|
8927
|
-
|
|
8928
|
-
return process.cwd();
|
|
8929
|
-
}
|
|
8930
|
-
}
|
|
8931
|
-
var SKILL_FRONTMATTER = `---
|
|
9326
|
+
|
|
9327
|
+
// src/generated/skill-content.ts
|
|
9328
|
+
var SKILL_FILES = {
|
|
9329
|
+
"SKILL.md": `---
|
|
8932
9330
|
name: mutagent-cli
|
|
8933
9331
|
description: |
|
|
8934
9332
|
MutagenT CLI - AI Prompt Optimization Platform CLI.
|
|
8935
9333
|
Guides coding agents through prompt upload, evaluation creation,
|
|
8936
9334
|
dataset curation, optimization, and framework integration.
|
|
8937
9335
|
Triggers: "mutagent", "optimize prompt", "upload prompt", "integrate tracing",
|
|
8938
|
-
"create evaluation", "upload dataset", "mutagent cli".
|
|
8939
|
-
|
|
8940
|
-
|
|
8941
|
-
|
|
8942
|
-
|
|
8943
|
-
|
|
8944
|
-
|
|
8945
|
-
|
|
8946
|
-
|
|
8947
|
-
|
|
8948
|
-
|
|
8949
|
-
|
|
8950
|
-
|
|
8951
|
-
|
|
8952
|
-
|
|
8953
|
-
|
|
8954
|
-
|
|
8955
|
-
|
|
8956
|
-
|
|
8957
|
-
|
|
8958
|
-
|
|
8959
|
-
|
|
8960
|
-
|
|
8961
|
-
|
|
8962
|
-
|
|
8963
|
-
|
|
8964
|
-
|
|
8965
|
-
|
|
8966
|
-
|
|
8967
|
-
|
|
8968
|
-
|
|
8969
|
-
|
|
8970
|
-
|
|
8971
|
-
|
|
8972
|
-
|
|
8973
|
-
|
|
8974
|
-
|
|
8975
|
-
|
|
8976
|
-
|
|
8977
|
-
|
|
8978
|
-
|
|
8979
|
-
|
|
8980
|
-
|
|
8981
|
-
|
|
8982
|
-
|
|
8983
|
-
|
|
8984
|
-
|
|
8985
|
-
|
|
8986
|
-
|
|
8987
|
-
|
|
8988
|
-
|
|
8989
|
-
|
|
8990
|
-
|
|
8991
|
-
|
|
8992
|
-
|
|
8993
|
-
|
|
8994
|
-
|
|
8995
|
-
|
|
8996
|
-
|
|
8997
|
-
|
|
8998
|
-
|
|
8999
|
-
|
|
9000
|
-
|
|
9001
|
-
-
|
|
9002
|
-
-
|
|
9003
|
-
-
|
|
9004
|
-
|
|
9336
|
+
"create evaluation", "upload dataset", "explore prompts", "mutagent cli".
|
|
9337
|
+
---
|
|
9338
|
+
|
|
9339
|
+
# MutagenT CLI Skill
|
|
9340
|
+
|
|
9341
|
+
> **Canonical source**: \`mutagent-cli/.claude/skills/mutagent-cli/SKILL.md\`
|
|
9342
|
+
> Packed into the CLI binary via \`scripts/sync-skill.ts\`. Installed to end-user
|
|
9343
|
+
> dev environments via \`mutagent skills install\`. Edit this file, not the installed copy.
|
|
9344
|
+
|
|
9345
|
+
## SKILL vs CLI — responsibility split
|
|
9346
|
+
|
|
9347
|
+
| Layer | Owner | Responsibility |
|
|
9348
|
+
|---|---|---|
|
|
9349
|
+
| **SKILL** (this file + subfiles) | here | journeys, routing, 5 rules, enforcement |
|
|
9350
|
+
| **CLI** | \`mutagent <cmd>\` | commands, flags, \`--json\`, \`_directive.*\`, \`_links\` |
|
|
9351
|
+
| Platform | api.mutagent.io | storage, optimization, eval execution, \`{variable}\` rendering |
|
|
9352
|
+
|
|
9353
|
+
**Rule**: SKILL never duplicates CLI flag lists — always \`mutagent <cmd> --help\` for flags.
|
|
9354
|
+
|
|
9355
|
+
---
|
|
9356
|
+
|
|
9357
|
+
## 5 Core Rules — NON-NEGOTIABLE
|
|
9358
|
+
|
|
9359
|
+
1. **\`--json\` on EVERY command.** No exceptions. Agents use JSON mode exclusively.
|
|
9360
|
+
2. **\`<command> --help\` BEFORE first use of any command.** The CLI is the source of truth for flags — this SKILL never inlines them.
|
|
9361
|
+
3. **NEVER auto-generate eval criteria — collect from user.** Ask the user for each rubric field. See [concepts/eval-criteria.md](./concepts/eval-criteria.md) for the 3-tier format.
|
|
9362
|
+
4. **Explore-before-modify.** Run \`mutagent explore --json\` before any write operation. Present findings, get user confirmation. Never mutate without discovery first.
|
|
9363
|
+
5. **Cost transparency before \`optimize start\`.** Run \`mutagent usage --json\` and show the result to the user. Get explicit confirmation before any optimization job.
|
|
9364
|
+
|
|
9365
|
+
---
|
|
9366
|
+
|
|
9367
|
+
## Journey Router — route by user intent
|
|
9368
|
+
|
|
9369
|
+
Match the user's first request. Load ONLY the matching subfile. Do NOT preload the whole set.
|
|
9370
|
+
|
|
9371
|
+
| User said / signal detected | Load subfile | Why |
|
|
9372
|
+
|---|---|---|
|
|
9373
|
+
| "trace", "observe", "integrate", "add framework" | [workflows/tracing.md](./workflows/tracing.md) | Non-destructive, fastest first-value path |
|
|
9374
|
+
| "optimize", "improve", "tune", "evaluate", "upload prompt" | [workflows/optimization.md](./workflows/optimization.md) | Full create→dataset→eval→optimize loop |
|
|
9375
|
+
| "explore", "scan", "find prompts", "what prompts", "discover" | [workflows/exploration.md](./workflows/exploration.md) | Read-only discovery + taxonomy |
|
|
9376
|
+
| \`AgentExecutor\`, \`StateGraph\`, \`createReactAgent\`, \`tool_calls\`, \`@tool\`, \`langgraph\`, \`crewai\`, \`autogen\`, \`openai/agents\`, multi-turn | [workflows/agents.md](./workflows/agents.md) | WIP path — surface partnership link |
|
|
9377
|
+
| "how do variables work", "single vs double braces", delimiter | [concepts/prompt-variables.md](./concepts/prompt-variables.md) | Delimiter inference contract |
|
|
9378
|
+
| "what makes a good eval", "rubric", "evaluation criteria" | [concepts/eval-criteria.md](./concepts/eval-criteria.md) | INPUT MVC + OUTPUT Standards |
|
|
9379
|
+
| Unclear / first time | run \`mutagent explore --json\` first, then reroute | Discovery before action |
|
|
9380
|
+
|
|
9381
|
+
---
|
|
9382
|
+
|
|
9383
|
+
## Subfile Map
|
|
9384
|
+
|
|
9385
|
+
| File | WHEN to load | WHY | ENFORCEMENT |
|
|
9386
|
+
|---|---|---|---|
|
|
9387
|
+
| [workflows/tracing.md](./workflows/tracing.md) | User wants to add framework tracing / observability | Non-destructive append-only integration sequence | Must run explore first (Rule 4) |
|
|
9388
|
+
| [workflows/optimization.md](./workflows/optimization.md) | User wants to optimize or evaluate a prompt | Full loop: explore → upload → dataset → eval → optimize → apply | Must check usage before optimize (Rule 5); must collect rubrics from user (Rule 3) |
|
|
9389
|
+
| [workflows/exploration.md](./workflows/exploration.md) | User wants to scan codebase, identify prompts vs agents | Read-only discovery; output taxonomy to user | Run only; no writes |
|
|
9390
|
+
| [workflows/agents.md](./workflows/agents.md) | Multi-turn / tool-calling code detected | WIP — do NOT attempt optimizer, surface partnership link | Show WIP card to user verbatim |
|
|
9391
|
+
| [concepts/prompt-variables.md](./concepts/prompt-variables.md) | Any question about \`{var}\` vs \`{{var}}\`, delimiter inference | Brace convention + conversion rules | Load before \`prompts create\` in optimization workflow |
|
|
9392
|
+
| [concepts/eval-criteria.md](./concepts/eval-criteria.md) | Any question about rubric design, MVC, Output Standards | 3-tier rubric format — INPUT-param vs OUTPUT-param scope | Load before \`evaluation create --guided\` in optimization workflow |
|
|
9393
|
+
|
|
9394
|
+
---
|
|
9395
|
+
|
|
9396
|
+
## Output handling
|
|
9397
|
+
|
|
9398
|
+
After every CLI command:
|
|
9399
|
+
- **Show the command output to the user.** Command output appears in bash blocks that users may not see — always present the key results in your chat response.
|
|
9400
|
+
- **For evaluation create \`--guided\`**: the CLI provides per-field questions for each schema field. Ask the user each question in turn. Do not skip any field. Do not pre-fill answers.
|
|
9401
|
+
- **For \`optimize results\`**: present the before/after scorecard to the user and confirm whether to apply, view diff, or reject.
|
|
9402
|
+
|
|
9403
|
+
---
|
|
9404
|
+
|
|
9405
|
+
## Anti-patterns — NEVER do these
|
|
9406
|
+
|
|
9407
|
+
- Run any command without \`--json\`
|
|
9408
|
+
- Auto-generate eval criteria — always collect from the user
|
|
9409
|
+
- Skip any schema field when collecting evaluation rubrics
|
|
9410
|
+
- Skip \`mutagent explore --json\` before any write operation
|
|
9411
|
+
- Run \`optimize start\` without first showing \`usage --json\` to the user
|
|
9412
|
+
- Increase \`--max-iterations\` above 1 without explicit user consent (each iteration = LLM spend)
|
|
9413
|
+
- Run a multi-turn agent through the prompt optimizer
|
|
9414
|
+
- Skip showing command output results to the user
|
|
9415
|
+
- Inline CLI flags from memory — always read \`--help\` first
|
|
9416
|
+
|
|
9417
|
+
---
|
|
9005
9418
|
|
|
9006
9419
|
## State Tracking
|
|
9007
|
-
- \`.mutagent/mutation-context.md\` — Codebase index of discovered/uploaded prompts
|
|
9008
|
-
- Check and update this file after explore, create, and dataset operations
|
|
9009
|
-
- \`mutagent auth status\` — Auth + workspace state
|
|
9010
|
-
- Comment markers (\`// MutagenT:START ... // MutagenT:END\`) in source files
|
|
9011
|
-
|
|
9012
|
-
## Post-Optimization
|
|
9013
|
-
After \`optimize results\`: ALWAYS show the before/after diff to the user first. Then offer choices via AskUserQuestion: Apply / Reject.
|
|
9014
|
-
|
|
9015
|
-
## Evaluation Criteria Reminder
|
|
9016
|
-
Every evaluation MUST specify criteria targeting either:
|
|
9017
|
-
- Input variable fields (from inputSchema)
|
|
9018
|
-
- Output fields (from outputSchema / structured output)
|
|
9019
|
-
|
|
9020
|
-
## Optimization Cost Control
|
|
9021
|
-
- Default max-iterations is 1. NEVER increase without explicit user request.
|
|
9022
|
-
- Each iteration incurs LLM costs. Starting >1 iterations without consent = protocol violation.
|
|
9023
|
-
- If user wants more, confirm the number via AskUserQuestion first.
|
|
9024
|
-
|
|
9025
|
-
## Claude Code Hooks (Session Telemetry)
|
|
9026
|
-
Optional — sends session activity to MutagenT traces API for observability.
|
|
9027
|
-
|
|
9028
|
-
Install via \`.claude/settings.local.json\`:
|
|
9029
|
-
\\\`\\\`\\\`json
|
|
9030
|
-
{"hooks": {
|
|
9031
|
-
"SessionStart": [{"matcher": "startup", "hooks": [{"type": "command", "command": "mutagent hooks claude-code session-start"}]}],
|
|
9032
|
-
"Stop": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code session-end"}]}],
|
|
9033
|
-
"PreToolUse": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code pre-tool-use"}]}],
|
|
9034
|
-
"PostToolUse": [{"hooks": [{"type": "command", "command": "mutagent hooks claude-code post-tool-use"}]}]
|
|
9035
|
-
}}
|
|
9036
|
-
\\\`\\\`\\\`
|
|
9037
|
-
Or run: \`mutagent init\` (option 1 installs skill + hooks together)`;
|
|
9038
|
-
var SKILL_DIR = ".claude/skills/mutagent-cli";
|
|
9039
|
-
var SKILL_FILE = "SKILL.md";
|
|
9040
|
-
function createSkillsCommand() {
|
|
9041
|
-
const skills = new Command17("skills").description("Manage MutagenT CLI skills for coding agents");
|
|
9042
|
-
skills.command("install").description("Install MutagenT CLI skill for Claude Code").addHelpText("after", `
|
|
9043
|
-
Examples:
|
|
9044
|
-
${chalk35.dim("$")} mutagent skills install
|
|
9045
9420
|
|
|
9046
|
-
|
|
9047
|
-
|
|
9048
|
-
`).action((_options, cmd) => {
|
|
9049
|
-
const parentCmd = cmd.parent?.parent;
|
|
9050
|
-
const isJson = parentCmd ? getJsonFlag(parentCmd) : false;
|
|
9051
|
-
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
9052
|
-
const repoRoot = findRepoRoot();
|
|
9053
|
-
const skillDir = join7(repoRoot, SKILL_DIR);
|
|
9054
|
-
const skillPath = join7(skillDir, SKILL_FILE);
|
|
9055
|
-
if (!existsSync12(skillDir)) {
|
|
9056
|
-
mkdirSync4(skillDir, { recursive: true });
|
|
9057
|
-
}
|
|
9058
|
-
const content = `${SKILL_FRONTMATTER}
|
|
9421
|
+
- \`.mutagent/mutation-context.md\` — codebase index of discovered/uploaded prompts. Update after explore, create, dataset ops.
|
|
9422
|
+
- \`mutagent auth status --json\` — auth + workspace state.
|
|
9059
9423
|
|
|
9060
|
-
|
|
9061
|
-
`;
|
|
9062
|
-
writeFileSync5(skillPath, content, "utf-8");
|
|
9063
|
-
if (isJson) {
|
|
9064
|
-
output.output({
|
|
9065
|
-
installed: true,
|
|
9066
|
-
path: skillPath,
|
|
9067
|
-
name: "mutagent-cli"
|
|
9068
|
-
});
|
|
9069
|
-
} else {
|
|
9070
|
-
output.success(`Installed MutagenT CLI skill`);
|
|
9071
|
-
console.log(` ${chalk35.dim("Path:")} ${skillPath}`);
|
|
9072
|
-
console.log("");
|
|
9073
|
-
console.log(` ${chalk35.dim("This skill teaches coding agents how to use the MutagenT CLI.")}`);
|
|
9074
|
-
console.log(` ${chalk35.dim("It will be automatically loaded by Claude Code when relevant triggers match.")}`);
|
|
9075
|
-
}
|
|
9076
|
-
});
|
|
9077
|
-
return skills;
|
|
9078
|
-
}
|
|
9424
|
+
---
|
|
9079
9425
|
|
|
9080
|
-
|
|
9081
|
-
|
|
9082
|
-
|
|
9083
|
-
|
|
9084
|
-
|
|
9085
|
-
|
|
9086
|
-
|
|
9087
|
-
|
|
9088
|
-
|
|
9089
|
-
|
|
9090
|
-
|
|
9091
|
-
|
|
9092
|
-
|
|
9093
|
-
|
|
9094
|
-
|
|
9426
|
+
## Login (two paths)
|
|
9427
|
+
|
|
9428
|
+
- **CI / automated**: \`export MUTAGENT_API_KEY=mt_... && mutagent login --json\` — no browser, no prompts.
|
|
9429
|
+
- **Onboarding a user**: \`mutagent login --browser --json\` — CLI prints auth URL to stdout, polls 5 min. **Surface the URL verbatim to the user.** \`--non-interactive\` is NOT needed when \`--browser\` is set.
|
|
9430
|
+
|
|
9431
|
+
\`mutagent login\` is canonical. \`mutagent auth login\` is a back-compat alias. Both delegate to \`lib/auth-flow.ts\`. Decision record: [cli-design-principles.md](../../docs/cli-design-principles.md) → Login Unification.
|
|
9432
|
+
|
|
9433
|
+
---
|
|
9434
|
+
|
|
9435
|
+
## Extensibility
|
|
9436
|
+
|
|
9437
|
+
Add \`workflows/custom-<name>.md\` with frontmatter \`triggers: ["phrase"]\` — auto-discovered by the decision tree fallback row. No rebuild needed.
|
|
9438
|
+
`,
|
|
9439
|
+
"concepts/eval-criteria.md": `---
|
|
9440
|
+
name: mutagent-cli-concepts-eval-criteria
|
|
9441
|
+
description: |
|
|
9442
|
+
Canonical source for MutagenT evaluation-criteria framing:
|
|
9443
|
+
INPUT-param criteria → Minimum Viable Context (MVC);
|
|
9444
|
+
OUTPUT-param criteria → Output Standards.
|
|
9445
|
+
3-tier rubric discipline: grounded, observable, never vague.
|
|
9446
|
+
Includes current platform validation rules for criterion shape.
|
|
9447
|
+
Mirrored in mutagent/src/modules/prompts/prompt-evaluations/README.md.
|
|
9448
|
+
triggers:
|
|
9449
|
+
- "evaluation criteria"
|
|
9450
|
+
- "eval criteria"
|
|
9451
|
+
- "rubric"
|
|
9452
|
+
- "how do I evaluate"
|
|
9453
|
+
- "minimum viable context"
|
|
9454
|
+
- "output standards"
|
|
9455
|
+
- "evaluationParameter"
|
|
9456
|
+
- "guided eval"
|
|
9457
|
+
---
|
|
9458
|
+
|
|
9459
|
+
# Concept — Guided Evaluation Criteria
|
|
9460
|
+
|
|
9461
|
+
> **Canonical source** for the INPUT vs OUTPUT framing. Mirrored in:
|
|
9462
|
+
> - \`mutagent-cli/src/commands/prompts/evaluation/guided-workflow.ts\` — kept in sync.
|
|
9463
|
+
> - \`mutagent/src/modules/prompts/prompt-evaluations/README.md\` as BE-side
|
|
9464
|
+
> reference — mirror, not a fork.
|
|
9465
|
+
|
|
9466
|
+
## The golden rule
|
|
9467
|
+
|
|
9468
|
+
**Every evaluation criterion is scoped to EITHER the inputs OR the outputs of
|
|
9469
|
+
the prompt. Never both in one criterion.**
|
|
9470
|
+
|
|
9471
|
+
Mixing them produces vague criteria ("is the output good given the inputs?")
|
|
9472
|
+
that the optimizer cannot act on. When collecting rubrics from the user,
|
|
9473
|
+
explicitly frame each question around one of these two scopes.
|
|
9474
|
+
|
|
9475
|
+
---
|
|
9476
|
+
|
|
9477
|
+
## NEVER auto-generate criteria
|
|
9478
|
+
|
|
9479
|
+
This is Rule 3 of the [5 Core Rules](../SKILL.md). Reasons:
|
|
9480
|
+
|
|
9481
|
+
- Auto-generated rubrics are vague by default ("score based on accuracy")
|
|
9482
|
+
- The optimizer cannot act on vague criteria — it needs observable tiers
|
|
9483
|
+
- The user knows what "correct" means for their domain; the agent does not
|
|
9484
|
+
- Generic rubrics produce noisy scores that mislead the optimizer
|
|
9485
|
+
|
|
9486
|
+
**Always use AskUserQuestion to collect rubrics, one per variable/dimension.**
|
|
9487
|
+
|
|
9488
|
+
When \`evaluation create --guided --json\` runs, the CLI provides a list of
|
|
9489
|
+
fields that need rubrics. Ask the user about EVERY field in that list.
|
|
9490
|
+
Do not skip any field. Do not pre-fill answers. The user must provide each rubric.
|
|
9491
|
+
|
|
9492
|
+
---
|
|
9493
|
+
|
|
9494
|
+
## Input-param criteria → Minimum Viable Context (MVC)
|
|
9495
|
+
|
|
9496
|
+
**Scope**: the \`{variables}\` the prompt template consumes. Each \`{variable}\` is
|
|
9497
|
+
an **input param**. The criterion asks: *is the information required for the
|
|
9498
|
+
prompt to succeed actually present?*
|
|
9499
|
+
|
|
9500
|
+
### 3-tier completeness scale (grounded, never vague)
|
|
9501
|
+
|
|
9502
|
+
- \`1.0\` — all required context present, no ambiguity
|
|
9503
|
+
- \`0.5\` — some context present, enough to attempt but likely partial/hedged answer
|
|
9504
|
+
- \`0.0\` — critical context missing, prompt cannot succeed regardless of model
|
|
9505
|
+
|
|
9506
|
+
### Discipline rules
|
|
9507
|
+
|
|
9508
|
+
- **Grounded**: every tier must describe an observable property of the input
|
|
9509
|
+
data, not a feeling.
|
|
9510
|
+
- **Observable**: a human reader should assign a tier by looking at a single
|
|
9511
|
+
input row — no model output needed.
|
|
9512
|
+
- **Per-variable**: one criterion per \`{variable}\`. This localizes optimizer signal.
|
|
9513
|
+
|
|
9514
|
+
### Before asking the user
|
|
9515
|
+
|
|
9516
|
+
Enumerate variables using the delimiter inferred by \`mutagent explore --json\`:
|
|
9517
|
+
- \`delimiter: "single"\` → \`{foo}\` — platform canonical
|
|
9518
|
+
- \`delimiter: "double"\` → \`{{foo}}\` — framework template; convert before upload
|
|
9519
|
+
|
|
9520
|
+
See [concepts/prompt-variables.md](./prompt-variables.md) for the full inference contract.
|
|
9521
|
+
|
|
9522
|
+
### Example
|
|
9523
|
+
|
|
9524
|
+
For \`"Summarize {document} for {audience}"\`:
|
|
9525
|
+
|
|
9526
|
+
\`\`\`json
|
|
9527
|
+
[
|
|
9528
|
+
{
|
|
9529
|
+
"name": "document-present",
|
|
9530
|
+
"evaluationParameter": "document",
|
|
9531
|
+
"description": "1.0 = actual prose text >= 100 chars. 0.5 = short snippet or summary stub. 0.0 = filename, null, TODO, or empty."
|
|
9532
|
+
},
|
|
9533
|
+
{
|
|
9534
|
+
"name": "audience-concrete",
|
|
9535
|
+
"evaluationParameter": "audience",
|
|
9536
|
+
"description": "1.0 = concrete persona (e.g. 'junior Python devs'). 0.5 = broad category ('engineers'). 0.0 = empty, 'general', 'everyone'."
|
|
9537
|
+
}
|
|
9538
|
+
]
|
|
9539
|
+
\`\`\`
|
|
9540
|
+
|
|
9541
|
+
Note the 1:1 mapping: one criterion per variable, \`evaluationParameter\` equal
|
|
9542
|
+
to the variable name.
|
|
9543
|
+
|
|
9544
|
+
---
|
|
9545
|
+
|
|
9546
|
+
## Output-param criteria → Output Standards
|
|
9547
|
+
|
|
9548
|
+
**Scope**: the model's response shape and content. Pick the dimensions relevant
|
|
9549
|
+
to the task and write one criterion per dimension.
|
|
9550
|
+
|
|
9551
|
+
### Common dimensions
|
|
9552
|
+
|
|
9553
|
+
- **Content correctness** — right answer, right facts, right tone
|
|
9554
|
+
- **Structural correctness** — matches \`outputSchema\`, required fields, enums, length
|
|
9555
|
+
- **Groundedness** — facts in the output traceable to facts in the input
|
|
9556
|
+
- **Format compliance** — JSON validity, markdown shape, regex match
|
|
9557
|
+
|
|
9558
|
+
### 3-tier correctness scale
|
|
9559
|
+
|
|
9560
|
+
Concrete pass/fail per tier. **Never** write "0-1 scale" or "score based on
|
|
9561
|
+
accuracy" — those are vague and the optimizer can't act on them.
|
|
9562
|
+
|
|
9563
|
+
### Example (JSON-output summarizer)
|
|
9564
|
+
|
|
9565
|
+
\`\`\`json
|
|
9566
|
+
{
|
|
9567
|
+
"name": "summary-correctness",
|
|
9568
|
+
"evaluationParameter": "summary",
|
|
9569
|
+
"description": "1.0 = valid JSON, all 3 required fields, covers all key arguments. 0.5 = valid JSON, 1-2 arguments missing/hedged. 0.0 = invalid JSON OR fabricated facts."
|
|
9570
|
+
}
|
|
9571
|
+
\`\`\`
|
|
9572
|
+
|
|
9573
|
+
The \`evaluationParameter\` here is the output-schema field name, not an input
|
|
9574
|
+
variable. Same 1:1 discipline — one criterion per output dimension.
|
|
9575
|
+
|
|
9576
|
+
---
|
|
9577
|
+
|
|
9578
|
+
## Platform validation rules (current)
|
|
9579
|
+
|
|
9580
|
+
When creating an evaluation via \`mutagent prompts evaluation create -d '<json>'\`,
|
|
9581
|
+
each criterion must pass these platform-enforced checks:
|
|
9582
|
+
|
|
9583
|
+
| Field | Required | Validation |
|
|
9584
|
+
|---|---|---|
|
|
9585
|
+
| \`name\` | yes | slug-like, no spaces, \`[a-z0-9_-]\` |
|
|
9586
|
+
| \`description\` | yes | non-empty, ideally >= 20 chars with tier definitions |
|
|
9587
|
+
| \`evaluationParameter\` | yes | must match a variable name from the prompt OR an output field name |
|
|
9588
|
+
|
|
9589
|
+
**Common validation failures:**
|
|
9590
|
+
- \`evaluationParameter\` references a name that is neither a prompt-template variable nor an output field → rejected
|
|
9591
|
+
- \`description\` is too short or vague → accepted by platform but produces poor scores
|
|
9592
|
+
- Multiple criteria with the same \`evaluationParameter\` → accepted but wasteful
|
|
9593
|
+
|
|
9594
|
+
---
|
|
9595
|
+
|
|
9596
|
+
## How to apply when creating an evaluation
|
|
9597
|
+
|
|
9598
|
+
1. **Read the prompt template.** Enumerate \`{variables}\` (input params) +
|
|
9599
|
+
expected output shape (from \`outputSchema\` or the code's parse logic).
|
|
9600
|
+
2. **Ask the user**: "Evaluate INPUTS (is context sufficient) or OUTPUTS
|
|
9601
|
+
(is response correct) first?" — let the user pick the scope.
|
|
9602
|
+
3. **Collect criteria**: use AskUserQuestion to collect from user, never auto-generate — one per variable (INPUT) or per dimension (OUTPUT),
|
|
9603
|
+
always with a 3-tier rubric describing observable behavior.
|
|
9604
|
+
4. **Map to platform shape**:
|
|
9605
|
+
\`\`\`typescript
|
|
9606
|
+
{
|
|
9607
|
+
name: string; // short, slug-like
|
|
9608
|
+
description: string; // the 3-tier rubric verbatim
|
|
9609
|
+
evaluationParameter: string; // the variable name OR output field
|
|
9610
|
+
}
|
|
9611
|
+
\`\`\`
|
|
9612
|
+
5. **Upload** via \`mutagent prompts evaluation create <id> -d '<json>' --json\`.
|
|
9613
|
+
|
|
9614
|
+
The \`--guided\` flag walks the user through this flow interactively — use it
|
|
9615
|
+
when the user is new to the concept. Follow the CLI's next-step guidance in
|
|
9616
|
+
the output to collect rubrics in the correct order.
|
|
9617
|
+
|
|
9618
|
+
---
|
|
9619
|
+
|
|
9620
|
+
## Anti-patterns
|
|
9621
|
+
|
|
9622
|
+
- **Auto-generating criteria** — Rule 3: NEVER. Always collect from user.
|
|
9623
|
+
- **Mixing input and output in one criterion** — breaks signal; split into two.
|
|
9624
|
+
- **Vague rubrics** — "0.8 if mostly good" → rewrite with observable tiers.
|
|
9625
|
+
- **One criterion for many variables** — reduces signal, slows optimization.
|
|
9626
|
+
- **Scoring the model, not the data** — MVC scores the INPUT data quality.
|
|
9627
|
+
|
|
9628
|
+
---
|
|
9629
|
+
|
|
9630
|
+
## Cross-references
|
|
9631
|
+
|
|
9632
|
+
- [SKILL.md](../SKILL.md) → 5 rules (Rule 3: never auto-generate)
|
|
9633
|
+
- [workflows/optimization.md](../workflows/optimization.md) → steps 7-9 (where this concept is applied)
|
|
9634
|
+
- [concepts/prompt-variables.md](./prompt-variables.md) → delimiter inference (used in MVC step)
|
|
9635
|
+
- \`mutagent/src/modules/prompts/prompt-evaluations/README.md\` → BE mirror
|
|
9636
|
+
`,
|
|
9637
|
+
"concepts/prompt-variables.md": `---
|
|
9638
|
+
name: mutagent-cli-concepts-prompt-variables
|
|
9639
|
+
description: |
|
|
9640
|
+
Prompt template variable delimiter inference contract.
|
|
9641
|
+
Platform canonical is single-brace {variable}. Third-party frameworks vary
|
|
9642
|
+
(Handlebars / Mustache / Liquid / Jinja2 use double {{variable}}).
|
|
9643
|
+
\`mutagent explore\` infers the delimiter per file and surfaces it in
|
|
9644
|
+
\`--json\` output as \`delimiter: "single" | "double"\`.
|
|
9645
|
+
Includes conversion rules for upload and apply phases.
|
|
9646
|
+
triggers:
|
|
9647
|
+
- "prompt variables"
|
|
9648
|
+
- "template variables"
|
|
9649
|
+
- "single vs double brace"
|
|
9650
|
+
- "{variable}"
|
|
9651
|
+
- "{{variable}}"
|
|
9652
|
+
- "delimiter"
|
|
9653
|
+
- "inferPromptVariables"
|
|
9654
|
+
- "brace convention"
|
|
9655
|
+
- "convert variables"
|
|
9656
|
+
---
|
|
9657
|
+
|
|
9658
|
+
# Concept — Prompt Variables
|
|
9659
|
+
|
|
9660
|
+
## Platform canonical
|
|
9661
|
+
|
|
9662
|
+
**MutagenT platform uses single-brace \`{variable}\`.** The platform renders
|
|
9663
|
+
prompts by substituting \`{name}\` with the provided value at optimization /
|
|
9664
|
+
evaluation time.
|
|
9665
|
+
|
|
9666
|
+
## Third-party framework variance
|
|
9667
|
+
|
|
9668
|
+
Real-world codebases use different delimiters depending on which prompt
|
|
9669
|
+
framework the user already has installed:
|
|
9670
|
+
|
|
9671
|
+
| Framework | Delimiter | Example |
|
|
9672
|
+
|---|---|---|
|
|
9673
|
+
| **MutagenT platform** (canonical) | single | \`{document}\` |
|
|
9674
|
+
| **LangChain** \`PromptTemplate\` | single | \`{document}\` |
|
|
9675
|
+
| **LangChain** \`ChatPromptTemplate\` + Mustache | double | \`{{document}}\` |
|
|
9676
|
+
| **Handlebars** | double | \`{{document}}\` |
|
|
9677
|
+
| **Mustache** | double | \`{{document}}\` |
|
|
9678
|
+
| **LiquidJS** | double | \`{{ document }}\` |
|
|
9679
|
+
| **Jinja2** (Python) | double | \`{{ document }}\` |
|
|
9680
|
+
|
|
9681
|
+
---
|
|
9682
|
+
|
|
9683
|
+
## Brace conversion — upload and apply
|
|
9684
|
+
|
|
9685
|
+
Getting this wrong breaks templates after optimization. Follow the two-phase rule:
|
|
9686
|
+
|
|
9687
|
+
### Phase 1 — Upload (code → MutagenT)
|
|
9688
|
+
|
|
9689
|
+
If the code has \`{{double}}\` braces:
|
|
9690
|
+
1. Warn the user: "Your template uses \`{{double}}\` braces (Handlebars/LangChain Mustache). MutagenT uses \`{single}\` braces. I'll convert before uploading."
|
|
9691
|
+
2. Convert \`{{name}}\` → \`{name}\` in the prompt content passed to \`mutagent prompts create\`.
|
|
9692
|
+
3. Record the original delimiter in \`.mutagent/mutation-context.md\` so the apply phase knows to convert back.
|
|
9693
|
+
|
|
9694
|
+
If the code has \`{single}\` braces: no conversion needed — upload as-is.
|
|
9695
|
+
|
|
9696
|
+
### Phase 2 — Apply (MutagenT → code)
|
|
9697
|
+
|
|
9698
|
+
After optimization, the platform returns a prompt with \`{single}\` braces.
|
|
9699
|
+
|
|
9700
|
+
If the original codebase used \`{{double}}\` braces:
|
|
9701
|
+
1. Convert \`{name}\` → \`{{name}}\` in the optimized prompt before writing to the source file.
|
|
9702
|
+
2. Confirm with the user before saving.
|
|
9703
|
+
|
|
9704
|
+
If the original codebase used \`{single}\` braces: write the optimized prompt as-is.
|
|
9705
|
+
|
|
9706
|
+
**Summary table:**
|
|
9707
|
+
|
|
9708
|
+
| Code uses | Upload | Optimized output | Write back to code |
|
|
9709
|
+
|---|---|---|---|
|
|
9710
|
+
| \`{single}\` | as-is | \`{single}\` | as-is |
|
|
9711
|
+
| \`{{double}}\` | convert to \`{single}\` | \`{single}\` | convert back to \`{{double}}\` |
|
|
9712
|
+
|
|
9713
|
+
---
|
|
9714
|
+
|
|
9715
|
+
## Per-file inference — \`mutagent explore\`
|
|
9716
|
+
|
|
9717
|
+
The CLI's \`mutagent explore\` command calls \`inferPromptVariables()\` on every
|
|
9718
|
+
matching source file and **infers the delimiter per file** rather than
|
|
9719
|
+
globally. A single repository may contain both LangChain \`PromptTemplate\`
|
|
9720
|
+
(single) and Handlebars email templates (double) side by side.
|
|
9721
|
+
|
|
9722
|
+
### Inference algorithm
|
|
9723
|
+
|
|
9724
|
+
1. **Strip fenced markdown code blocks** first (\` \`\`\` ... \`\`\` \`). Avoids false
|
|
9725
|
+
positives from prompts that document JSON examples in fenced blocks.
|
|
9726
|
+
2. **Count \`{{name}}\` matches** → \`doubleHits\`.
|
|
9727
|
+
3. **Count \`{name}\` matches** that are NOT adjacent to \`{\` (not part of \`{{...}}\`)
|
|
9728
|
+
and NOT followed by \`"\` (JSON-key skip) → \`singleHits\`.
|
|
9729
|
+
4. **Majority wins**: \`doubleHits > singleHits\` → \`double\`, else \`single\`.
|
|
9730
|
+
5. **Tie-break**: singleHits === doubleHits (including the 0/0 case) →
|
|
9731
|
+
\`single\` (platform canonical).
|
|
9732
|
+
|
|
9733
|
+
### Escaped-JSON caveat
|
|
9734
|
+
|
|
9735
|
+
Prompts like \`"Return {\\"status\\": \\"ok\\"}"\` — the \`{\` is followed by \`"\`, so
|
|
9736
|
+
the single-brace regex deliberately skips it. Never treat literal JSON keys as
|
|
9737
|
+
template variables.
|
|
9738
|
+
|
|
9739
|
+
---
|
|
9740
|
+
|
|
9741
|
+
## How to use the delimiter field
|
|
9742
|
+
|
|
9743
|
+
\`mutagent explore --json\` surfaces the inferred delimiter per discovered prompt:
|
|
9744
|
+
|
|
9745
|
+
\`\`\`json
|
|
9746
|
+
{
|
|
9747
|
+
"prompts": [
|
|
9748
|
+
{
|
|
9749
|
+
"file": "src/prompts/summarize.ts",
|
|
9750
|
+
"line": 12,
|
|
9751
|
+
"preview": "const prompt = \`Summarize {document} for {audience}\`;",
|
|
9752
|
+
"reason": "template-variable",
|
|
9753
|
+
"confidence": "high",
|
|
9754
|
+
"delimiter": "single"
|
|
9755
|
+
},
|
|
9756
|
+
{
|
|
9757
|
+
"file": "src/emails/welcome.hbs",
|
|
9758
|
+
"line": 3,
|
|
9759
|
+
"preview": "<p>Hello {{name}}, welcome to {{product}}!</p>",
|
|
9760
|
+
"reason": "template-variable",
|
|
9761
|
+
"confidence": "high",
|
|
9762
|
+
"delimiter": "double"
|
|
9763
|
+
}
|
|
9764
|
+
]
|
|
9765
|
+
}
|
|
9766
|
+
\`\`\`
|
|
9767
|
+
|
|
9768
|
+
Use the delimiter field to:
|
|
9769
|
+
- Enumerate variables correctly (don't treat \`{{foo}}\` as two \`{foo}\` tokens).
|
|
9770
|
+
- Decide whether to convert before upload (Phase 1 above).
|
|
9771
|
+
- Drive the [concepts/eval-criteria.md](./eval-criteria.md) → MVC step (one criterion per variable).
|
|
9772
|
+
|
|
9773
|
+
---
|
|
9774
|
+
|
|
9775
|
+
## Edge cases
|
|
9776
|
+
|
|
9777
|
+
- **Empty prompt** — no variables → tie (0/0) → \`single\` (canonical).
|
|
9778
|
+
- **Mixed delimiters in one file** — majority wins, tied files default to \`single\`.
|
|
9779
|
+
Warn the user: their codebase probably has two prompt systems co-existing.
|
|
9780
|
+
- **Nested braces** \`{{{ foo }}}\` — Handlebars triple-brace (no-escape). Currently
|
|
9781
|
+
matched by the double regex as \`{{ foo }}\`; the outermost \`{\` and \`}\` are ignored. Fine for inference.
|
|
9782
|
+
|
|
9783
|
+
---
|
|
9784
|
+
|
|
9785
|
+
## Cross-references
|
|
9786
|
+
|
|
9787
|
+
- [SKILL.md](../SKILL.md) → 5 rules + journey router
|
|
9788
|
+
- [workflows/optimization.md](../workflows/optimization.md) → step 3 (delimiter drives variable enumeration) and step 15 (apply conversion)
|
|
9789
|
+
- [concepts/eval-criteria.md](./eval-criteria.md) → MVC (Minimum Viable Context) — uses delimiter to enumerate input params
|
|
9790
|
+
- Source: \`mutagent-cli/src/lib/explorer.ts\` → \`inferPromptVariables()\` and \`DiscoveredPrompt.delimiter\`
|
|
9791
|
+
- Tests: \`mutagent-cli/src/__tests__/lib/explorer.test.ts\`
|
|
9792
|
+
`,
|
|
9793
|
+
"workflows/agents.md": `---
|
|
9794
|
+
name: mutagent-cli-workflows-agents
|
|
9795
|
+
description: |
|
|
9796
|
+
Agent path (multi-turn, tool-calling) — WORK IN PROGRESS.
|
|
9797
|
+
Agent optimization & evaluation are actively in development.
|
|
9798
|
+
This workflow short-circuits to a partnership link, offers read-only CRUD,
|
|
9799
|
+
and optionally extracts a sub-prompt back to the Optimization path.
|
|
9800
|
+
triggers:
|
|
9801
|
+
- "agent"
|
|
9802
|
+
- "langchain agent"
|
|
9803
|
+
- "langgraph"
|
|
9804
|
+
- "crewai"
|
|
9805
|
+
- "autogen"
|
|
9806
|
+
- "openai agents"
|
|
9807
|
+
- "tool calling"
|
|
9808
|
+
- "multi-turn"
|
|
9809
|
+
- "AgentExecutor"
|
|
9810
|
+
- "createReactAgent"
|
|
9811
|
+
- "StateGraph"
|
|
9812
|
+
---
|
|
9813
|
+
|
|
9814
|
+
# Workflow — Agents (WIP)
|
|
9815
|
+
|
|
9816
|
+
> **This path is WORK IN PROGRESS.** Agent optimization and evaluation are
|
|
9817
|
+
> actively in development. Do NOT run a multi-turn agent through the prompt
|
|
9818
|
+
> optimizer — the platform will reject it, and scores would be meaningless for
|
|
9819
|
+
> a tool-calling loop.
|
|
9820
|
+
|
|
9821
|
+
Read the **5 rules** in [SKILL.md](../SKILL.md) before executing.
|
|
9822
|
+
|
|
9823
|
+
---
|
|
9824
|
+
|
|
9825
|
+
## When this workflow applies
|
|
9826
|
+
|
|
9827
|
+
Load this file when the journey router in [SKILL.md](../SKILL.md) matched one of these
|
|
9828
|
+
signals in the user's code:
|
|
9829
|
+
|
|
9830
|
+
- \`AgentExecutor\`, \`createReactAgent\`, \`createToolCallingAgent\`, \`createStructuredChatAgent\`
|
|
9831
|
+
- \`from "openai/agents"\` or \`from "@openai/agents"\`
|
|
9832
|
+
- \`from "crewai"\`, \`from "autogen"\`, \`from "autogen_agentchat"\`
|
|
9833
|
+
- \`from "@langchain/langgraph"\`, \`StateGraph(\`
|
|
9834
|
+
- \`tool_calls\` / \`toolCalls\` property access
|
|
9835
|
+
- \`@tool\` decorator (Python)
|
|
9836
|
+
- \`tools: [...]\` array in an LLM call config
|
|
9837
|
+
- \`function_call\` / \`tool_choice\` fields
|
|
9838
|
+
- \`while\` loop + LLM call
|
|
9839
|
+
|
|
9840
|
+
\`mutagent explore --json\` flags these under \`agents[]\` (not \`prompts[]\`).
|
|
9841
|
+
|
|
9842
|
+
---
|
|
9843
|
+
|
|
9844
|
+
## Required card (show this verbatim to the user)
|
|
9845
|
+
|
|
9846
|
+
When the agent path is triggered, copy this card into your chat response
|
|
9847
|
+
verbatim — do NOT paraphrase, do NOT collapse into a bash block:
|
|
9848
|
+
|
|
9849
|
+
\`\`\`
|
|
9850
|
+
I see you have an Agent (multi-turn / tool-calling). Agent Optimization &
|
|
9851
|
+
Evaluations are actively in development in MutagenT. For early access and
|
|
9852
|
+
to partner with us on the roadmap:
|
|
9853
|
+
→ https://www.mutagent.io/agents-partnership
|
|
9854
|
+
\`\`\`
|
|
9855
|
+
|
|
9856
|
+
---
|
|
9857
|
+
|
|
9858
|
+
## Sequence
|
|
9859
|
+
|
|
9860
|
+
\`\`\`
|
|
9861
|
+
1. Run \`mutagent explore --json\` if you haven't already.
|
|
9862
|
+
→ Confirm agents[] is non-empty.
|
|
9863
|
+
→ Show command output to user.
|
|
9864
|
+
|
|
9865
|
+
2. Show the WIP card above verbatim in your chat response.
|
|
9866
|
+
|
|
9867
|
+
3. Use AskUserQuestion to explain that agent code cannot be directly optimized:
|
|
9868
|
+
"Your code looks like a multi-turn agent. I can't run it
|
|
9869
|
+
through the prompt optimizer yet. Would you like to:
|
|
9870
|
+
(a) Join early access → https://www.mutagent.io/agents-partnership
|
|
9871
|
+
(b) Inspect existing agents in MutagenT (read-only CRUD)
|
|
9872
|
+
(c) Extract a single sub-prompt inside the agent loop and optimize it"
|
|
9873
|
+
|
|
9874
|
+
4. Branch:
|
|
9875
|
+
(a) → surface the URL verbatim. STOP.
|
|
9876
|
+
(b) → \`mutagent agents list --json\` ; \`mutagent agents get <id> --json\`
|
|
9877
|
+
→ show results to user. STOP (no mutations available).
|
|
9878
|
+
(c) → extract one sub-prompt, then route to [workflows/optimization.md](./optimization.md)
|
|
9879
|
+
treating the sub-prompt as a standalone Prompt.
|
|
9880
|
+
\`\`\`
|
|
9881
|
+
|
|
9882
|
+
---
|
|
9883
|
+
|
|
9884
|
+
## Branch (c) — extracting a sub-prompt
|
|
9885
|
+
|
|
9886
|
+
Multi-turn agents often contain inner prompts that ARE suitable for the
|
|
9887
|
+
Optimization path:
|
|
9888
|
+
|
|
9889
|
+
- a planner prompt ("given this user goal, list the tools you'd call")
|
|
9890
|
+
- a summarizer prompt ("given these tool results, write the final answer")
|
|
9891
|
+
- a classifier prompt ("which tool should handle this input?")
|
|
9892
|
+
|
|
9893
|
+
Each of these is a single-shot prompt with a clear output schema. Extract ONE,
|
|
9894
|
+
treat it as a standalone Prompt.
|
|
9895
|
+
|
|
9896
|
+
When extracting:
|
|
9897
|
+
1. Identify the exact string literal or template that becomes the sub-prompt.
|
|
9898
|
+
2. Enumerate its \`{variables}\` per [concepts/prompt-variables.md](../concepts/prompt-variables.md).
|
|
9899
|
+
3. Confirm with user: "I'll optimize the planner prompt only, not the full agent. Sound right?"
|
|
9900
|
+
4. On confirmation → load [workflows/optimization.md](./optimization.md) from step 3 (prompts create).
|
|
9901
|
+
|
|
9902
|
+
Do NOT try to extract the whole agent loop at once.
|
|
9903
|
+
|
|
9904
|
+
---
|
|
9905
|
+
|
|
9906
|
+
## What NOT to do
|
|
9907
|
+
|
|
9908
|
+
- **Do not** call \`mutagent prompts optimize start\` on an agent file.
|
|
9909
|
+
- **Do not** upload an agent's full system prompt + tool definitions as a single "prompt".
|
|
9910
|
+
- **Do not** suggest "try it anyway" — the WIP status is deliberate.
|
|
9911
|
+
- **Do not** skip showing the WIP card to the user — they need the partnership URL.
|
|
9912
|
+
|
|
9913
|
+
---
|
|
9914
|
+
|
|
9915
|
+
## Cross-references
|
|
9916
|
+
|
|
9917
|
+
- [SKILL.md](../SKILL.md) → 5 rules + journey router
|
|
9918
|
+
- [workflows/exploration.md](./exploration.md) → where \`agents[]\` entries are first detected
|
|
9919
|
+
- [workflows/optimization.md](./optimization.md) → branch (c) destination
|
|
9920
|
+
- [concepts/prompt-variables.md](../concepts/prompt-variables.md) → \`{foo}\` vs \`{{foo}}\` for sub-prompt extraction
|
|
9921
|
+
- Partnership link: https://www.mutagent.io/agents-partnership
|
|
9922
|
+
`,
|
|
9923
|
+
"workflows/exploration.md": `---
|
|
9924
|
+
name: mutagent-cli-workflows-exploration
|
|
9925
|
+
description: |
|
|
9926
|
+
Read-only codebase scan workflow. Discovers prompts and agents, classifies
|
|
9927
|
+
each as optimization-eligible (prompt) or WIP (agent), and presents a
|
|
9928
|
+
structured taxonomy to the user before any write operations.
|
|
9929
|
+
triggers:
|
|
9930
|
+
- "explore"
|
|
9931
|
+
- "scan codebase"
|
|
9932
|
+
- "find prompts"
|
|
9933
|
+
- "what prompts do I have"
|
|
9934
|
+
- "discover prompts"
|
|
9935
|
+
- "what's in my codebase"
|
|
9936
|
+
---
|
|
9937
|
+
|
|
9938
|
+
# Workflow — Exploration (Read-Only Discovery)
|
|
9939
|
+
|
|
9940
|
+
> **This workflow is read-only.** No writes, no uploads, no mutations.
|
|
9941
|
+
> Use it to understand what's in the codebase before deciding next steps.
|
|
9942
|
+
|
|
9943
|
+
Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. Key reminders:
|
|
9944
|
+
- \`--json\` on every command
|
|
9945
|
+
- \`<command> --help\` before first use
|
|
9946
|
+
- This workflow ends with a user question — never auto-proceed to writes
|
|
9947
|
+
|
|
9948
|
+
---
|
|
9949
|
+
|
|
9950
|
+
## When this workflow applies
|
|
9951
|
+
|
|
9952
|
+
- User said "explore my codebase", "what prompts do I have", "find prompts", "scan"
|
|
9953
|
+
- Intent is unclear and you need to discover before acting
|
|
9954
|
+
- User wants to understand what's optimizable before committing to a path
|
|
9955
|
+
|
|
9956
|
+
---
|
|
9957
|
+
|
|
9958
|
+
## Prompt vs Agent — taxonomy
|
|
9959
|
+
|
|
9960
|
+
Before running explore, understand what the CLI will return:
|
|
9961
|
+
|
|
9962
|
+
**PROMPT (optimization-eligible)**
|
|
9963
|
+
- Single LLM call: input → LLM → output
|
|
9964
|
+
- Template with \`{variables}\` (single) or \`{{variables}}\` (double brace)
|
|
9965
|
+
- Can be evaluated with G-Eval (input/output pair)
|
|
9966
|
+
- Can be optimized by Metatuner
|
|
9967
|
+
- Appears in \`prompts[]\` in explore output
|
|
9968
|
+
|
|
9969
|
+
**AGENT (NOT directly optimizable)**
|
|
9970
|
+
- Multi-turn loop: input → LLM → tool → LLM → ... → output
|
|
9971
|
+
- Dynamic branching execution
|
|
9972
|
+
- Cannot be optimized like a prompt (no fixed output schema)
|
|
9973
|
+
- Requires agent-level evaluation (not yet available)
|
|
9974
|
+
- Appears in \`agents[]\` in explore output
|
|
9975
|
+
|
|
9976
|
+
### Agent detection heuristics (what \`mutagent explore\` flags)
|
|
9977
|
+
|
|
9978
|
+
| Pattern | Framework | Classification |
|
|
9979
|
+
|---|---|---|
|
|
9980
|
+
| \`new StateGraph(\` / \`StateGraph.compile()\` | LangGraph | agent |
|
|
9981
|
+
| \`new Agent(\` / \`agent.execute()\` | Mastra / custom | agent |
|
|
9982
|
+
| \`AgentExecutor\` / \`createReactAgent\` / \`createToolCallingAgent\` | LangChain | agent |
|
|
9983
|
+
| \`from "openai/agents"\` / \`from "@openai/agents"\` | OpenAI Agents SDK | agent |
|
|
9984
|
+
| \`from "crewai"\` / \`from "autogen"\` | CrewAI / AutoGen | agent |
|
|
9985
|
+
| \`tools: [...]\` + LLM call in a loop | custom | agent |
|
|
9986
|
+
| \`while\` loop + LLM call | custom agent loop | agent |
|
|
9987
|
+
| \`@tool\` decorator (Python) | any | agent |
|
|
9988
|
+
| \`tool_calls\` / \`toolCalls\` property access | any | agent |
|
|
9989
|
+
| Single \`openai.chat()\` or template string | — | prompt |
|
|
9990
|
+
| String with \`{variable}\` / \`{{variable}}\` | — | prompt |
|
|
9991
|
+
|
|
9992
|
+
---
|
|
9993
|
+
|
|
9994
|
+
## Workflow steps
|
|
9995
|
+
|
|
9996
|
+
\`\`\`
|
|
9997
|
+
1. mutagent explore --json
|
|
9998
|
+
→ surfaces: prompts[], agents[], datasets[], markers[]
|
|
9999
|
+
→ show command output to user
|
|
10000
|
+
|
|
10001
|
+
2. Classify results:
|
|
10002
|
+
- prompts[] → optimization-eligible (single-shot, output schema)
|
|
10003
|
+
- agents[] → WIP (multi-turn/tool-calling) — route to [workflows/agents.md](./agents.md)
|
|
10004
|
+
- datasets[] → existing local data (uploadable in optimization workflow)
|
|
10005
|
+
- markers[] → already-uploaded items (show dashboard links)
|
|
10006
|
+
|
|
10007
|
+
3. Note the \`delimiter\` field on each prompt entry:
|
|
10008
|
+
- "single" → {variable} — MutagenT native, no conversion needed
|
|
10009
|
+
- "double" → {{variable}} — framework template, conversion required on upload
|
|
10010
|
+
See [concepts/prompt-variables.md](../concepts/prompt-variables.md) for the conversion rules.
|
|
10011
|
+
|
|
10012
|
+
4. Use AskUserQuestion to present findings and ask the user what to do next:
|
|
10013
|
+
"Here's what I found in your codebase:
|
|
10014
|
+
- N prompt(s) found: [list files]
|
|
10015
|
+
- N agent(s) found: [list files] (WIP — not optimizable yet)
|
|
10016
|
+
- N dataset(s) found: [list]
|
|
10017
|
+
- N already-uploaded: [list]
|
|
10018
|
+
What would you like to do?"
|
|
10019
|
+
|
|
10020
|
+
5. Route based on user answer:
|
|
10021
|
+
- "optimize this prompt" → load [workflows/optimization.md](./optimization.md)
|
|
10022
|
+
- "add tracing" → load [workflows/tracing.md](./tracing.md)
|
|
10023
|
+
- "tell me about the agent" → load [workflows/agents.md](./agents.md)
|
|
10024
|
+
- "nothing yet" → STOP (read-only complete)
|
|
10025
|
+
\`\`\`
|
|
10026
|
+
|
|
10027
|
+
---
|
|
10028
|
+
|
|
10029
|
+
## Output handling
|
|
10030
|
+
|
|
10031
|
+
After step 1, show the command output to the user before proceeding to classification. Do NOT proceed to step 2 until the user has seen the results.
|
|
10032
|
+
|
|
10033
|
+
---
|
|
10034
|
+
|
|
10035
|
+
## Brace convention note
|
|
10036
|
+
|
|
10037
|
+
\`mutagent explore --json\` surfaces \`delimiter: "single" | "double"\` per discovered prompt. Check this field before deciding how to enumerate variables. See [concepts/prompt-variables.md](../concepts/prompt-variables.md) for the full inference contract and conversion rules.
|
|
10038
|
+
|
|
10039
|
+
---
|
|
10040
|
+
|
|
10041
|
+
## Common pitfalls
|
|
10042
|
+
|
|
10043
|
+
- Skipping the classification step → user gets a raw JSON dump instead of a next-action recommendation
|
|
10044
|
+
- Treating \`agents[]\` entries as optimization-eligible → they are NOT; route to [workflows/agents.md](./agents.md)
|
|
10045
|
+
- Auto-proceeding to writes after explore → always confirm with user first
|
|
10046
|
+
- Ignoring the \`delimiter\` field → wrong variable enumeration when uploading a double-brace prompt
|
|
10047
|
+
|
|
10048
|
+
---
|
|
10049
|
+
|
|
10050
|
+
## Cross-references
|
|
10051
|
+
|
|
10052
|
+
- [SKILL.md](../SKILL.md) → 5 rules + journey router
|
|
10053
|
+
- [concepts/prompt-variables.md](../concepts/prompt-variables.md) → \`{foo}\` vs \`{{foo}}\` inference + conversion
|
|
10054
|
+
- [workflows/optimization.md](./optimization.md) → next step after exploration (prompt path)
|
|
10055
|
+
- [workflows/tracing.md](./tracing.md) → next step after exploration (integration path)
|
|
10056
|
+
- [workflows/agents.md](./agents.md) → next step after exploration (agent path)
|
|
10057
|
+
`,
|
|
10058
|
+
"workflows/optimization.md": `---
|
|
10059
|
+
name: mutagent-cli-workflows-optimization
|
|
10060
|
+
description: |
|
|
10061
|
+
Full prompt optimization journey: explore → prompts create → dataset add →
|
|
10062
|
+
evaluation create (guided) → optimize start → watch/status → results → apply.
|
|
10063
|
+
Enforces 5 rules: --json always, --help before first use, user-collected
|
|
10064
|
+
eval criteria, explore-before-modify, cost transparency before optimize.
|
|
10065
|
+
triggers:
|
|
10066
|
+
- "optimize prompt"
|
|
10067
|
+
- "improve prompt"
|
|
10068
|
+
- "tune prompt"
|
|
10069
|
+
- "evaluate prompt"
|
|
10070
|
+
- "upload prompt"
|
|
10071
|
+
- "create evaluation"
|
|
10072
|
+
- "upload dataset"
|
|
10073
|
+
- "run optimizer"
|
|
10074
|
+
- "start optimization"
|
|
10075
|
+
---
|
|
10076
|
+
|
|
10077
|
+
# Workflow — Optimization (Full Journey)
|
|
10078
|
+
|
|
10079
|
+
> **This is the full loop.** Expect 5-10 CLI calls and at least one long-running
|
|
10080
|
+
> optimizer job. Each step requires user confirmation. Never auto-run the full
|
|
10081
|
+
> chain without presenting findings at each gate.
|
|
10082
|
+
|
|
10083
|
+
Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. All 5 rules apply here:
|
|
10084
|
+
- \`--json\` on every command (Rule 1)
|
|
10085
|
+
- \`--help\` before first use of any command (Rule 2)
|
|
10086
|
+
- **NEVER auto-generate eval criteria** — collect from user (Rule 3)
|
|
10087
|
+
- \`mutagent explore --json\` before any write (Rule 4)
|
|
10088
|
+
- \`mutagent usage --json\` before \`optimize start\` (Rule 5)
|
|
10089
|
+
|
|
10090
|
+
---
|
|
10091
|
+
|
|
10092
|
+
## When this workflow applies
|
|
10093
|
+
|
|
10094
|
+
- User said "optimize prompt", "improve prompt", "tune prompt"
|
|
10095
|
+
- User wants to upload a prompt and measure its quality
|
|
10096
|
+
- User wants to run the Metatuner optimizer
|
|
10097
|
+
|
|
10098
|
+
---
|
|
10099
|
+
|
|
10100
|
+
## Required pre-reads (load these before the relevant steps)
|
|
10101
|
+
|
|
10102
|
+
| Step | Pre-read | Why |
|
|
10103
|
+
|---|---|---|
|
|
10104
|
+
| Before \`prompts create\` | [concepts/prompt-variables.md](../concepts/prompt-variables.md) | Brace convention — single \`{var}\` vs double \`{{var}}\` affects how variables are parsed |
|
|
10105
|
+
| Before \`evaluation create --guided\` | [concepts/eval-criteria.md](../concepts/eval-criteria.md) | INPUT MVC + OUTPUT Standards — 3-tier rubric format |
|
|
10106
|
+
|
|
10107
|
+
---
|
|
10108
|
+
|
|
10109
|
+
## Directive chain
|
|
10110
|
+
|
|
10111
|
+
\`\`\`
|
|
10112
|
+
explore → prompts create → dataset add → evaluation create --guided
|
|
10113
|
+
→ [Use AskUserQuestion to collect rubrics from the user for each field]
|
|
10114
|
+
→ evaluation create -d '<json>'
|
|
10115
|
+
→ usage check
|
|
10116
|
+
→ [Use AskUserQuestion to confirm optimization cost with user]
|
|
10117
|
+
→ optimize start
|
|
10118
|
+
→ optimize status (poll)
|
|
10119
|
+
→ optimize results
|
|
10120
|
+
→ [Use AskUserQuestion to present scorecard: Apply / View Diff / Reject]
|
|
10121
|
+
→ On Apply: Edit source file
|
|
10122
|
+
\`\`\`
|
|
10123
|
+
|
|
10124
|
+
---
|
|
10125
|
+
|
|
10126
|
+
## Full workflow steps
|
|
10127
|
+
|
|
10128
|
+
\`\`\`
|
|
10129
|
+
1. mutagent explore --json
|
|
10130
|
+
→ find candidate prompts in codebase
|
|
10131
|
+
→ show command output to user
|
|
10132
|
+
→ confirm with user: "Which prompt would you like to optimize?"
|
|
10133
|
+
|
|
10134
|
+
2. mutagent prompts --help
|
|
10135
|
+
mutagent prompts create --help
|
|
10136
|
+
→ read flags before using (Rule 2)
|
|
10137
|
+
|
|
10138
|
+
3. Load [concepts/prompt-variables.md](../concepts/prompt-variables.md)
|
|
10139
|
+
→ determine if prompt uses {single} or {{double}} braces
|
|
10140
|
+
→ if double-brace: warn user about conversion requirement
|
|
10141
|
+
|
|
10142
|
+
4. mutagent prompts create --name <name> [--system-file / --raw-file] --json
|
|
10143
|
+
→ show command output to user
|
|
10144
|
+
→ record promptId from response
|
|
10145
|
+
|
|
10146
|
+
5. mutagent prompts dataset add --help
|
|
10147
|
+
→ read flags before using
|
|
10148
|
+
|
|
10149
|
+
6. mutagent prompts dataset add <promptId> -d '[...]' --name "<name>" --json
|
|
10150
|
+
→ upload dataset rows (input/output pairs)
|
|
10151
|
+
→ show command output to user
|
|
10152
|
+
→ record datasetId
|
|
10153
|
+
|
|
10154
|
+
7. Load [concepts/eval-criteria.md](../concepts/eval-criteria.md)
|
|
10155
|
+
→ understand INPUT-param (MVC) vs OUTPUT-param (Standards) scope
|
|
10156
|
+
|
|
10157
|
+
8. mutagent prompts evaluation create <promptId> --guided --json
|
|
10158
|
+
→ the CLI provides a list of fields, each needing a rubric
|
|
10159
|
+
→ follow the CLI's next-step guidance in the output
|
|
10160
|
+
→ for EVERY field listed (INPUT scope first, then OUTPUT):
|
|
10161
|
+
- ask the user the provided question for that field
|
|
10162
|
+
- wait for user response
|
|
10163
|
+
- do NOT skip any field
|
|
10164
|
+
- do NOT auto-generate any answer
|
|
10165
|
+
→ collect at minimum: one INPUT criterion per {variable}, one OUTPUT criterion
|
|
10166
|
+
|
|
10167
|
+
9. mutagent prompts evaluation create <promptId> -d '<json>' --json
|
|
10168
|
+
→ upload the criteria collected in step 8
|
|
10169
|
+
→ show command output to user
|
|
10170
|
+
→ record evaluationId
|
|
10171
|
+
|
|
10172
|
+
10. mutagent usage --json
|
|
10173
|
+
→ show usage/quota to user
|
|
10174
|
+
→ confirm with user: "This optimization will use N iterations (~X min each).
|
|
10175
|
+
You have Y remaining. Proceed?"
|
|
10176
|
+
→ STOP if user declines
|
|
10177
|
+
|
|
10178
|
+
11. mutagent prompts optimize start <promptId> \\
|
|
10179
|
+
--dataset <datasetId> \\
|
|
10180
|
+
--evaluation <evaluationId> \\
|
|
10181
|
+
--max-iterations 1 \\
|
|
10182
|
+
--json
|
|
10183
|
+
→ NEVER set --max-iterations > 1 without explicit user consent
|
|
10184
|
+
→ record jobId from response
|
|
10185
|
+
|
|
10186
|
+
12. mutagent prompts optimize status <jobId> --json
|
|
10187
|
+
→ poll until status = "completed" or "failed"
|
|
10188
|
+
→ show progress to user
|
|
10189
|
+
|
|
10190
|
+
13. mutagent prompts optimize results <jobId> --json
|
|
10191
|
+
→ ALWAYS show before/after scorecard to user
|
|
10192
|
+
|
|
10193
|
+
14. Confirm with user: "Here's the before/after scorecard. What would you like to do?
|
|
10194
|
+
(a) Apply — update the prompt in your source file
|
|
10195
|
+
(b) View diff first
|
|
10196
|
+
(c) Reject — keep the original"
|
|
10197
|
+
|
|
10198
|
+
15. On Apply (a): Edit the prompt in the user's source file
|
|
10199
|
+
→ replace old prompt text with optimized version
|
|
10200
|
+
→ if double-brace codebase: convert {variable} back to {{variable}}
|
|
10201
|
+
→ confirm with user before saving
|
|
10202
|
+
\`\`\`
|
|
10203
|
+
|
|
10204
|
+
---
|
|
10205
|
+
|
|
10206
|
+
## Cost control
|
|
10207
|
+
|
|
10208
|
+
- Default \`--max-iterations 1\` is the only value you may use without explicit consent.
|
|
10209
|
+
- If the user requests more: confirm the exact number with the user, then confirm the cost implication before starting.
|
|
10210
|
+
- Each iteration = one full G-Eval run over the entire dataset, so LLM calls (and cost) scale with dataset size. This costs real money.
|
|
10211
|
+
|
|
10212
|
+
---
|
|
10213
|
+
|
|
10214
|
+
## Apply / Reject rules
|
|
10215
|
+
|
|
10216
|
+
- **Apply**: edit the source file with the optimized prompt. If the codebase used \`{{double}}\` braces, convert the optimized \`{single}\` brace output back to \`{{double}}\` before writing. See [concepts/prompt-variables.md](../concepts/prompt-variables.md) → Conversion.
|
|
10217
|
+
- **Reject**: no file changes. Record the jobId in \`.mutagent/mutation-context.md\` for future reference.
|
|
10218
|
+
- **View diff**: show a unified diff of old vs new prompt text before asking again.
|
|
10219
|
+
|
|
10220
|
+
---
|
|
10221
|
+
|
|
10222
|
+
## Common pitfalls
|
|
10223
|
+
|
|
10224
|
+
- Running \`optimize start\` before \`evaluation create\` → optimizer has no scoring signal
|
|
10225
|
+
- Mixing INPUT and OUTPUT criteria in the same rubric → vague scores
|
|
10226
|
+
- Applying results without showing the before/after scorecard first
|
|
10227
|
+
- Forgetting to convert \`{single}\` back to \`{{double}}\` after apply in double-brace codebases
|
|
10228
|
+
- Starting with \`--max-iterations 3\` without consent
|
|
10229
|
+
|
|
10230
|
+
---
|
|
10231
|
+
|
|
10232
|
+
## Guided Dataset Creation
|
|
10233
|
+
|
|
10234
|
+
When no local dataset exists, use the guided mode to curate high-quality test data:
|
|
10235
|
+
|
|
10236
|
+
\`\`\`
|
|
10237
|
+
mutagent prompts dataset add <prompt-id> --guided --json
|
|
10238
|
+
\`\`\`
|
|
10239
|
+
|
|
10240
|
+
The CLI analyzes the prompt's inputSchema + outputSchema and returns:
|
|
10241
|
+
- **Suggested categories**: edge cases, hard cases, representative cases
|
|
10242
|
+
- **Per-field questions**: what values, what edge cases, what correct output looks like
|
|
10243
|
+
- **Template item**: showing the expected shape for each dataset entry
|
|
10244
|
+
- **Priority rule**: hard cases that expose prompt weaknesses > easy cases that always pass
|
|
10245
|
+
|
|
10246
|
+
Collect answers from the user, then construct 5-10 dataset items covering all categories.
|
|
10247
|
+
Ensure at least 2 hard/edge cases per category. Then upload:
|
|
10248
|
+
|
|
10249
|
+
\`\`\`
|
|
10250
|
+
mutagent prompts dataset add <prompt-id> -d '<constructed-json>' --name '<name>' --json
|
|
10251
|
+
\`\`\`
|
|
10252
|
+
|
|
10253
|
+
---
|
|
10254
|
+
|
|
10255
|
+
## Cross-references
|
|
10256
|
+
|
|
10257
|
+
- [SKILL.md](../SKILL.md) → 5 rules + journey router
|
|
10258
|
+
- [concepts/prompt-variables.md](../concepts/prompt-variables.md) → brace convention + conversion (critical for steps 3 and 15)
|
|
10259
|
+
- [concepts/eval-criteria.md](../concepts/eval-criteria.md) → INPUT MVC + OUTPUT Standards + 3-tier rubric (critical for steps 7-8)
|
|
10260
|
+
- [workflows/exploration.md](./exploration.md) → step 1 of this workflow
|
|
10261
|
+
- [workflows/tracing.md](./tracing.md) → parallel or follow-up path
|
|
10262
|
+
`,
|
|
10263
|
+
"workflows/tracing.md": `---
|
|
10264
|
+
name: mutagent-cli-workflows-tracing
|
|
10265
|
+
description: |
|
|
10266
|
+
Framework integration workflow. Adds MutagenT tracing/observability to the
|
|
10267
|
+
user's codebase. Non-destructive (append-only), fastest first-value path.
|
|
10268
|
+
Detects framework via \`mutagent explore\`, generates snippet via
|
|
10269
|
+
\`mutagent integrate\`, applies via Edit tool, verifies via traces list.
|
|
10270
|
+
triggers:
|
|
10271
|
+
- "add tracing"
|
|
10272
|
+
- "add observability"
|
|
10273
|
+
- "integrate framework"
|
|
10274
|
+
- "integrate mutagent"
|
|
10275
|
+
- "add mutagent to my code"
|
|
10276
|
+
- "instrument my prompts"
|
|
10277
|
+
- "trace my prompts"
|
|
10278
|
+
---
|
|
10279
|
+
|
|
10280
|
+
# Workflow — Tracing (Framework Integration)
|
|
10281
|
+
|
|
10282
|
+
> **Scope**: read + append-only on user code. Never modify existing business
|
|
10283
|
+
> logic — only add tracing imports and decorators.
|
|
10284
|
+
|
|
10285
|
+
Read the **5 rules** in [SKILL.md](../SKILL.md) before executing. Key reminders:
|
|
10286
|
+
- \`--json\` on every command
|
|
10287
|
+
- \`<command> --help\` before first use
|
|
10288
|
+
- Explore before modify (Rule 4)
|
|
10289
|
+
- Show command output to user after every mutation
|
|
10290
|
+
|
|
10291
|
+
---
|
|
10292
|
+
|
|
10293
|
+
## When this workflow applies
|
|
10294
|
+
|
|
10295
|
+
- User said "add tracing", "add observability", "integrate \\<framework\\>"
|
|
10296
|
+
- User wants to see their prompts captured in the MutagenT dashboard
|
|
10297
|
+
- Fastest path to first value — prefer this before suggesting optimization
|
|
10298
|
+
|
|
10299
|
+
---
|
|
10300
|
+
|
|
10301
|
+
## Supported frameworks
|
|
10302
|
+
|
|
10303
|
+
\`mutagent integrate --help\` is the authoritative list. Common entries:
|
|
10304
|
+
|
|
10305
|
+
| Framework | Signal in codebase |
|
|
10306
|
+
|---|---|
|
|
10307
|
+
| LangChain | \`from "langchain"\`, \`PromptTemplate\`, \`ChatPromptTemplate\` |
|
|
10308
|
+
| LangGraph | \`from "@langchain/langgraph"\`, \`StateGraph\` |
|
|
10309
|
+
| OpenAI SDK | \`import OpenAI\`, \`openai.chat.completions.create\` |
|
|
10310
|
+
| Vercel AI SDK | \`import { generateText } from "ai"\` |
|
|
10311
|
+
| Mastra | \`from "@mastra/core"\` |
|
|
10312
|
+
| Custom / raw | any string template with \`{variable}\` |
|
|
10313
|
+
|
|
10314
|
+
---
|
|
10315
|
+
|
|
10316
|
+
## Workflow steps
|
|
10317
|
+
|
|
10318
|
+
\`\`\`
|
|
10319
|
+
1. mutagent explore --json
|
|
10320
|
+
→ detect which framework is in use
|
|
10321
|
+
→ show command output to user
|
|
10322
|
+
|
|
10323
|
+
2. mutagent integrate --help
|
|
10324
|
+
→ read the available frameworks and flags (ALWAYS before step 3)
|
|
10325
|
+
|
|
10326
|
+
3. mutagent integrate <framework> --json
|
|
10327
|
+
→ get the integration snippet
|
|
10328
|
+
|
|
10329
|
+
4. Use AskUserQuestion to confirm before applying code changes: "I'll add the tracing snippet to <file>. Proceed?"
|
|
10330
|
+
→ show the snippet preview before applying
|
|
10331
|
+
|
|
10332
|
+
5. Apply snippet via Edit tool
|
|
10333
|
+
→ code change happens here — append imports + decorators only
|
|
10334
|
+
→ never touch existing business logic
|
|
10335
|
+
|
|
10336
|
+
6. mutagent traces list --json
|
|
10337
|
+
→ verify integration: check traces count > 0
|
|
10338
|
+
→ show results and dashboard link to user
|
|
10339
|
+
\`\`\`
|
|
10340
|
+
|
|
10341
|
+
---
|
|
10342
|
+
|
|
10343
|
+
## Scope guard
|
|
10344
|
+
|
|
10345
|
+
This path is **READ + APPEND-ONLY** on the user's code:
|
|
10346
|
+
- ✓ Add import at top of file
|
|
10347
|
+
- ✓ Wrap existing function call with tracing decorator
|
|
10348
|
+
- ✗ Rename variables
|
|
10349
|
+
- ✗ Refactor logic
|
|
10350
|
+
- ✗ Remove existing code
|
|
10351
|
+
|
|
10352
|
+
If the integration snippet requires a significant rewrite, confirm scope with the user before proceeding.
|
|
10353
|
+
|
|
10354
|
+
---
|
|
10355
|
+
|
|
10356
|
+
## Post-integration state
|
|
10357
|
+
|
|
10358
|
+
After step 6:
|
|
10359
|
+
- Update \`.mutagent/mutation-context.md\` with the integration marker
|
|
10360
|
+
- Show the dashboard link from the traces output so the user can verify traces in the UI
|
|
10361
|
+
- If user wants to optimize the traced prompt → route to [workflows/optimization.md](./optimization.md)
|
|
10362
|
+
|
|
10363
|
+
---
|
|
10364
|
+
|
|
10365
|
+
## Common pitfalls
|
|
10366
|
+
|
|
10367
|
+
- Skipping step 1 and guessing the framework → let \`explore\` detect it
|
|
10368
|
+
- Forgetting step 6 → user has no proof the integration works
|
|
10369
|
+
- Editing files outside the \`integrate --json\` snippet block
|
|
10370
|
+
- Not showing command results to the user after mutations
|
|
10371
|
+
|
|
10372
|
+
---
|
|
10373
|
+
|
|
10374
|
+
## Cross-references
|
|
10375
|
+
|
|
10376
|
+
- [SKILL.md](../SKILL.md) → 5 rules + journey router
|
|
10377
|
+
- [workflows/exploration.md](./exploration.md) → step 1 of this workflow
|
|
10378
|
+
- [workflows/optimization.md](./optimization.md) → natural next step after tracing
|
|
10379
|
+
- [concepts/prompt-variables.md](../concepts/prompt-variables.md) → variable inference for traced prompts
|
|
10380
|
+
`
|
|
10381
|
+
};
|
|
10382
|
+
var SKILL_CONTENT = SKILL_FILES["SKILL.md"] ?? "";
|
|
10383
|
+
|
|
10384
|
+
// src/commands/skills.ts
|
|
10385
|
+
function findRepoRoot() {
|
|
10386
|
+
try {
|
|
10387
|
+
return execSync4("git rev-parse --show-toplevel", {
|
|
10388
|
+
encoding: "utf-8",
|
|
10389
|
+
stdio: ["pipe", "pipe", "pipe"]
|
|
10390
|
+
}).trim();
|
|
10391
|
+
} catch {
|
|
10392
|
+
return process.cwd();
|
|
10393
|
+
}
|
|
10394
|
+
}
|
|
10395
|
+
var SKILL_DIR = ".claude/skills/mutagent-cli";
|
|
10396
|
+
var SKILL_ENTRY_FILE = "SKILL.md";
|
|
10397
|
+
function assertSkillFilesValid() {
|
|
10398
|
+
if (Object.keys(SKILL_FILES).length === 0) {
|
|
10399
|
+
throw new Error("SKILL_FILES is missing or empty. This indicates a broken build: " + "scripts/sync-skill.ts must regenerate src/generated/skill-content.ts " + "from .claude/skills/mutagent-cli/ before build/test. " + "Run `bun run sync-skill` to debug.");
|
|
10400
|
+
}
|
|
10401
|
+
const entry = SKILL_FILES[SKILL_ENTRY_FILE];
|
|
10402
|
+
if (typeof entry !== "string" || entry.trim().length === 0) {
|
|
10403
|
+
throw new Error(`SKILL_FILES["${SKILL_ENTRY_FILE}"] is missing or empty. ` + "Check .claude/skills/mutagent-cli/SKILL.md and run `bun run sync-skill`.");
|
|
10404
|
+
}
|
|
10405
|
+
if (!entry.startsWith("---")) {
|
|
10406
|
+
throw new Error(`SKILL_FILES["${SKILL_ENTRY_FILE}"] is malformed: expected YAML frontmatter ("---") at start.`);
|
|
10407
|
+
}
|
|
10408
|
+
}
|
|
10409
|
+
function getSkillFiles() {
|
|
10410
|
+
assertSkillFilesValid();
|
|
10411
|
+
return SKILL_FILES;
|
|
10412
|
+
}
|
|
10413
|
+
function createSkillsCommand() {
|
|
10414
|
+
const skills = new Command17("skills").description("Manage MutagenT CLI skills for coding agents");
|
|
10415
|
+
skills.command("install").description("Install MutagenT CLI skill for Claude Code").addHelpText("after", `
|
|
10416
|
+
Examples:
|
|
10417
|
+
${chalk36.dim("$")} mutagent skills install
|
|
10418
|
+
|
|
10419
|
+
This creates a Claude Code skill at .claude/skills/mutagent-cli/SKILL.md
|
|
10420
|
+
that teaches coding agents how to use the MutagenT CLI effectively.
|
|
10421
|
+
`).action((_options, cmd) => {
|
|
10422
|
+
const parentCmd = cmd.parent?.parent;
|
|
10423
|
+
const isJson = parentCmd ? getJsonFlag(parentCmd) : false;
|
|
10424
|
+
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
10425
|
+
const repoRoot = findRepoRoot();
|
|
10426
|
+
const skillDir = join7(repoRoot, SKILL_DIR);
|
|
10427
|
+
const files = getSkillFiles();
|
|
10428
|
+
const writtenFiles = [];
|
|
10429
|
+
let totalBytes = 0;
|
|
10430
|
+
const sortedKeys = Object.keys(files).sort((a, b) => {
|
|
10431
|
+
if (a === SKILL_ENTRY_FILE)
|
|
10432
|
+
return -1;
|
|
10433
|
+
if (b === SKILL_ENTRY_FILE)
|
|
10434
|
+
return 1;
|
|
10435
|
+
return a.localeCompare(b);
|
|
10436
|
+
});
|
|
10437
|
+
for (const relPath of sortedKeys) {
|
|
10438
|
+
const destPath = join7(skillDir, relPath);
|
|
10439
|
+
const parentDir = dirname(destPath);
|
|
10440
|
+
if (!existsSync12(parentDir)) {
|
|
10441
|
+
mkdirSync4(parentDir, { recursive: true });
|
|
10442
|
+
}
|
|
10443
|
+
const raw = files[relPath] ?? "";
|
|
10444
|
+
const finalContent = raw.endsWith(`
|
|
10445
|
+
`) ? raw : `${raw}
|
|
10446
|
+
`;
|
|
10447
|
+
writeFileSync5(destPath, finalContent, "utf-8");
|
|
10448
|
+
writtenFiles.push({ path: destPath, bytes: finalContent.length });
|
|
10449
|
+
totalBytes += finalContent.length;
|
|
10450
|
+
}
|
|
10451
|
+
if (isJson) {
|
|
10452
|
+
output.output({
|
|
10453
|
+
installed: true,
|
|
10454
|
+
name: "mutagent-cli",
|
|
10455
|
+
skillDir,
|
|
10456
|
+
files: writtenFiles,
|
|
10457
|
+
totalBytes
|
|
10458
|
+
});
|
|
10459
|
+
} else {
|
|
10460
|
+
output.success(`Installed MutagenT CLI skill`);
|
|
10461
|
+
console.log(` ${chalk36.dim("Dir:")} ${skillDir}`);
|
|
10462
|
+
console.log("");
|
|
10463
|
+
for (let i = 0;i < writtenFiles.length; i++) {
|
|
10464
|
+
const entry = writtenFiles[i];
|
|
10465
|
+
if (!entry)
|
|
10466
|
+
continue;
|
|
10467
|
+
const relPath = sortedKeys[i] ?? "";
|
|
10468
|
+
const isLast = i === writtenFiles.length - 1;
|
|
10469
|
+
const prefix = isLast ? "└─" : "├─";
|
|
10470
|
+
console.log(` ${chalk36.dim(prefix)} ${relPath} ${chalk36.dim(`(${String(entry.bytes)} bytes)`)}`);
|
|
10471
|
+
}
|
|
10472
|
+
console.log("");
|
|
10473
|
+
console.log(` ${chalk36.dim(`${String(sortedKeys.length)} file(s), ${String(totalBytes)} bytes total.`)}`);
|
|
10474
|
+
console.log(` ${chalk36.dim("This skill teaches coding agents how to use the MutagenT CLI.")}`);
|
|
10475
|
+
console.log(` ${chalk36.dim("It will be automatically loaded by Claude Code when relevant triggers match.")}`);
|
|
10476
|
+
}
|
|
10477
|
+
});
|
|
10478
|
+
return skills;
|
|
10479
|
+
}
|
|
10480
|
+
|
|
10481
|
+
// src/commands/usage.ts
|
|
10482
|
+
init_config();
|
|
10483
|
+
import { Command as Command18 } from "commander";
|
|
10484
|
+
import chalk37 from "chalk";
|
|
10485
|
+
init_errors();
|
|
10486
|
+
init_sdk_client();
|
|
10487
|
+
var PROVIDERS_URL = "https://app.mutagent.io/settings/providers";
|
|
10488
|
+
function createUsageCommand() {
|
|
10489
|
+
const usage = new Command18("usage").description("Show resource counts (prompts, datasets, evaluations, optimizations, experiments)").addHelpText("after", `
|
|
10490
|
+
Examples:
|
|
10491
|
+
${chalk37.dim("$")} mutagent usage
|
|
10492
|
+
${chalk37.dim("$")} mutagent usage --json
|
|
10493
|
+
`);
|
|
10494
|
+
usage.action(async () => {
|
|
10495
|
+
const isJson = getJsonFlag(usage);
|
|
9095
10496
|
const output = new OutputFormatter(isJson ? "json" : "table");
|
|
9096
10497
|
try {
|
|
9097
10498
|
const apiKey = getApiKey();
|
|
@@ -9143,17 +10544,17 @@ Examples:
|
|
|
9143
10544
|
});
|
|
9144
10545
|
} else {
|
|
9145
10546
|
console.log("");
|
|
9146
|
-
console.log(
|
|
9147
|
-
console.log(
|
|
10547
|
+
console.log(chalk37.bold("\uD83D\uDCCA MutagenT Usage"));
|
|
10548
|
+
console.log(chalk37.dim("─".repeat(45)));
|
|
9148
10549
|
console.log("");
|
|
9149
|
-
console.log(
|
|
9150
|
-
console.log(` Prompts: ${
|
|
9151
|
-
console.log(` Datasets: ${
|
|
9152
|
-
console.log(` Evaluations: ${
|
|
9153
|
-
console.log(` Optimizations: ${
|
|
9154
|
-
console.log(` Experiments: ${
|
|
10550
|
+
console.log(chalk37.bold("Resources:"));
|
|
10551
|
+
console.log(` Prompts: ${chalk37.cyan(String(promptCount))}`);
|
|
10552
|
+
console.log(` Datasets: ${chalk37.cyan(String(datasetCount))}`);
|
|
10553
|
+
console.log(` Evaluations: ${chalk37.cyan(String(evaluationCount))}`);
|
|
10554
|
+
console.log(` Optimizations: ${chalk37.cyan(String(optimizationCount))}`);
|
|
10555
|
+
console.log(` Experiments: ${chalk37.cyan(String(experimentCount))}`);
|
|
9155
10556
|
console.log("");
|
|
9156
|
-
console.log(` Providers: ${
|
|
10557
|
+
console.log(` Providers: ${chalk37.underline(PROVIDERS_URL)}`);
|
|
9157
10558
|
console.log("");
|
|
9158
10559
|
}
|
|
9159
10560
|
} catch (error) {
|
|
@@ -9441,11 +10842,11 @@ Claude Code Session Telemetry:
|
|
|
9441
10842
|
|
|
9442
10843
|
// src/commands/feedback.ts
|
|
9443
10844
|
import { Command as Command20 } from "commander";
|
|
9444
|
-
import
|
|
10845
|
+
import chalk38 from "chalk";
|
|
9445
10846
|
init_errors();
|
|
9446
10847
|
init_config();
|
|
9447
10848
|
import { readFileSync as readFileSync11 } from "fs";
|
|
9448
|
-
import { join as join9, dirname } from "path";
|
|
10849
|
+
import { join as join9, dirname as dirname2 } from "path";
|
|
9449
10850
|
import { fileURLToPath } from "url";
|
|
9450
10851
|
var VALID_CATEGORIES = ["bug", "feature", "improvement", "praise"];
|
|
9451
10852
|
function getCliVersion() {
|
|
@@ -9453,7 +10854,7 @@ function getCliVersion() {
|
|
|
9453
10854
|
return process.env.CLI_VERSION;
|
|
9454
10855
|
}
|
|
9455
10856
|
try {
|
|
9456
|
-
const __dirname2 =
|
|
10857
|
+
const __dirname2 = dirname2(fileURLToPath(import.meta.url));
|
|
9457
10858
|
const pkgPath = join9(__dirname2, "..", "..", "package.json");
|
|
9458
10859
|
const pkg = JSON.parse(readFileSync11(pkgPath, "utf-8"));
|
|
9459
10860
|
return pkg.version ?? "0.1.1";
|
|
@@ -9495,12 +10896,12 @@ async function postToServer(payload, endpoint, apiKey, workspaceId, organization
|
|
|
9495
10896
|
}
|
|
9496
10897
|
function createFeedbackCommand() {
|
|
9497
10898
|
const feedback = new Command20("feedback").description("Send product feedback to MutagenT").addHelpText("after", `
|
|
9498
|
-
${
|
|
9499
|
-
${
|
|
9500
|
-
${
|
|
9501
|
-
${
|
|
10899
|
+
${chalk38.bold("Examples:")}
|
|
10900
|
+
${chalk38.cyan('mutagent feedback send -m "Great optimization results!"')}
|
|
10901
|
+
${chalk38.cyan('mutagent feedback send -m "CLI crashed on export" --category bug')}
|
|
10902
|
+
${chalk38.cyan('mutagent feedback send -m "Need batch operations" --category feature --json')}
|
|
9502
10903
|
|
|
9503
|
-
${
|
|
10904
|
+
${chalk38.yellow("AI Agent (MANDATORY):")}
|
|
9504
10905
|
ALWAYS use --json: mutagent feedback send -m "..." --category improvement --json
|
|
9505
10906
|
Use this command to report bugs, request features, or share UX feedback.
|
|
9506
10907
|
`).action(() => {
|
|
@@ -9511,18 +10912,18 @@ ${chalk37.yellow("AI Agent (MANDATORY):")}
|
|
|
9511
10912
|
}
|
|
9512
10913
|
function registerFeedbackSend(feedback) {
|
|
9513
10914
|
feedback.command("send").description("Send feedback about the MutagenT platform").requiredOption("-m, --message <text>", "Feedback message").option("--category <type>", `Feedback category: ${VALID_CATEGORIES.join(", ")}`, "improvement").option("--session <id>", "Link feedback to a specific session").addHelpText("after", `
|
|
9514
|
-
${
|
|
9515
|
-
${
|
|
9516
|
-
${
|
|
9517
|
-
${
|
|
9518
|
-
|
|
9519
|
-
${
|
|
9520
|
-
${
|
|
9521
|
-
${
|
|
9522
|
-
${
|
|
9523
|
-
${
|
|
9524
|
-
|
|
9525
|
-
${
|
|
10915
|
+
${chalk38.bold("Examples:")}
|
|
10916
|
+
${chalk38.dim("$")} mutagent feedback send -m "The optimization UX could show progress better"
|
|
10917
|
+
${chalk38.dim("$")} mutagent feedback send -m "CLI errored on traces export" --category bug
|
|
10918
|
+
${chalk38.dim("$")} mutagent feedback send -m "Love the guided eval!" --category praise --json
|
|
10919
|
+
|
|
10920
|
+
${chalk38.bold("Categories:")}
|
|
10921
|
+
${chalk38.bold("bug")} Something is broken or not working as expected
|
|
10922
|
+
${chalk38.bold("feature")} Request a new capability
|
|
10923
|
+
${chalk38.bold("improvement")} Suggest a UX or workflow enhancement (default)
|
|
10924
|
+
${chalk38.bold("praise")} Share what you love about the platform
|
|
10925
|
+
|
|
10926
|
+
${chalk38.yellow("AI Agent (MANDATORY):")}
|
|
9526
10927
|
ALWAYS use --json: mutagent feedback send -m "..." --json
|
|
9527
10928
|
Auto-captured context (CLI version, platform, node version) is included automatically.
|
|
9528
10929
|
`).action(async (options) => {
|
|
@@ -9579,7 +10980,7 @@ if (process.env.CLI_VERSION) {
|
|
|
9579
10980
|
cliVersion = process.env.CLI_VERSION;
|
|
9580
10981
|
} else {
|
|
9581
10982
|
try {
|
|
9582
|
-
const __dirname2 =
|
|
10983
|
+
const __dirname2 = dirname3(fileURLToPath2(import.meta.url));
|
|
9583
10984
|
const pkgPath = join10(__dirname2, "..", "..", "package.json");
|
|
9584
10985
|
const pkg = JSON.parse(readFileSync12(pkgPath, "utf-8"));
|
|
9585
10986
|
cliVersion = pkg.version ?? cliVersion;
|
|
@@ -9594,90 +10995,100 @@ program.name("mutagent").description(`MutagenT CLI - AI-native prompt optimizati
|
|
|
9594
10995
|
showGlobalOptions: true
|
|
9595
10996
|
});
|
|
9596
10997
|
program.addHelpText("after", `
|
|
9597
|
-
${
|
|
9598
|
-
|
|
9599
|
-
|
|
9600
|
-
|
|
9601
|
-
${
|
|
9602
|
-
|
|
9603
|
-
|
|
9604
|
-
mutagent
|
|
9605
|
-
mutagent
|
|
9606
|
-
mutagent
|
|
9607
|
-
mutagent
|
|
9608
|
-
mutagent
|
|
9609
|
-
|
|
9610
|
-
mutagent
|
|
9611
|
-
|
|
9612
|
-
mutagent prompts
|
|
9613
|
-
|
|
9614
|
-
mutagent prompts
|
|
9615
|
-
|
|
9616
|
-
|
|
9617
|
-
mutagent prompts
|
|
9618
|
-
|
|
9619
|
-
mutagent prompts evaluation
|
|
9620
|
-
|
|
9621
|
-
mutagent prompts
|
|
9622
|
-
|
|
9623
|
-
mutagent prompts optimize
|
|
9624
|
-
|
|
9625
|
-
mutagent
|
|
9626
|
-
|
|
9627
|
-
|
|
9628
|
-
mutagent
|
|
9629
|
-
|
|
9630
|
-
mutagent
|
|
9631
|
-
|
|
9632
|
-
${
|
|
9633
|
-
|
|
9634
|
-
|
|
9635
|
-
|
|
9636
|
-
|
|
9637
|
-
|
|
9638
|
-
${
|
|
9639
|
-
|
|
9640
|
-
|
|
9641
|
-
|
|
9642
|
-
|
|
9643
|
-
|
|
10998
|
+
${AGENTS_WIP_BANNER}
|
|
10999
|
+
|
|
11000
|
+
${chalk39.yellow("Non-Interactive Mode (CI/CD & Coding Agents):")}
|
|
11001
|
+
export MUTAGENT_API_KEY=mt_... ${chalk39.dim("or")} --api-key mt_...
|
|
11002
|
+
--json ${chalk39.dim("for structured output")} --non-interactive ${chalk39.dim("to disable prompts")}
|
|
11003
|
+
|
|
11004
|
+
${chalk39.yellow("Command Navigation:")}
|
|
11005
|
+
mutagent login ${chalk39.dim("Login (browser OAuth — recommended)")}
|
|
11006
|
+
mutagent auth status ${chalk39.dim("Check auth + workspace")}
|
|
11007
|
+
mutagent init ${chalk39.dim("Initialize project (.mutagentrc.json)")}
|
|
11008
|
+
mutagent explore ${chalk39.dim("Discover prompts in codebase")}
|
|
11009
|
+
mutagent workspaces list --json ${chalk39.dim("List workspaces (verify ID)")}
|
|
11010
|
+
mutagent config set workspace <id> ${chalk39.dim("Set active workspace")}
|
|
11011
|
+
mutagent usage --json ${chalk39.dim("Show account usage + provider status")}
|
|
11012
|
+
|
|
11013
|
+
mutagent prompts create --help ${chalk39.dim("Upload prompt (read help first!)")}
|
|
11014
|
+
mutagent prompts list --json ${chalk39.dim("List prompts")}
|
|
11015
|
+
mutagent prompts get <id> --json ${chalk39.dim("Full prompt details + schemas")}
|
|
11016
|
+
|
|
11017
|
+
mutagent prompts dataset add --help ${chalk39.dim("Upload dataset (read help first!)")}
|
|
11018
|
+
mutagent prompts dataset list <id> ${chalk39.dim("List datasets")}
|
|
11019
|
+
|
|
11020
|
+
mutagent prompts evaluation create --help ${chalk39.dim("Create eval (read help first!)")}
|
|
11021
|
+
mutagent prompts evaluation create <id> --guided --json ${chalk39.dim("Guided eval workflow")}
|
|
11022
|
+
mutagent prompts evaluation list <id> --json ${chalk39.dim("List evaluations")}
|
|
11023
|
+
|
|
11024
|
+
mutagent prompts optimize start --help ${chalk39.dim("Run optimization (read help first!)")}
|
|
11025
|
+
mutagent prompts optimize status <job-id> ${chalk39.dim("Poll progress")}
|
|
11026
|
+
mutagent prompts optimize results <job-id> ${chalk39.dim("View scorecard")}
|
|
11027
|
+
|
|
11028
|
+
mutagent feedback send -m "..." ${chalk39.dim("Send product feedback")}
|
|
11029
|
+
mutagent feedback send -m "..." --category bug ${chalk39.dim("Report a bug")}
|
|
11030
|
+
|
|
11031
|
+
mutagent integrate <framework> ${chalk39.dim("Framework integration guide")}
|
|
11032
|
+
mutagent hooks --help ${chalk39.dim("Hook setup for Claude Code telemetry")}
|
|
11033
|
+
mutagent playground run <id> --input '{...}' ${chalk39.dim("Quick test")}
|
|
11034
|
+
|
|
11035
|
+
${chalk39.yellow("★ Workflow: Framework Integration (Tracing):")}
|
|
11036
|
+
1. mutagent explore ${chalk39.dim("← discover prompts/agents in codebase")}
|
|
11037
|
+
2. mutagent integrate <framework> ${chalk39.dim("← get integration instructions")}
|
|
11038
|
+
3. Apply tracing code to your codebase ${chalk39.dim("← follow the guide output")}
|
|
11039
|
+
4. mutagent traces list --json ${chalk39.dim("← verify traces are arriving")}
|
|
11040
|
+
|
|
11041
|
+
${chalk39.yellow("★ Workflow: Evaluate → Optimize:")}
|
|
11042
|
+
1. mutagent prompts create --help ${chalk39.dim("← read help")}
|
|
11043
|
+
2. mutagent prompts create ... --json ${chalk39.dim("← upload prompt with {variables} + inputSchema")}
|
|
11044
|
+
3. mutagent prompts dataset add --help ${chalk39.dim("← read help")}
|
|
11045
|
+
4. mutagent prompts dataset add <id> ... --json ${chalk39.dim("← upload dataset")}
|
|
11046
|
+
5. mutagent prompts evaluation create <id> --guided --json ${chalk39.dim("← guided eval")}
|
|
9644
11047
|
6. mutagent prompts optimize start <id> --dataset <d> --evaluation <e> --json
|
|
9645
11048
|
|
|
9646
|
-
${
|
|
9647
|
-
After ${
|
|
9648
|
-
${
|
|
9649
|
-
${
|
|
9650
|
-
${
|
|
11049
|
+
${chalk39.yellow("Post-Onboarding Decision Tree:")}
|
|
11050
|
+
After ${chalk39.bold("mutagent auth login")}, users land in one of 3 paths:
|
|
11051
|
+
${chalk39.bold("Path A")} (Tracing): explore → integrate <framework> → apply tracing → verify
|
|
11052
|
+
${chalk39.bold("Path B")} (Optimization): explore → prompts create → dataset add → eval create → optimize
|
|
11053
|
+
${chalk39.bold("Path C")} (Manual): Use CLI commands directly — run mutagent <command> --help
|
|
9651
11054
|
|
|
9652
|
-
${
|
|
11055
|
+
${chalk39.yellow("Directive System:")}
|
|
9653
11056
|
Every --json response may include:
|
|
9654
|
-
${
|
|
9655
|
-
${
|
|
9656
|
-
${
|
|
9657
|
-
${
|
|
11057
|
+
${chalk39.bold("_directive.renderedCard")} Pre-formatted card for the user ${chalk39.red("(MUST be shown in chat)")}
|
|
11058
|
+
${chalk39.bold("_directive.instruction")} Next step for the agent
|
|
11059
|
+
${chalk39.bold("_directive.next")} Array of suggested follow-up commands
|
|
11060
|
+
${chalk39.bold("_links")} Dashboard/API URLs (format as markdown links)
|
|
9658
11061
|
|
|
9659
|
-
${
|
|
9660
|
-
Each criterion MUST have: ${
|
|
11062
|
+
${chalk39.yellow("Evaluation Criteria Format:")}
|
|
11063
|
+
Each criterion MUST have: ${chalk39.bold("name")}, ${chalk39.bold("description")} (scoring rubric), ${chalk39.bold("evaluationParameter")}
|
|
9661
11064
|
evaluationParameter MUST match an inputSchema or outputSchema field name
|
|
9662
11065
|
No duplicate evaluationParameter values — each criterion targets a unique field
|
|
9663
11066
|
ALL schema fields must be covered (missing fields = error)
|
|
9664
|
-
Use ${
|
|
9665
|
-
|
|
9666
|
-
${
|
|
9667
|
-
|
|
11067
|
+
Use ${chalk39.bold("--guided --json")} to generate criteria templates from prompt schemas
|
|
11068
|
+
|
|
11069
|
+
${chalk39.bold("Two framings — coding agents MUST distinguish:")}
|
|
11070
|
+
${chalk39.bold("• INPUT fields → Minimum Viable Context")}
|
|
11071
|
+
What info MUST be present in the input for the prompt to succeed.
|
|
11072
|
+
Rubric should include a ${chalk39.bold("completeness scale")} (e.g., 0 = missing critical context, 5 = complete).
|
|
11073
|
+
${chalk39.dim("Input fields are typically `{variables}` in your prompt template.")}
|
|
11074
|
+
${chalk39.bold("• OUTPUT fields → Output Standards")}
|
|
11075
|
+
Content correctness + structural correctness + a ${chalk39.bold("correctness scale")} (e.g., 0 = wrong, 5 = perfect).
|
|
11076
|
+
|
|
11077
|
+
${chalk39.yellow("Optimization Cost Control:")}
|
|
11078
|
+
Default max-iterations is 1. ${chalk39.red("NEVER increase without explicit user request.")}
|
|
9668
11079
|
Each iteration incurs LLM costs — confirm with user before starting >1.
|
|
9669
11080
|
|
|
9670
|
-
${
|
|
9671
|
-
After ${
|
|
9672
|
-
Then offer choices: ${
|
|
11081
|
+
${chalk39.yellow("Post-Optimization:")}
|
|
11082
|
+
After ${chalk39.bold("optimize results")}: ALWAYS show the before/after diff to the user first.
|
|
11083
|
+
Then offer choices: ${chalk39.bold("Apply")} / ${chalk39.bold("Reject")}.
|
|
9673
11084
|
|
|
9674
|
-
${
|
|
11085
|
+
${chalk39.yellow("State Tracking:")}
|
|
9675
11086
|
.mutagent/mutation-context.md — Codebase index of discovered/uploaded prompts
|
|
9676
11087
|
Update after explore, create, and dataset operations
|
|
9677
11088
|
mutagent auth status — Auth + workspace state
|
|
9678
11089
|
Comment markers (// MutagenT:START ... // MutagenT:END) in source files
|
|
9679
11090
|
|
|
9680
|
-
${
|
|
11091
|
+
${chalk39.yellow("AI Agent Rules (MANDATORY for coding agents):")}
|
|
9681
11092
|
1. Login (two paths):
|
|
9682
11093
|
- CI / fully automated: export MUTAGENT_API_KEY=mt_... then mutagent login --json
|
|
9683
11094
|
- Helping a user onboard: mutagent login --browser --json
|
|
@@ -9688,15 +11099,15 @@ ${chalk38.yellow("AI Agent Rules (MANDATORY for coding agents):")}
|
|
|
9688
11099
|
3. Run <command> --help BEFORE first use of any command
|
|
9689
11100
|
4. Use --guided --json for evaluation creation (NEVER --guided alone)
|
|
9690
11101
|
5. Parse _directive.renderedCard and copy it into your CHAT RESPONSE verbatim
|
|
9691
|
-
${
|
|
11102
|
+
${chalk39.red("HARD STOP")}: do NOT run further commands until the card is rendered in chat
|
|
9692
11103
|
6. After mutagent init, verify workspace: mutagent workspaces list --json
|
|
9693
11104
|
7. Use {single_braces} for template variables in prompts
|
|
9694
11105
|
8. Collect evaluation criteria from the user — NEVER auto-generate
|
|
9695
11106
|
9. ALL user interaction via AskUserQuestion — CLI is non-interactive
|
|
9696
11107
|
${!hasCredentials() ? `
|
|
9697
|
-
` +
|
|
11108
|
+
` + chalk39.yellow(" Warning: Not authenticated. Run: mutagent login") + `
|
|
9698
11109
|
` : ""}${!hasRcConfig() ? `
|
|
9699
|
-
` +
|
|
11110
|
+
` + chalk39.green(" Get started: mutagent init") + `
|
|
9700
11111
|
` : ""}`);
|
|
9701
11112
|
var rawArgs = process.argv.slice(2);
|
|
9702
11113
|
if (rawArgs.includes("-v") || rawArgs.includes("--version")) {
|
|
@@ -9737,5 +11148,5 @@ program.addCommand(createHooksCommand());
|
|
|
9737
11148
|
program.addCommand(createFeedbackCommand());
|
|
9738
11149
|
program.parse();
|
|
9739
11150
|
|
|
9740
|
-
//# debugId=
|
|
11151
|
+
//# debugId=4EA6EB9078B51CC264756E2164756E21
|
|
9741
11152
|
//# sourceMappingURL=cli.js.map
|