npm - @mutagent/cli - Versions diffs - 0.1.36 → 0.1.38 - Mend

@mutagent/cli 0.1.36 → 0.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/bin/cli.js (changed)

@@ -317,6 +317,125 @@ var init_errors = __esm(() => {
   };
 });
+// src/lib/scorecard-extraction.ts
+function extractScorecardDetails(rawState, iterCtx) {
+  if (!iterCtx)
+    return {};
+  const gc = rawState.globalContext;
+  const gcCtx = gc?.context;
+  const beforeExec = gcCtx?.executions;
+  const beforeResults = beforeExec?.results ?? [];
+  const beforeById = new Map;
+  for (const r of beforeResults) {
+    const id = r.id;
+    if (id)
+      beforeById.set(id, r);
+  }
+  let originalScore;
+  if (beforeResults.length > 0) {
+    const sum = beforeResults.reduce((acc, r) => {
+      const eval_ = r.evaluation;
+      return acc + (eval_?.score ?? 0);
+    }, 0);
+    originalScore = sum / beforeResults.length;
+  }
+  const afterExec = iterCtx.executionResults;
+  const afterResults = afterExec?.executions ?? [];
+  const datasetResults = afterResults.length > 0 ? afterResults.map((r) => {
+    const id = r.id || "unknown";
+    const afterEval = r.evaluation;
+    const afterScore = afterEval?.score ?? 0;
+    const beforeResult = beforeById.get(id);
+    const beforeEval = beforeResult?.evaluation;
+    const beforeScore = beforeEval?.score;
+    return { id, beforeScore, afterScore };
+  }) : undefined;
+  const criteriaScores = extractCriteriaScores(beforeResults, afterResults);
+  const rawFailureModes = iterCtx.failureModes;
+  const failureModes = rawFailureModes?.categories && rawFailureModes.failures ? rawFailureModes.categories.map((category) => ({
+    category,
+    failures: (rawFailureModes.failures?.[category] ?? []).map((f) => ({
+      description: f.description ?? f.label,
+      summary: f.summary
+    }))
+  })) : undefined;
+  const rawMutations = iterCtx.mutations;
+  const mutations = rawMutations && rawMutations.length > 0 ? rawMutations.map((m) => ({
+    label: m.label ?? "Unknown mutation",
+    status: m.status ?? "pending",
+    priority: m.priority,
+    rationale: m.target?.rationale
+  })) : undefined;
+  const evaluationDetails = afterResults.length > 0 ? afterResults.map((r) => {
+    const id = r.id || "unknown";
+    const eval_ = r.evaluation;
+    const score = eval_?.score ?? 0;
+    const success = eval_?.success ?? false;
+    const metrics = eval_?.evaluations?.map((metric) => {
+      const criteria = metric.evaluationChecklist?.items?.map((item) => ({
+        name: item.evaluationParameter ?? item.criteria ?? "unknown",
+        score: item.llmScore ?? 0,
+        success: item.success ?? false
+      }));
+      return {
+        name: metric.name ?? "unknown",
+        score: metric.score ?? 0,
+        success: metric.success ?? false,
+        failureMode: metric.failureMode,
+        reasoning: metric.reasoning,
+        criteria: criteria && criteria.length > 0 ? criteria : undefined
+      };
+    });
+    return {
+      itemId: id,
+      score,
+      success,
+      metrics: metrics && metrics.length > 0 ? metrics : undefined
+    };
+  }) : undefined;
+  return {
+    originalScore,
+    criteriaScores,
+    datasetResults,
+    failureModes,
+    mutations,
+    evaluationDetails
+  };
+}
+function extractCriteriaScores(beforeResults, afterResults) {
+  const metricNames = new Set;
+  const beforeScores = new Map;
+  const afterScores = new Map;
+  for (const r of beforeResults) {
+    const eval_ = r.evaluation;
+    for (const m of eval_?.evaluations ?? []) {
+      const name = m.name ?? "unknown";
+      metricNames.add(name);
+      const existing = beforeScores.get(name) ?? [];
+      existing.push(m.score ?? 0);
+      beforeScores.set(name, existing);
+    }
+  }
+  for (const r of afterResults) {
+    const eval_ = r.evaluation;
+    for (const m of eval_?.evaluations ?? []) {
+      const name = m.name ?? "unknown";
+      metricNames.add(name);
+      const existing = afterScores.get(name) ?? [];
+      existing.push(m.score ?? 0);
+      afterScores.set(name, existing);
+    }
+  }
+  if (metricNames.size === 0)
+    return;
+  const avg = (arr) => arr.reduce((a, b) => a + b, 0) / arr.length;
+  return Array.from(metricNames).map((name) => ({
+    name,
+    before: beforeScores.has(name) ? avg(beforeScores.get(name) ?? []) : undefined,
+    after: afterScores.has(name) ? avg(afterScores.get(name) ?? []) : undefined
+  }));
+}
 // src/lib/sdk-client.ts
 var exports_sdk_client = {};
 __export(exports_sdk_client, {
@@ -646,9 +765,13 @@ class SDKClientWrapper {
       const prompt = await this.getPrompt(String(job.promptId ?? ""));
       const statesRes = await this.request(`/api/optimization/${jobId}/states`).catch(() => ({ states: [] }));
       const latestState = statesRes.states[statesRes.states.length - 1];
-      const iterCtx = latestState?.state.iterationContext;
-      const mutatedPromptText = iterCtx?.currentPrompt?.prompt;
-      const originalPromptText = iterCtx?.basePrompt?.prompt;
+      const rawState = latestState?.state ?? {};
+      const iterCtx = rawState.iterationContext ?? rawState.current?.context;
+      const basePromptObj = iterCtx?.basePrompt;
+      const currentPromptObj = iterCtx?.currentPrompt;
+      const mutatedPromptText = typeof currentPromptObj?.prompt === "string" ? currentPromptObj.prompt : undefined;
+      const originalPromptText = typeof basePromptObj?.prompt === "string" ? basePromptObj.prompt : undefined;
+      const extracted = extractScorecardDetails(rawState, iterCtx);
       return {
         job: {
           id: job.id ?? jobId,
@@ -658,17 +781,28 @@ class SDKClientWrapper {
         },
         prompt,
         bestScore: job.bestScore,
+        originalScore: extracted.originalScore,
         iterationsCompleted: job.currentIteration,
         scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined,
         mutatedPromptText,
-        originalPromptText
+        originalPromptText,
+        criteriaScores: extracted.criteriaScores,
+        datasetResults: extracted.datasetResults,
+        failureModes: extracted.failureModes,
+        mutations: extracted.mutations,
+        evaluationDetails: extracted.evaluationDetails
       };
     } catch (error) {
       this.handleError(error);
     }
   }
   async listTraces(filters) {
-    const params = filters ? new URLSearchParams(filters).toString() : "";
+    const filterRecord = {};
+    if (filters?.promptId)
+      filterRecord.promptId = filters.promptId;
+    if (filters?.source)
+      filterRecord.source = filters.source;
+    const params = Object.keys(filterRecord).length > 0 ? new URLSearchParams(filterRecord).toString() : "";
     const response = await this.request(`/api/traces${params ? `?${params}` : ""}`);
     return response.data ?? [];
   }
@@ -927,10 +1061,10 @@ var init_sdk_client = __esm(() => {
 });
 // src/bin/cli.ts
-import { Command as Command18 } from "commander";
+import { Command as Command19 } from "commander";
 import chalk24 from "chalk";
-import { readFileSync as readFileSync11 } from "fs";
-import { join as join8, dirname } from "path";
+import { readFileSync as readFileSync12 } from "fs";
+import { join as join9, dirname } from "path";
 import { fileURLToPath } from "url";
 // src/commands/auth.ts
@@ -4830,10 +4964,11 @@ Examples:
 Note: MutagenT traces replace Langfuse for observability.
     `);
-  traces.command("list").description("List traces").option("-p, --prompt <id>", "Filter by prompt ID").option("-l, --limit <n>", "Limit results", "50").addHelpText("after", `
+  traces.command("list").description("List traces").option("-p, --prompt <id>", "Filter by prompt ID").option("-s, --source <source>", "Filter by trace source (e.g., claude-code, sdk, langchain)").option("-l, --limit <n>", "Limit results", "50").addHelpText("after", `
 Examples:
   ${chalk12.dim("$")} mutagent traces list
   ${chalk12.dim("$")} mutagent traces list --prompt <prompt-id>
+  ${chalk12.dim("$")} mutagent traces list --source claude-code --json
   ${chalk12.dim("$")} mutagent traces list --limit 10 --json
 ${chalk12.dim("Tip: Filter by prompt to see traces for a specific prompt version.")}
@@ -4843,7 +4978,8 @@ ${chalk12.dim("Tip: Filter by prompt to see traces for a specific prompt version
     try {
       const client = getSDKClient();
       const tracesList = await client.listTraces({
-        promptId: options.prompt
+        promptId: options.prompt,
+        source: options.source
       });
       const limit = parseInt(options.limit, 10) || 50;
       const limited = tracesList.slice(0, limit);
@@ -7349,6 +7485,257 @@ Examples:
   return usage;
 }
+// src/commands/hooks.ts
+init_config();
+import { Command as Command18 } from "commander";
+import { randomUUID } from "crypto";
+import { join as join8 } from "path";
+import { tmpdir } from "os";
+import { readFileSync as readFileSync11, writeFileSync as writeFileSync6, unlinkSync, existsSync as existsSync14 } from "fs";
+async function safeExecute(fn) {
+  try {
+    await fn();
+  } catch (err) {
+    process.stderr.write(`[mutagent hooks] Warning: ${err instanceof Error ? err.message : String(err)}
+`);
+  }
+}
+async function readStdin() {
+  const chunks = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+  }
+  return JSON.parse(Buffer.concat(chunks).toString("utf-8"));
+}
+function stateFilePath(sessionId) {
+  return join8(tmpdir(), `mutagent-hook-${sessionId}.json`);
+}
+function readState(sessionId) {
+  const path = stateFilePath(sessionId);
+  if (!existsSync14(path))
+    return null;
+  try {
+    return JSON.parse(readFileSync11(path, "utf-8"));
+  } catch {
+    return null;
+  }
+}
+function writeState(sessionId, state) {
+  writeFileSync6(stateFilePath(sessionId), JSON.stringify(state), "utf-8");
+}
+function deleteState(sessionId) {
+  const path = stateFilePath(sessionId);
+  if (existsSync14(path)) {
+    try {
+      unlinkSync(path);
+    } catch {}
+  }
+}
+var API_TIMEOUT_MS = 5000;
+async function sendBatchTrace(traces) {
+  const apiKey = getApiKey();
+  if (!apiKey) {
+    process.stderr.write(`[mutagent hooks] Warning: Not authenticated. Run: mutagent auth login
+`);
+    return;
+  }
+  const config = loadConfig();
+  const endpoint = config.endpoint ?? "http://localhost:3003";
+  const headers = {
+    "x-api-key": apiKey,
+    "Content-Type": "application/json"
+  };
+  if (config.defaultWorkspace) {
+    headers["x-workspace-id"] = config.defaultWorkspace;
+  }
+  if (config.defaultOrganization) {
+    headers["x-organization-id"] = config.defaultOrganization;
+  }
+  const controller = new AbortController;
+  const timeout = setTimeout(() => {
+    controller.abort();
+  }, API_TIMEOUT_MS);
+  try {
+    const response = await fetch(`${endpoint}/api/traces/batch`, {
+      method: "POST",
+      headers,
+      body: JSON.stringify({ traces }),
+      signal: controller.signal
+    });
+    if (!response.ok) {
+      const body = await response.text().catch(() => "");
+      process.stderr.write(`[mutagent hooks] Warning: API returned ${String(response.status)}: ${body.slice(0, 200)}
+`);
+    }
+  } finally {
+    clearTimeout(timeout);
+  }
+}
+function getString(input, ...keys) {
+  for (const key of keys) {
+    const val = input[key];
+    if (typeof val === "string" && val.length > 0)
+      return val;
+  }
+  return "";
+}
+async function handleSessionStart() {
+  const input = await readStdin();
+  const sessionId = getString(input, "session_id", "sessionId");
+  if (!sessionId)
+    throw new Error("Missing session_id in stdin");
+  const now = new Date().toISOString();
+  const traceId = `cc-${sessionId}`;
+  const state = {
+    traceId,
+    sessionId,
+    startTime: now,
+    openSpans: {}
+  };
+  writeState(sessionId, state);
+  await sendBatchTrace([
+    {
+      traceId,
+      sessionId,
+      name: "Claude Code Session",
+      source: "claude-code",
+      startTime: now,
+      status: "running",
+      spans: []
+    }
+  ]);
+}
+async function handleSessionEnd() {
+  const input = await readStdin();
+  const sessionId = getString(input, "session_id", "sessionId");
+  if (!sessionId)
+    throw new Error("Missing session_id in stdin");
+  const now = new Date().toISOString();
+  const state = readState(sessionId);
+  const traceId = state?.traceId ?? `cc-${sessionId}`;
+  const startTime = state?.startTime ?? now;
+  await sendBatchTrace([
+    {
+      traceId,
+      sessionId,
+      name: "Claude Code Session",
+      source: "claude-code",
+      startTime,
+      endTime: now,
+      status: "completed",
+      spans: []
+    }
+  ]);
+  deleteState(sessionId);
+}
+async function handlePreToolUse() {
+  const input = await readStdin();
+  const sessionId = getString(input, "session_id", "sessionId");
+  const toolName = getString(input, "tool_name", "toolName") || "unknown";
+  if (!sessionId)
+    throw new Error("Missing session_id in stdin");
+  const now = new Date().toISOString();
+  const state = readState(sessionId) ?? {
+    traceId: `cc-${sessionId}`,
+    sessionId,
+    startTime: now,
+    openSpans: {}
+  };
+  const spanId = randomUUID();
+  state.openSpans[spanId] = {
+    spanId,
+    startTime: now,
+    toolName
+  };
+  writeState(sessionId, state);
+  await sendBatchTrace([
+    {
+      traceId: state.traceId,
+      sessionId,
+      name: "Claude Code Session",
+      source: "claude-code",
+      startTime: state.startTime,
+      status: "running",
+      spans: [
+        {
+          spanId,
+          name: toolName,
+          kind: "tool",
+          startTime: now,
+          status: "running"
+        }
+      ]
+    }
+  ]);
+}
+async function handlePostToolUse() {
+  const input = await readStdin();
+  const sessionId = getString(input, "session_id", "sessionId");
+  const toolName = getString(input, "tool_name", "toolName") || "unknown";
+  if (!sessionId)
+    throw new Error("Missing session_id in stdin");
+  const now = new Date().toISOString();
+  const state = readState(sessionId);
+  const traceId = state?.traceId ?? `cc-${sessionId}`;
+  const startTime = state?.startTime ?? now;
+  let matchedSpan = null;
+  let matchedKey = null;
+  if (state?.openSpans) {
+    const entries = Object.entries(state.openSpans);
+    for (let i = entries.length - 1;i >= 0; i--) {
+      const entry = entries[i];
+      if (entry && entry[1].toolName === toolName) {
+        matchedSpan = entry[1];
+        matchedKey = entry[0];
+        break;
+      }
+    }
+  }
+  const spanId = matchedSpan?.spanId ?? randomUUID();
+  const spanStartTime = matchedSpan?.startTime ?? now;
+  if (state && matchedKey) {
+    state.openSpans = Object.fromEntries(Object.entries(state.openSpans).filter(([k]) => k !== matchedKey));
+    writeState(sessionId, state);
+  }
+  await sendBatchTrace([
+    {
+      traceId,
+      sessionId,
+      name: "Claude Code Session",
+      source: "claude-code",
+      startTime,
+      status: "running",
+      spans: [
+        {
+          spanId,
+          name: toolName,
+          kind: "tool",
+          startTime: spanStartTime,
+          endTime: now,
+          status: "completed"
+        }
+      ]
+    }
+  ]);
+}
+function createHooksCommand() {
+  const hooks = new Command18("hooks").description("Hook handlers for AI coding assistants");
+  const claudeCode = hooks.command("claude-code").description("Claude Code session telemetry");
+  claudeCode.command("session-start").description("Handle session start event").action(async () => {
+    await safeExecute(handleSessionStart);
+  });
+  claudeCode.command("session-end").description("Handle session end event").action(async () => {
+    await safeExecute(handleSessionEnd);
+  });
+  claudeCode.command("pre-tool-use").description("Handle pre-tool-use event").action(async () => {
+    await safeExecute(handlePreToolUse);
+  });
+  claudeCode.command("post-tool-use").description("Handle post-tool-use event").action(async () => {
+    await safeExecute(handlePostToolUse);
+  });
+  return hooks;
+}
 // src/bin/cli.ts
 init_config();
 var cliVersion = "0.1.1";
@@ -7357,12 +7744,12 @@ if (process.env.CLI_VERSION) {
 } else {
   try {
     const __dirname2 = dirname(fileURLToPath(import.meta.url));
-    const pkgPath = join8(__dirname2, "..", "..", "package.json");
-    const pkg = JSON.parse(readFileSync11(pkgPath, "utf-8"));
+    const pkgPath = join9(__dirname2, "..", "..", "package.json");
+    const pkg = JSON.parse(readFileSync12(pkgPath, "utf-8"));
     cliVersion = pkg.version ?? cliVersion;
   } catch {}
 }
-var program = new Command18;
+var program = new Command19;
 program.name("mutagent").description(`MutagenT CLI - AI-native prompt optimization platform
   Documentation: https://docs.mutagent.io/cli
@@ -7400,6 +7787,7 @@ ${chalk24.yellow("Command Navigation:")}
   mutagent prompts optimize results <job-id>  ${chalk24.dim("View scorecard")}
   mutagent integrate <framework>        ${chalk24.dim("Framework integration guide")}
+  mutagent hooks claude-code <event>   ${chalk24.dim("Hook handler for Claude Code telemetry")}
   mutagent playground run <id> --input '{...}'  ${chalk24.dim("Quick test")}
 ${chalk24.yellow("Workflow: Evaluate → Optimize:")}
@@ -7449,7 +7837,8 @@ program.addCommand(createProvidersCommand());
 program.addCommand(createExploreCommand());
 program.addCommand(createSkillsCommand());
 program.addCommand(createUsageCommand());
+program.addCommand(createHooksCommand());
 program.parse();
-//# debugId=B35CD49159FCE51364756E2164756E21
+//# debugId=CF947D7B55AD31C164756E2164756E21
 //# sourceMappingURL=cli.js.map