npm - @mutagent/cli - Versions diffs - 0.1.36 → 0.1.37 - Mend

@mutagent/cli 0.1.36 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/bin/cli.js (changed)

@@ -317,6 +317,125 @@ var init_errors = __esm(() => {
   };
 });
+// src/lib/scorecard-extraction.ts
+function extractScorecardDetails(rawState, iterCtx) {
+  if (!iterCtx)
+    return {};
+  const gc = rawState.globalContext;
+  const gcCtx = gc?.context;
+  const beforeExec = gcCtx?.executions;
+  const beforeResults = beforeExec?.results ?? [];
+  const beforeById = new Map;
+  for (const r of beforeResults) {
+    const id = r.id;
+    if (id)
+      beforeById.set(id, r);
+  }
+  let originalScore;
+  if (beforeResults.length > 0) {
+    const sum = beforeResults.reduce((acc, r) => {
+      const eval_ = r.evaluation;
+      return acc + (eval_?.score ?? 0);
+    }, 0);
+    originalScore = sum / beforeResults.length;
+  }
+  const afterExec = iterCtx.executionResults;
+  const afterResults = afterExec?.executions ?? [];
+  const datasetResults = afterResults.length > 0 ? afterResults.map((r) => {
+    const id = r.id || "unknown";
+    const afterEval = r.evaluation;
+    const afterScore = afterEval?.score ?? 0;
+    const beforeResult = beforeById.get(id);
+    const beforeEval = beforeResult?.evaluation;
+    const beforeScore = beforeEval?.score;
+    return { id, beforeScore, afterScore };
+  }) : undefined;
+  const criteriaScores = extractCriteriaScores(beforeResults, afterResults);
+  const rawFailureModes = iterCtx.failureModes;
+  const failureModes = rawFailureModes?.categories && rawFailureModes.failures ? rawFailureModes.categories.map((category) => ({
+    category,
+    failures: (rawFailureModes.failures?.[category] ?? []).map((f) => ({
+      description: f.description ?? f.label,
+      summary: f.summary
+    }))
+  })) : undefined;
+  const rawMutations = iterCtx.mutations;
+  const mutations = rawMutations && rawMutations.length > 0 ? rawMutations.map((m) => ({
+    label: m.label ?? "Unknown mutation",
+    status: m.status ?? "pending",
+    priority: m.priority,
+    rationale: m.target?.rationale
+  })) : undefined;
+  const evaluationDetails = afterResults.length > 0 ? afterResults.map((r) => {
+    const id = r.id || "unknown";
+    const eval_ = r.evaluation;
+    const score = eval_?.score ?? 0;
+    const success = eval_?.success ?? false;
+    const metrics = eval_?.evaluations?.map((metric) => {
+      const criteria = metric.evaluationChecklist?.items?.map((item) => ({
+        name: item.evaluationParameter ?? item.criteria ?? "unknown",
+        score: item.llmScore ?? 0,
+        success: item.success ?? false
+      }));
+      return {
+        name: metric.name ?? "unknown",
+        score: metric.score ?? 0,
+        success: metric.success ?? false,
+        failureMode: metric.failureMode,
+        reasoning: metric.reasoning,
+        criteria: criteria && criteria.length > 0 ? criteria : undefined
+      };
+    });
+    return {
+      itemId: id,
+      score,
+      success,
+      metrics: metrics && metrics.length > 0 ? metrics : undefined
+    };
+  }) : undefined;
+  return {
+    originalScore,
+    criteriaScores,
+    datasetResults,
+    failureModes,
+    mutations,
+    evaluationDetails
+  };
+}
+function extractCriteriaScores(beforeResults, afterResults) {
+  const metricNames = new Set;
+  const beforeScores = new Map;
+  const afterScores = new Map;
+  for (const r of beforeResults) {
+    const eval_ = r.evaluation;
+    for (const m of eval_?.evaluations ?? []) {
+      const name = m.name ?? "unknown";
+      metricNames.add(name);
+      const existing = beforeScores.get(name) ?? [];
+      existing.push(m.score ?? 0);
+      beforeScores.set(name, existing);
+    }
+  }
+  for (const r of afterResults) {
+    const eval_ = r.evaluation;
+    for (const m of eval_?.evaluations ?? []) {
+      const name = m.name ?? "unknown";
+      metricNames.add(name);
+      const existing = afterScores.get(name) ?? [];
+      existing.push(m.score ?? 0);
+      afterScores.set(name, existing);
+    }
+  }
+  if (metricNames.size === 0)
+    return;
+  const avg = (arr) => arr.reduce((a, b) => a + b, 0) / arr.length;
+  return Array.from(metricNames).map((name) => ({
+    name,
+    before: beforeScores.has(name) ? avg(beforeScores.get(name) ?? []) : undefined,
+    after: afterScores.has(name) ? avg(afterScores.get(name) ?? []) : undefined
+  }));
+}
 // src/lib/sdk-client.ts
 var exports_sdk_client = {};
 __export(exports_sdk_client, {
@@ -646,9 +765,13 @@ class SDKClientWrapper {
       const prompt = await this.getPrompt(String(job.promptId ?? ""));
       const statesRes = await this.request(`/api/optimization/${jobId}/states`).catch(() => ({ states: [] }));
       const latestState = statesRes.states[statesRes.states.length - 1];
-      const iterCtx = latestState?.state.iterationContext;
-      const mutatedPromptText = iterCtx?.currentPrompt?.prompt;
-      const originalPromptText = iterCtx?.basePrompt?.prompt;
+      const rawState = latestState?.state ?? {};
+      const iterCtx = rawState.iterationContext ?? rawState.current?.context;
+      const basePromptObj = iterCtx?.basePrompt;
+      const currentPromptObj = iterCtx?.currentPrompt;
+      const mutatedPromptText = typeof currentPromptObj?.prompt === "string" ? currentPromptObj.prompt : undefined;
+      const originalPromptText = typeof basePromptObj?.prompt === "string" ? basePromptObj.prompt : undefined;
+      const extracted = extractScorecardDetails(rawState, iterCtx);
       return {
         job: {
           id: job.id ?? jobId,
@@ -658,10 +781,16 @@ class SDKClientWrapper {
         },
         prompt,
         bestScore: job.bestScore,
+        originalScore: extracted.originalScore,
         iterationsCompleted: job.currentIteration,
         scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined,
         mutatedPromptText,
-        originalPromptText
+        originalPromptText,
+        criteriaScores: extracted.criteriaScores,
+        datasetResults: extracted.datasetResults,
+        failureModes: extracted.failureModes,
+        mutations: extracted.mutations,
+        evaluationDetails: extracted.evaluationDetails
       };
     } catch (error) {
       this.handleError(error);
@@ -7451,5 +7580,5 @@ program.addCommand(createSkillsCommand());
 program.addCommand(createUsageCommand());
 program.parse();
-//# debugId=B35CD49159FCE51364756E2164756E21
+//# debugId=873FF0DF0E58222164756E2164756E21
 //# sourceMappingURL=cli.js.map