@mutagent/cli 0.1.36 → 0.1.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/cli.js CHANGED
@@ -317,6 +317,125 @@ var init_errors = __esm(() => {
317
317
  };
318
318
  });
319
319
 
320
+ // src/lib/scorecard-extraction.ts
321
/**
 * Builds the optional scorecard sections from an optimization job's latest
 * state.
 *
 * The state is untyped API data, so every list is normalised before use: a
 * value that is not an array is treated as empty, and entries that are not
 * objects are dropped. A malformed optional section therefore yields
 * `undefined` for that section instead of throwing.
 *
 * @param rawState Latest optimization state; baseline results are read from
 *   `globalContext.context.executions.results`.
 * @param iterCtx Iteration context; reads `executionResults.executions`,
 *   `failureModes` and `mutations`.
 * @returns `{}` when `iterCtx` is falsy; otherwise an object with
 *   `originalScore`, `criteriaScores`, `datasetResults`, `failureModes`,
 *   `mutations` and `evaluationDetails`, each `undefined` when no data exists.
 */
function extractScorecardDetails(rawState, iterCtx) {
  if (!iterCtx)
    return {};

  // Keep only object entries of an array; anything else counts as "no data".
  const objectsIn = (value) => Array.isArray(value) ? value.filter((entry) => entry !== null && typeof entry === "object") : [];
  const orUndefinedIfEmpty = (list) => list.length > 0 ? list : undefined;
  const itemIdOf = (result) => result.id || "unknown";

  const beforeResults = objectsIn(rawState?.globalContext?.context?.executions?.results);
  const afterResults = objectsIn(iterCtx.executionResults?.executions);

  // Index baseline results by id so each "after" item can find its baseline.
  const beforeById = new Map();
  for (const result of beforeResults) {
    if (result.id)
      beforeById.set(result.id, result);
  }

  // Baseline score is the mean over all baseline results; a result without
  // an evaluation score contributes 0.
  const originalScore = beforeResults.length > 0
    ? beforeResults.reduce((sum, result) => sum + (result.evaluation?.score ?? 0), 0) / beforeResults.length
    : undefined;

  const datasetResults = orUndefinedIfEmpty(afterResults.map((result) => {
    const id = itemIdOf(result);
    return {
      id,
      beforeScore: beforeById.get(id)?.evaluation?.score,
      afterScore: result.evaluation?.score ?? 0
    };
  }));

  const criteriaScores = extractCriteriaScores(beforeResults, afterResults);

  const rawFailureModes = iterCtx.failureModes;
  const failureModes = Array.isArray(rawFailureModes?.categories) && rawFailureModes.failures
    ? rawFailureModes.categories.map((category) => ({
      category,
      failures: objectsIn(rawFailureModes.failures[category]).map((failure) => ({
        description: failure.description ?? failure.label,
        summary: failure.summary
      }))
    }))
    : undefined;

  const mutations = orUndefinedIfEmpty(objectsIn(iterCtx.mutations).map((mutation) => ({
    label: mutation.label ?? "Unknown mutation",
    status: mutation.status ?? "pending",
    priority: mutation.priority,
    rationale: mutation.target?.rationale
  })));

  const toCriterion = (item) => ({
    name: item.evaluationParameter ?? item.criteria ?? "unknown",
    score: item.llmScore ?? 0,
    success: item.success ?? false
  });
  const toMetric = (metric) => ({
    name: metric.name ?? "unknown",
    score: metric.score ?? 0,
    success: metric.success ?? false,
    failureMode: metric.failureMode,
    reasoning: metric.reasoning,
    criteria: orUndefinedIfEmpty(objectsIn(metric.evaluationChecklist?.items).map((item) => toCriterion(item)))
  });
  const evaluationDetails = orUndefinedIfEmpty(afterResults.map((result) => ({
    itemId: itemIdOf(result),
    score: result.evaluation?.score ?? 0,
    success: result.evaluation?.success ?? false,
    metrics: orUndefinedIfEmpty(objectsIn(result.evaluation?.evaluations).map((metric) => toMetric(metric)))
  })));

  return {
    originalScore,
    criteriaScores,
    datasetResults,
    failureModes,
    mutations,
    evaluationDetails
  };
}
405
/**
 * Computes the average score of every evaluation metric before and after
 * optimization.
 *
 * Metrics are read from `result.evaluation.evaluations`; a metric without a
 * name is grouped under "unknown" and a metric without a score counts as 0.
 * Inputs that are not arrays, and entries that are null/undefined, are
 * ignored rather than throwing.
 *
 * @param beforeResults Baseline execution results.
 * @param afterResults Execution results of the optimized prompt.
 * @returns `undefined` when no metric was found; otherwise one
 *   `{ name, before, after }` entry per metric, in first-seen order (baseline
 *   metrics first). `before`/`after` is `undefined` when the metric does not
 *   occur on that side.
 */
function extractCriteriaScores(beforeResults, afterResults) {
  const listOf = (value) => Array.isArray(value) ? value : [];

  // Groups every metric score found in `results` by metric name.
  const collectScoresByMetric = (results) => {
    const scoresByMetric = new Map();
    for (const result of listOf(results)) {
      for (const metric of listOf(result?.evaluation?.evaluations)) {
        const name = metric?.name ?? "unknown";
        const score = metric?.score ?? 0;
        const scores = scoresByMetric.get(name);
        if (scores) {
          scores.push(score);
        } else {
          scoresByMetric.set(name, [score]);
        }
      }
    }
    return scoresByMetric;
  };

  const before = collectScoresByMetric(beforeResults);
  const after = collectScoresByMetric(afterResults);

  // Map keys iterate in insertion order, so this keeps baseline metrics
  // first, followed by metrics that only appear after optimization.
  const metricNames = new Set([...before.keys(), ...after.keys()]);
  if (metricNames.size === 0)
    return undefined;

  // Only called on lists taken from the maps, which are never empty.
  const average = (values) => values.reduce((sum, value) => sum + value, 0) / values.length;
  const averageFor = (scoresByMetric, name) => {
    const scores = scoresByMetric.get(name);
    return scores ? average(scores) : undefined;
  };

  return Array.from(metricNames, (name) => ({
    name,
    before: averageFor(before, name),
    after: averageFor(after, name)
  }));
}
438
+
320
439
  // src/lib/sdk-client.ts
321
440
  var exports_sdk_client = {};
322
441
  __export(exports_sdk_client, {
@@ -646,9 +765,13 @@ class SDKClientWrapper {
646
765
  const prompt = await this.getPrompt(String(job.promptId ?? ""));
647
766
  const statesRes = await this.request(`/api/optimization/${jobId}/states`).catch(() => ({ states: [] }));
648
767
  const latestState = statesRes.states[statesRes.states.length - 1];
649
- const iterCtx = latestState?.state.iterationContext;
650
- const mutatedPromptText = iterCtx?.currentPrompt?.prompt;
651
- const originalPromptText = iterCtx?.basePrompt?.prompt;
768
+ const rawState = latestState?.state ?? {};
769
+ const iterCtx = rawState.iterationContext ?? rawState.current?.context;
770
+ const basePromptObj = iterCtx?.basePrompt;
771
+ const currentPromptObj = iterCtx?.currentPrompt;
772
+ const mutatedPromptText = typeof currentPromptObj?.prompt === "string" ? currentPromptObj.prompt : undefined;
773
+ const originalPromptText = typeof basePromptObj?.prompt === "string" ? basePromptObj.prompt : undefined;
774
+ const extracted = extractScorecardDetails(rawState, iterCtx);
652
775
  return {
653
776
  job: {
654
777
  id: job.id ?? jobId,
@@ -658,17 +781,28 @@ class SDKClientWrapper {
658
781
  },
659
782
  prompt,
660
783
  bestScore: job.bestScore,
784
+ originalScore: extracted.originalScore,
661
785
  iterationsCompleted: job.currentIteration,
662
786
  scoreProgression: Array.isArray(progress.progression) ? progress.progression.map((p) => typeof p.score === "number" ? p.score : 0) : undefined,
663
787
  mutatedPromptText,
664
- originalPromptText
788
+ originalPromptText,
789
+ criteriaScores: extracted.criteriaScores,
790
+ datasetResults: extracted.datasetResults,
791
+ failureModes: extracted.failureModes,
792
+ mutations: extracted.mutations,
793
+ evaluationDetails: extracted.evaluationDetails
665
794
  };
666
795
  } catch (error) {
667
796
  this.handleError(error);
668
797
  }
669
798
  }
670
799
  async listTraces(filters) {
671
- const params = filters ? new URLSearchParams(filters).toString() : "";
800
+ const filterRecord = {};
801
+ if (filters?.promptId)
802
+ filterRecord.promptId = filters.promptId;
803
+ if (filters?.source)
804
+ filterRecord.source = filters.source;
805
+ const params = Object.keys(filterRecord).length > 0 ? new URLSearchParams(filterRecord).toString() : "";
672
806
  const response = await this.request(`/api/traces${params ? `?${params}` : ""}`);
673
807
  return response.data ?? [];
674
808
  }
@@ -927,10 +1061,10 @@ var init_sdk_client = __esm(() => {
927
1061
  });
928
1062
 
929
1063
  // src/bin/cli.ts
930
- import { Command as Command18 } from "commander";
1064
+ import { Command as Command19 } from "commander";
931
1065
  import chalk24 from "chalk";
932
- import { readFileSync as readFileSync11 } from "fs";
933
- import { join as join8, dirname } from "path";
1066
+ import { readFileSync as readFileSync12 } from "fs";
1067
+ import { join as join9, dirname } from "path";
934
1068
  import { fileURLToPath } from "url";
935
1069
 
936
1070
  // src/commands/auth.ts
@@ -4830,10 +4964,11 @@ Examples:
4830
4964
 
4831
4965
  Note: MutagenT traces replace Langfuse for observability.
4832
4966
  `);
4833
- traces.command("list").description("List traces").option("-p, --prompt <id>", "Filter by prompt ID").option("-l, --limit <n>", "Limit results", "50").addHelpText("after", `
4967
+ traces.command("list").description("List traces").option("-p, --prompt <id>", "Filter by prompt ID").option("-s, --source <source>", "Filter by trace source (e.g., claude-code, sdk, langchain)").option("-l, --limit <n>", "Limit results", "50").addHelpText("after", `
4834
4968
  Examples:
4835
4969
  ${chalk12.dim("$")} mutagent traces list
4836
4970
  ${chalk12.dim("$")} mutagent traces list --prompt <prompt-id>
4971
+ ${chalk12.dim("$")} mutagent traces list --source claude-code --json
4837
4972
  ${chalk12.dim("$")} mutagent traces list --limit 10 --json
4838
4973
 
4839
4974
  ${chalk12.dim("Tip: Filter by prompt to see traces for a specific prompt version.")}
@@ -4843,7 +4978,8 @@ ${chalk12.dim("Tip: Filter by prompt to see traces for a specific prompt version
4843
4978
  try {
4844
4979
  const client = getSDKClient();
4845
4980
  const tracesList = await client.listTraces({
4846
- promptId: options.prompt
4981
+ promptId: options.prompt,
4982
+ source: options.source
4847
4983
  });
4848
4984
  const limit = parseInt(options.limit, 10) || 50;
4849
4985
  const limited = tracesList.slice(0, limit);
@@ -7349,6 +7485,257 @@ Examples:
7349
7485
  return usage;
7350
7486
  }
7351
7487
 
7488
+ // src/commands/hooks.ts
7489
+ init_config();
7490
+ import { Command as Command18 } from "commander";
7491
+ import { randomUUID } from "crypto";
7492
+ import { join as join8 } from "path";
7493
+ import { tmpdir } from "os";
7494
+ import { readFileSync as readFileSync11, writeFileSync as writeFileSync6, unlinkSync, existsSync as existsSync14 } from "fs";
7495
+ async function safeExecute(fn) {
7496
+ try {
7497
+ await fn();
7498
+ } catch (err) {
7499
+ process.stderr.write(`[mutagent hooks] Warning: ${err instanceof Error ? err.message : String(err)}
7500
+ `);
7501
+ }
7502
+ }
7503
/**
 * Reads stdin until it closes and parses the collected text as JSON.
 *
 * @returns The parsed JSON value.
 * @throws SyntaxError (from `JSON.parse`) when stdin is not valid JSON.
 */
async function readStdin() {
  const pieces = [];
  for await (const piece of process.stdin) {
    // Chunks may arrive as strings or Buffers; store them uniformly.
    const asBuffer = Buffer.isBuffer(piece) ? piece : Buffer.from(piece);
    pieces.push(asBuffer);
  }
  const text = Buffer.concat(pieces).toString("utf-8");
  return JSON.parse(text);
}
7510
/**
 * Returns the path of the per-session hook state file inside the OS temp
 * directory.
 *
 * `sessionId` originates from the hook's stdin payload, so it is reduced to
 * `[A-Za-z0-9_-]` before being placed in the file name (every other character
 * becomes `_`). Without this, separators or `..` segments in the id would be
 * normalised by `join` and could point the read/write/unlink outside the temp
 * directory. Ids made only of letters, digits, `_` and `-` map to the same
 * path as before.
 *
 * @param sessionId Session identifier taken from the hook payload.
 * @returns Path of the form `<tmpdir>/mutagent-hook-<sanitized id>.json`.
 */
function stateFilePath(sessionId) {
  const safeId = String(sessionId).replace(/[^A-Za-z0-9_-]/g, "_");
  return join8(tmpdir(), `mutagent-hook-${safeId}.json`);
}
7513
/**
 * Loads the persisted hook state for a session.
 *
 * The file content is validated before it is returned: anything that is not
 * a JSON object is treated as "no state", and a missing or malformed
 * `openSpans` is replaced by an empty object so callers can index into it
 * safely.
 *
 * @param sessionId Session identifier used to locate the state file.
 * @returns The state object, or `null` when the file is missing, unreadable,
 *   not valid JSON, or not a JSON object.
 */
function readState(sessionId) {
  let parsed;
  try {
    // A missing file makes readFileSync throw, so no separate existence
    // check is needed.
    parsed = JSON.parse(readFileSync11(stateFilePath(sessionId), "utf-8"));
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))
    return null;
  const spans = parsed.openSpans;
  if (spans === null || typeof spans !== "object" || Array.isArray(spans))
    parsed.openSpans = {};
  return parsed;
}
7523
/**
 * Persists the hook state of a session as JSON, overwriting any previous
 * state file for that session.
 *
 * @param sessionId Session identifier used to locate the state file.
 * @param state State object to serialise.
 */
function writeState(sessionId, state) {
  const target = stateFilePath(sessionId);
  const serialized = JSON.stringify(state);
  writeFileSync6(target, serialized, "utf-8");
}
7526
/**
 * Removes the state file of a session if it exists. Failures while deleting
 * are deliberately ignored (best-effort cleanup).
 *
 * @param sessionId Session identifier used to locate the state file.
 */
function deleteState(sessionId) {
  const target = stateFilePath(sessionId);
  if (!existsSync14(target))
    return;
  try {
    unlinkSync(target);
  } catch {
    // Best effort: nothing to do if the file cannot be removed.
  }
}
7534
// Upper bound, in milliseconds, for a single trace upload request.
var API_TIMEOUT_MS = 5000;

/**
 * POSTs a batch of traces to `<endpoint>/api/traces/batch`.
 *
 * Without an API key nothing is sent and a warning is written to stderr. A
 * non-OK response is reported on stderr (status plus the first 200 characters
 * of the body) and is not thrown. The request is aborted after
 * `API_TIMEOUT_MS`; a fetch failure or abort propagates to the caller.
 *
 * @param traces Trace objects placed under the `traces` key of the JSON body.
 */
async function sendBatchTrace(traces) {
  const apiKey = getApiKey();
  if (!apiKey) {
    process.stderr.write("[mutagent hooks] Warning: Not authenticated. Run: mutagent auth login\n");
    return;
  }

  const config = loadConfig();
  const baseUrl = config.endpoint ?? "http://localhost:3003";
  // Workspace/organization headers are only sent when configured.
  const headers = {
    "x-api-key": apiKey,
    "Content-Type": "application/json",
    ...(config.defaultWorkspace ? { "x-workspace-id": config.defaultWorkspace } : {}),
    ...(config.defaultOrganization ? { "x-organization-id": config.defaultOrganization } : {})
  };

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), API_TIMEOUT_MS);
  try {
    const response = await fetch(baseUrl + "/api/traces/batch", {
      method: "POST",
      headers,
      body: JSON.stringify({ traces }),
      signal: controller.signal
    });
    if (response.ok)
      return;
    const text = await response.text().catch(() => "");
    process.stderr.write("[mutagent hooks] Warning: API returned " + String(response.status) + ": " + text.slice(0, 200) + "\n");
  } finally {
    clearTimeout(timer);
  }
}
7574
/**
 * Returns the first non-empty string found under any of `keys` in `input`.
 *
 * `input` is the JSON value parsed from stdin and may legitimately be `null`
 * or a non-object (for example when the payload is the literal `null`); in
 * that case no key can match and "" is returned instead of throwing.
 *
 * @param input Parsed hook payload.
 * @param keys Candidate property names, checked in order.
 * @returns The first value that is a non-empty string, or "" when none is.
 */
function getString(input, ...keys) {
  if (input === null || typeof input !== "object")
    return "";
  for (const key of keys) {
    const value = input[key];
    if (typeof value === "string" && value.length > 0)
      return value;
  }
  return "";
}
7582
/**
 * Handles the "session-start" hook event.
 *
 * Reads the JSON payload from stdin, creates a fresh state file for the
 * session (trace id `cc-<sessionId>`, no open spans) and reports a running
 * "Claude Code Session" trace.
 *
 * @throws Error when the payload has neither `session_id` nor `sessionId`.
 */
async function handleSessionStart() {
  const payload = await readStdin();
  const sessionId = getString(payload, "session_id", "sessionId");
  if (!sessionId) {
    throw new Error("Missing session_id in stdin");
  }

  const startedAt = new Date().toISOString();
  const traceId = "cc-" + sessionId;

  writeState(sessionId, {
    traceId,
    sessionId,
    startTime: startedAt,
    openSpans: {}
  });

  const sessionTrace = {
    traceId,
    sessionId,
    name: "Claude Code Session",
    source: "claude-code",
    startTime: startedAt,
    status: "running",
    spans: []
  };
  await sendBatchTrace([sessionTrace]);
}
7608
/**
 * Handles the "session-end" hook event.
 *
 * Reads the JSON payload from stdin and reports the session trace as
 * completed, reusing the trace id and start time from the persisted state
 * when available (falling back to `cc-<sessionId>` and the current time).
 * The state file is removed afterwards even when reporting fails, so a failed
 * or timed-out upload does not leave a stale file behind; the upload error
 * still propagates to the caller.
 *
 * @throws Error when the payload has neither `session_id` nor `sessionId`.
 */
async function handleSessionEnd() {
  const input = await readStdin();
  const sessionId = getString(input, "session_id", "sessionId");
  if (!sessionId)
    throw new Error("Missing session_id in stdin");
  const now = new Date().toISOString();
  const state = readState(sessionId);
  const traceId = state?.traceId ?? `cc-${sessionId}`;
  const startTime = state?.startTime ?? now;
  try {
    await sendBatchTrace([
      {
        traceId,
        sessionId,
        name: "Claude Code Session",
        source: "claude-code",
        startTime,
        endTime: now,
        status: "completed",
        spans: []
      }
    ]);
  } finally {
    // The session is over either way; never leave its state file behind.
    deleteState(sessionId);
  }
}
7631
/**
 * Handles the "pre-tool-use" hook event.
 *
 * Reads the JSON payload from stdin, records a new open span for the tool in
 * the session state (creating the state when none is stored) and reports the
 * span as running. The tool name defaults to "unknown".
 *
 * @throws Error when the payload has neither `session_id` nor `sessionId`.
 */
async function handlePreToolUse() {
  const payload = await readStdin();
  const sessionId = getString(payload, "session_id", "sessionId");
  const toolName = getString(payload, "tool_name", "toolName") || "unknown";
  if (!sessionId) {
    throw new Error("Missing session_id in stdin");
  }

  const startedAt = new Date().toISOString();
  let state = readState(sessionId);
  if (state === null || state === undefined) {
    // No stored state (e.g. session-start was not observed): start one now.
    state = {
      traceId: "cc-" + sessionId,
      sessionId,
      startTime: startedAt,
      openSpans: {}
    };
  }

  // Remember the span so the matching post-tool-use event can close it.
  const spanId = randomUUID();
  state.openSpans[spanId] = { spanId, startTime: startedAt, toolName };
  writeState(sessionId, state);

  const runningSpan = {
    spanId,
    name: toolName,
    kind: "tool",
    startTime: startedAt,
    status: "running"
  };
  await sendBatchTrace([
    {
      traceId: state.traceId,
      sessionId,
      name: "Claude Code Session",
      source: "claude-code",
      startTime: state.startTime,
      status: "running",
      spans: [runningSpan]
    }
  ]);
}
7671
/**
 * Handles the "post-tool-use" hook event.
 *
 * Reads the JSON payload from stdin, closes the most recently opened span
 * with the same tool name (if one is stored) and reports it as completed.
 * When no matching span exists, a new span id is generated and the span's
 * start time equals its end time.
 *
 * @throws Error when the payload has neither `session_id` nor `sessionId`.
 */
async function handlePostToolUse() {
  const payload = await readStdin();
  const sessionId = getString(payload, "session_id", "sessionId");
  const toolName = getString(payload, "tool_name", "toolName") || "unknown";
  if (!sessionId) {
    throw new Error("Missing session_id in stdin");
  }

  const endedAt = new Date().toISOString();
  const state = readState(sessionId);
  const traceId = state?.traceId ?? "cc-" + sessionId;
  const startTime = state?.startTime ?? endedAt;

  // Search newest-first so nested/repeated calls of one tool close in LIFO order.
  const match = state?.openSpans
    ? Object.entries(state.openSpans).reverse().find(([, span]) => span.toolName === toolName)
    : undefined;
  const matchedKey = match ? match[0] : null;
  const matchedSpan = match ? match[1] : null;

  const spanId = matchedSpan?.spanId ?? randomUUID();
  const spanStartTime = matchedSpan?.startTime ?? endedAt;

  if (state && matchedKey) {
    // Drop the closed span and persist the remaining open ones.
    const { [matchedKey]: _closed, ...stillOpen } = state.openSpans;
    state.openSpans = stillOpen;
    writeState(sessionId, state);
  }

  const completedSpan = {
    spanId,
    name: toolName,
    kind: "tool",
    startTime: spanStartTime,
    endTime: endedAt,
    status: "completed"
  };
  await sendBatchTrace([
    {
      traceId,
      sessionId,
      name: "Claude Code Session",
      source: "claude-code",
      startTime,
      status: "running",
      spans: [completedSpan]
    }
  ]);
}
7721
/**
 * Builds the `hooks` command with its `claude-code` subcommand group.
 *
 * Each event subcommand runs its handler through `safeExecute`, so a failing
 * handler only produces a warning on stderr.
 *
 * @returns The configured `hooks` command.
 */
function createHooksCommand() {
  const hooks = new Command18("hooks").description("Hook handlers for AI coding assistants");
  const claudeCode = hooks.command("claude-code").description("Claude Code session telemetry");

  // [subcommand name, help description, handler], registered in this order.
  const events = [
    ["session-start", "Handle session start event", handleSessionStart],
    ["session-end", "Handle session end event", handleSessionEnd],
    ["pre-tool-use", "Handle pre-tool-use event", handlePreToolUse],
    ["post-tool-use", "Handle post-tool-use event", handlePostToolUse]
  ];
  for (const [eventName, summary, handler] of events) {
    claudeCode.command(eventName).description(summary).action(async () => {
      await safeExecute(handler);
    });
  }

  return hooks;
}
7738
+
7352
7739
  // src/bin/cli.ts
7353
7740
  init_config();
7354
7741
  var cliVersion = "0.1.1";
@@ -7357,12 +7744,12 @@ if (process.env.CLI_VERSION) {
7357
7744
  } else {
7358
7745
  try {
7359
7746
  const __dirname2 = dirname(fileURLToPath(import.meta.url));
7360
- const pkgPath = join8(__dirname2, "..", "..", "package.json");
7361
- const pkg = JSON.parse(readFileSync11(pkgPath, "utf-8"));
7747
+ const pkgPath = join9(__dirname2, "..", "..", "package.json");
7748
+ const pkg = JSON.parse(readFileSync12(pkgPath, "utf-8"));
7362
7749
  cliVersion = pkg.version ?? cliVersion;
7363
7750
  } catch {}
7364
7751
  }
7365
- var program = new Command18;
7752
+ var program = new Command19;
7366
7753
  program.name("mutagent").description(`MutagenT CLI - AI-native prompt optimization platform
7367
7754
 
7368
7755
  Documentation: https://docs.mutagent.io/cli
@@ -7400,6 +7787,7 @@ ${chalk24.yellow("Command Navigation:")}
7400
7787
  mutagent prompts optimize results <job-id> ${chalk24.dim("View scorecard")}
7401
7788
 
7402
7789
  mutagent integrate <framework> ${chalk24.dim("Framework integration guide")}
7790
+ mutagent hooks claude-code <event> ${chalk24.dim("Hook handler for Claude Code telemetry")}
7403
7791
  mutagent playground run <id> --input '{...}' ${chalk24.dim("Quick test")}
7404
7792
 
7405
7793
  ${chalk24.yellow("Workflow: Evaluate → Optimize:")}
@@ -7449,7 +7837,8 @@ program.addCommand(createProvidersCommand());
7449
7837
  program.addCommand(createExploreCommand());
7450
7838
  program.addCommand(createSkillsCommand());
7451
7839
  program.addCommand(createUsageCommand());
7840
+ program.addCommand(createHooksCommand());
7452
7841
  program.parse();
7453
7842
 
7454
- //# debugId=B35CD49159FCE51364756E2164756E21
7843
+ //# debugId=CF947D7B55AD31C164756E2164756E21
7455
7844
  //# sourceMappingURL=cli.js.map