npm - @tangle-network/agent-eval - Versions diffs - 0.71.0 → 0.72.0 - Mend

@tangle-network/agent-eval 0.71.0 → 0.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/CHANGELOG.md +24 -0
package/dist/campaign/index.js +25 -12
package/dist/campaign/index.js.map +1 -1
package/dist/{chunk-VMAYE3LM.js → chunk-4QJN7RDX.js} +3 -3
package/dist/chunk-SL55X4VN.js +186 -0
package/dist/chunk-SL55X4VN.js.map +1 -0
package/dist/{chunk-6QZUCFKM.js → chunk-UD6EF73X.js} +3 -3
package/dist/{chunk-6XQIEUQ2.js → chunk-ZPSKPT3V.js} +5 -3
package/dist/{chunk-6XQIEUQ2.js.map → chunk-ZPSKPT3V.js.map} +1 -1
package/dist/contract/index.js +3 -3
package/dist/index.js +11 -156
package/dist/index.js.map +1 -1
package/dist/openapi.json +1 -1
package/dist/{run-campaign-BVY3RGAZ.js → run-campaign-OVEZF24D.js} +2 -2
package/package.json +1 -1
package/dist/chunk-PQV2TKC3.js +0 -27
package/dist/chunk-PQV2TKC3.js.map +0 -1
package/dist/{chunk-VMAYE3LM.js.map → chunk-4QJN7RDX.js.map} +0 -0
package/dist/{chunk-6QZUCFKM.js.map → chunk-UD6EF73X.js.map} +0 -0
package/dist/{run-campaign-BVY3RGAZ.js.map → run-campaign-OVEZF24D.js.map} +0 -0

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,13 @@
 import {
-  agentProfileHash
-} from "./chunk-PQV2TKC3.js";
+  MODEL_PRICING,
+  MetricsCollector,
+  TokenCounter,
+  agentProfileHash,
+  estimateCost,
+  estimateTokens,
+  isModelPriced,
+  resolveModelPricing
+} from "./chunk-SL55X4VN.js";
 import {
   HoldoutAuditor,
   canaryLeakView,
@@ -31,12 +38,12 @@ import {
   scoreRedTeamOutput,
   surfaceContentHash,
   toolNamesForRun
-} from "./chunk-VMAYE3LM.js";
+} from "./chunk-4QJN7RDX.js";
 import {
   BackendIntegrityError,
   assertRealBackend,
   summarizeBackendIntegrity
-} from "./chunk-6XQIEUQ2.js";
+} from "./chunk-ZPSKPT3V.js";
 import {
   BENCHMARK_SPLIT_SEED,
   benchmarks_exports,
@@ -3093,158 +3100,6 @@ var ConvergenceTracker = class {
   }
 };
-// src/metrics.ts
-var MODEL_PRICING = {
-  "gpt-4o": { input: 25e-4, output: 0.01 },
-  "gpt-4o-mini": { input: 15e-5, output: 6e-4 },
-  "gpt-4-turbo": { input: 0.01, output: 0.03 },
-  "claude-sonnet-4-20250514": { input: 3e-3, output: 0.015 },
-  "claude-opus-4-20250514": { input: 0.015, output: 0.075 },
-  "claude-3-haiku-20240307": { input: 25e-5, output: 125e-5 }
-};
-var FAMILY_PRICING = [
-  [/claude.*opus/, { input: 0.015, output: 0.075 }],
-  [/claude.*haiku/, { input: 8e-4, output: 4e-3 }],
-  [/claude.*sonnet|claude-code|claude-sonnet/, { input: 3e-3, output: 0.015 }],
-  [/gpt-4o-mini/, { input: 15e-5, output: 6e-4 }],
-  [/gpt-5|gpt-4\.1|o[134]\b/, { input: 125e-5, output: 0.01 }],
-  [/gpt-4o|gpt-4/, { input: 25e-4, output: 0.01 }],
-  [/deepseek/, { input: 3e-4, output: 11e-4 }],
-  [/glm|zhipu|zai/, { input: 6e-4, output: 22e-4 }],
-  [/kimi|moonshot/, { input: 6e-4, output: 25e-4 }],
-  [/qwen/, { input: 4e-4, output: 12e-4 }],
-  [/gemini.*flash/, { input: 1e-4, output: 4e-4 }],
-  [/gemini/, { input: 125e-5, output: 5e-3 }],
-  [/llama/, { input: 2e-4, output: 6e-4 }]
-];
-function normalizeModelId(model) {
-  return (model.split("@")[0] ?? model).trim().toLowerCase();
-}
-function resolveModelPricing(model) {
-  if (MODEL_PRICING[model]) return MODEL_PRICING[model];
-  const id = normalizeModelId(model);
-  if (MODEL_PRICING[id]) return MODEL_PRICING[id];
-  for (const [pattern, price] of FAMILY_PRICING) {
-    if (pattern.test(id)) return price;
-  }
-  return null;
-}
-function isModelPriced(model) {
-  return resolveModelPricing(model) !== null;
-}
-var warnedUnpricedModels = /* @__PURE__ */ new Set();
-function estimateTokens(text) {
-  return Math.ceil(text.length / 4);
-}
-function estimateCost(inputTokens, outputTokens, model) {
-  const pricing = resolveModelPricing(model);
-  if (!pricing) {
-    if (!warnedUnpricedModels.has(model)) {
-      warnedUnpricedModels.add(model);
-      console.warn(
-        `estimateCost: no pricing for model "${model}" \u2014 returning 0; add it to MODEL_PRICING/FAMILY_PRICING (cost/Pareto axes will be blank until then)`
-      );
-    }
-    return 0;
-  }
-  return inputTokens / 1e3 * pricing.input + outputTokens / 1e3 * pricing.output;
-}
-var TokenCounter = class {
-  totalInput = 0;
-  totalOutput = 0;
-  totalCost = 0;
-  model;
-  constructor(model = "gpt-4o") {
-    this.model = model;
-  }
-  /** Record tokens for a turn, returns per-turn cost */
-  record(inputTokens, outputTokens) {
-    this.totalInput += inputTokens;
-    this.totalOutput += outputTokens;
-    const cost = estimateCost(inputTokens, outputTokens, this.model);
-    this.totalCost += cost;
-    return cost;
-  }
-  /** Estimate and record from raw text */
-  recordFromText(inputText, outputText) {
-    const inputTokens = estimateTokens(inputText);
-    const outputTokens = estimateTokens(outputText);
-    const cost = this.record(inputTokens, outputTokens);
-    return { inputTokens, outputTokens, cost };
-  }
-  getTotalInput() {
-    return this.totalInput;
-  }
-  getTotalOutput() {
-    return this.totalOutput;
-  }
-  getTotalCost() {
-    return this.totalCost;
-  }
-};
-var MetricsCollector = class {
-  client;
-  workspaceId;
-  metrics = [];
-  constructor(client, workspaceId) {
-    this.client = client;
-    this.workspaceId = workspaceId;
-  }
-  /** Collect metrics after a turn completes */
-  async collect(turn, responseLatencyMs, responseChars, codeBlocksProduced, blocksExtracted, completionCriteriaMet, completionCriteriaTotal, qualityScore, inputTokens = 0, outputTokens = 0, estimatedCostUsd = 0) {
-    const state = await this.getState();
-    const m = {
-      turn,
-      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-      tasks: state.tasks,
-      events: state.events,
-      proposals: state.proposals,
-      vaultFiles: state.vaultFiles.length,
-      responseLatencyMs,
-      responseChars,
-      codeBlocksProduced,
-      blocksExtracted,
-      qualityScore,
-      inputTokens,
-      outputTokens,
-      estimatedCostUsd,
-      totalCostUsd: estimatedCostUsd,
-      completionPercent: completionCriteriaTotal > 0 ? completionCriteriaMet / completionCriteriaTotal * 100 : 0
-    };
-    this.metrics.push(m);
-    return m;
-  }
-  /** Get current product state */
-  async getState() {
-    const [tasks, events, approvals, vaultFiles] = await Promise.all([
-      this.client.getTasks(this.workspaceId),
-      this.client.getEvents(this.workspaceId),
-      this.client.getApprovals(this.workspaceId),
-      this.client.getVaultTree(this.workspaceId)
-    ]);
-    return {
-      tasks: tasks.length,
-      events: events.length,
-      proposals: {
-        pending: approvals.filter((a) => a.status === "pending").length,
-        approved: approvals.filter((a) => a.status === "approved").length,
-        rejected: approvals.filter((a) => a.status === "rejected").length
-      },
-      vaultFiles,
-      codeBlocks: 0,
-      generations: 0
-    };
-  }
-  /** Get all collected metrics */
-  getMetrics() {
-    return [...this.metrics];
-  }
-  /** Get convergence curve (completion% over turns) */
-  getConvergenceCurve() {
-    return this.metrics.map((m) => m.completionPercent);
-  }
-};
 // src/driver.ts
 var RIGOR_STANCE = {
   cooperative: "Your stance: a pragmatic early adopter. You accept reasonable answers and only push back on clear gaps or outright errors.",