npm - @fallom/trace - Versions diffs - 0.2.15 → 0.2.17 - Mend

@fallom/trace 0.2.15 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.js CHANGED Viewed

@@ -590,9 +590,159 @@ async function datasetFromFallom(datasetKey, version, config) {
   );
   return items;
 }
+var EvaluationDataset;
 var init_helpers = __esm({
   "src/evals/helpers.ts"() {
     "use strict";
+    EvaluationDataset = class {
+      constructor() {
+        this._goldens = [];
+        this._testCases = [];
+        this._datasetKey = null;
+        this._datasetName = null;
+        this._version = null;
+      }
+      /** List of golden records (inputs with optional expected outputs). */
+      get goldens() {
+        return this._goldens;
+      }
+      /** List of test cases (inputs with actual outputs from your LLM). */
+      get testCases() {
+        return this._testCases;
+      }
+      /** The Fallom dataset key if pulled from Fallom. */
+      get datasetKey() {
+        return this._datasetKey;
+      }
+      /**
+       * Pull a dataset from Fallom.
+       *
+       * @param alias - The dataset key/alias in Fallom
+       * @param version - Specific version to pull (default: latest)
+       * @returns Self for chaining
+       */
+      async pull(alias, version) {
+        const { _apiKey: _apiKey2, _baseUrl: _baseUrl2, _initialized: _initialized2 } = await Promise.resolve().then(() => (init_core(), core_exports));
+        if (!_initialized2) {
+          throw new Error("Fallom evals not initialized. Call evals.init() first.");
+        }
+        const params = new URLSearchParams({ include_entries: "true" });
+        if (version !== void 0) {
+          params.set("version", String(version));
+        }
+        const url = `${_baseUrl2}/api/datasets/${encodeURIComponent(alias)}?${params}`;
+        const response = await fetch(url, {
+          headers: {
+            Authorization: `Bearer ${_apiKey2}`,
+            "Content-Type": "application/json"
+          }
+        });
+        if (response.status === 404) {
+          throw new Error(`Dataset '${alias}' not found`);
+        } else if (response.status === 403) {
+          throw new Error(`Access denied to dataset '${alias}'`);
+        }
+        if (!response.ok) {
+          throw new Error(`Failed to fetch dataset: ${response.statusText}`);
+        }
+        const data = await response.json();
+        this._datasetKey = alias;
+        this._datasetName = data.dataset?.name || alias;
+        this._version = data.version?.version || null;
+        this._goldens = [];
+        for (const entry of data.entries || []) {
+          this._goldens.push({
+            input: entry.input || "",
+            expectedOutput: entry.output,
+            systemMessage: entry.systemMessage,
+            metadata: entry.metadata
+          });
+        }
+        console.log(
+          `\u2713 Pulled dataset '${this._datasetName}' (version ${this._version}) with ${this._goldens.length} goldens`
+        );
+        return this;
+      }
+      /**
+       * Add a golden record manually.
+       * @param golden - A Golden object
+       * @returns Self for chaining
+       */
+      addGolden(golden) {
+        this._goldens.push(golden);
+        return this;
+      }
+      /**
+       * Add multiple golden records.
+       * @param goldens - Array of Golden objects
+       * @returns Self for chaining
+       */
+      addGoldens(goldens) {
+        this._goldens.push(...goldens);
+        return this;
+      }
+      /**
+       * Add a test case with actual LLM output.
+       * @param testCase - An LLMTestCase object
+       * @returns Self for chaining
+       */
+      addTestCase(testCase) {
+        this._testCases.push(testCase);
+        return this;
+      }
+      /**
+       * Add multiple test cases.
+       * @param testCases - Array of LLMTestCase objects
+       * @returns Self for chaining
+       */
+      addTestCases(testCases) {
+        this._testCases.push(...testCases);
+        return this;
+      }
+      /**
+       * Automatically generate test cases by running all goldens through your LLM app.
+       *
+       * @param llmApp - A callable that takes messages and returns response
+       * @param options - Configuration options
+       * @returns Self for chaining
+       */
+      async generateTestCases(llmApp, options = {}) {
+        const { includeContext = false } = options;
+        console.log(`Generating test cases for ${this._goldens.length} goldens...`);
+        for (let i = 0; i < this._goldens.length; i++) {
+          const golden = this._goldens[i];
+          const messages = [];
+          if (golden.systemMessage) {
+            messages.push({ role: "system", content: golden.systemMessage });
+          }
+          messages.push({ role: "user", content: golden.input });
+          const response = await llmApp(messages);
+          const testCase = {
+            input: golden.input,
+            actualOutput: response.content,
+            expectedOutput: golden.expectedOutput,
+            systemMessage: golden.systemMessage,
+            context: includeContext ? response.context : golden.context,
+            metadata: golden.metadata
+          };
+          this._testCases.push(testCase);
+          console.log(
+            `  [${i + 1}/${this._goldens.length}] Generated output for: ${golden.input.slice(0, 50)}...`
+          );
+        }
+        console.log(`\u2713 Generated ${this._testCases.length} test cases`);
+        return this;
+      }
+      /** Clear all test cases (useful for re-running with different LLM). */
+      clearTestCases() {
+        this._testCases = [];
+        return this;
+      }
+      /** Return the number of goldens. */
+      get length() {
+        return this._goldens.length;
+      }
+    };
   }
 });
@@ -707,9 +857,22 @@ async function evaluate(options) {
     name,
     description,
     verbose = true,
+    testCases,
     _skipUpload = false
   } = options;
-  const dataset = await resolveDataset(datasetInput);
+  let dataset;
+  if (testCases !== void 0 && testCases.length > 0) {
+    dataset = testCases.map((tc) => ({
+      input: tc.input,
+      output: tc.actualOutput,
+      systemMessage: tc.systemMessage,
+      metadata: tc.metadata
+    }));
+  } else if (datasetInput !== void 0) {
+    dataset = await resolveDataset(datasetInput);
+  } else {
+    throw new Error("Either 'dataset' or 'testCases' must be provided");
+  }
   for (const m of metrics) {
     if (typeof m === "string" && !AVAILABLE_METRICS.includes(m)) {
       throw new Error(
@@ -775,6 +938,9 @@ async function compareModels(options) {
     description,
     verbose = true
   } = options;
+  if (!datasetInput) {
+    throw new Error("'dataset' is required for compareModels()");
+  }
   const dataset = await resolveDataset(datasetInput);
   const results = {};
   if (includeProduction) {
@@ -2050,6 +2216,54 @@ function clearPromptContext() {
   promptContext = null;
 }
+// src/trace/wrappers/shared-utils.ts
+function sanitizeMetadataOnly(key, value) {
+  const contentKeys = [
+    "text",
+    "content",
+    "message",
+    "messages",
+    "object",
+    "prompt",
+    "system",
+    "input",
+    "output",
+    "response",
+    "toolCalls",
+    "toolResults",
+    "steps",
+    "reasoning",
+    "rawResponse",
+    "rawCall",
+    "body",
+    "candidates",
+    "parts"
+  ];
+  if (contentKeys.includes(key)) {
+    if (typeof value === "string") {
+      return `[content omitted: ${value.length} chars]`;
+    }
+    if (Array.isArray(value)) {
+      return `[content omitted: ${value.length} items]`;
+    }
+    if (typeof value === "object" && value !== null) {
+      return "[content omitted]";
+    }
+  }
+  if (typeof value === "string") {
+    if (value.startsWith("data:image/")) {
+      return "[base64 image omitted]";
+    }
+    if (value.length > 1e3) {
+      return `[large string omitted: ${value.length} chars]`;
+    }
+  }
+  if (value instanceof Uint8Array || value && value.type === "Buffer") {
+    return "[binary data omitted]";
+  }
+  return value;
+}
 // src/trace/wrappers/openai.ts
 function wrapOpenAI(client, sessionCtx) {
   const originalCreate = client.chat.completions.create.bind(
@@ -2097,6 +2311,13 @@ function wrapOpenAI(client, sessionCtx) {
       if (response?.usage) {
         attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          response,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const waterfallTimings = {
         requestStart: 0,
         requestEnd: endTime - startTime,
@@ -2223,6 +2444,13 @@ function wrapAnthropic(client, sessionCtx) {
       if (response?.usage) {
         attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          response,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const waterfallTimings = {
         requestStart: 0,
         requestEnd: endTime - startTime,
@@ -2343,6 +2571,13 @@ function wrapGoogleAI(model, sessionCtx) {
       if (result?.usageMetadata) {
         attributes["fallom.raw.usage"] = JSON.stringify(result.usageMetadata);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          result,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const waterfallTimings = {
         requestStart: 0,
         requestEnd: endTime - startTime,
@@ -2539,6 +2774,13 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
           result.experimental_providerMetadata
         );
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          result,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const totalDurationMs = endTime - startTime;
       const sortedToolTimings = Array.from(toolTimings.values()).sort(
         (a, b) => a.startTime - b.startTime
@@ -2867,6 +3109,10 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
           if (firstTokenTime) {
             attributes["fallom.time_to_first_token_ms"] = firstTokenTime - startTime;
           }
+          try {
+            attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+          } catch {
+          }
           const totalDurationMs = endTime - startTime;
           const sortedToolTimings = Array.from(toolTimings.values()).sort(
             (a, b) => a.startTime - b.startTime
@@ -3072,6 +3318,10 @@ function createGenerateObjectWrapper(aiModule, sessionCtx, debug = false) {
           result.experimental_providerMetadata
         );
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+      } catch {
+      }
       const promptCtx = getPromptContext();
       sendTrace({
         config_key: ctx.configKey,
@@ -3191,6 +3441,10 @@ function createStreamObjectWrapper(aiModule, sessionCtx, debug = false) {
         if (providerMetadata) {
           attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
         }
+        try {
+          attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+        } catch {
+        }
         const promptCtx = getPromptContext();
         sendTrace({
           config_key: ctx.configKey,
@@ -3287,6 +3541,13 @@ function wrapMastraAgent(agent, sessionCtx) {
         attributes["fallom.raw.request"] = JSON.stringify(input);
         attributes["fallom.raw.response"] = JSON.stringify(result);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          result,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       sendTrace({
         config_key: ctx.configKey,
         session_id: ctx.sessionId,
@@ -3543,6 +3804,7 @@ var evals_exports = {};
 __export(evals_exports, {
   AVAILABLE_METRICS: () => AVAILABLE_METRICS,
   DEFAULT_JUDGE_MODEL: () => DEFAULT_JUDGE_MODEL,
+  EvaluationDataset: () => EvaluationDataset,
   METRIC_PROMPTS: () => METRIC_PROMPTS,
   compareModels: () => compareModels,
   createCustomModel: () => createCustomModel,

package/dist/index.mjs CHANGED Viewed

@@ -5,6 +5,7 @@ import {
 import {
   AVAILABLE_METRICS,
   DEFAULT_JUDGE_MODEL,
+  EvaluationDataset,
   METRIC_PROMPTS,
   compareModels,
   createCustomModel,
@@ -18,7 +19,7 @@ import {
   init as init2,
   isCustomMetric,
   uploadResultsPublic
-} from "./chunk-2NGJF2JZ.mjs";
+} from "./chunk-3HBKT4HK.mjs";
 import {
   __export
 } from "./chunk-7P6ASYW6.mjs";
@@ -1055,6 +1056,54 @@ function clearPromptContext() {
   promptContext = null;
 }
+// src/trace/wrappers/shared-utils.ts
+function sanitizeMetadataOnly(key, value) {
+  const contentKeys = [
+    "text",
+    "content",
+    "message",
+    "messages",
+    "object",
+    "prompt",
+    "system",
+    "input",
+    "output",
+    "response",
+    "toolCalls",
+    "toolResults",
+    "steps",
+    "reasoning",
+    "rawResponse",
+    "rawCall",
+    "body",
+    "candidates",
+    "parts"
+  ];
+  if (contentKeys.includes(key)) {
+    if (typeof value === "string") {
+      return `[content omitted: ${value.length} chars]`;
+    }
+    if (Array.isArray(value)) {
+      return `[content omitted: ${value.length} items]`;
+    }
+    if (typeof value === "object" && value !== null) {
+      return "[content omitted]";
+    }
+  }
+  if (typeof value === "string") {
+    if (value.startsWith("data:image/")) {
+      return "[base64 image omitted]";
+    }
+    if (value.length > 1e3) {
+      return `[large string omitted: ${value.length} chars]`;
+    }
+  }
+  if (value instanceof Uint8Array || value && value.type === "Buffer") {
+    return "[binary data omitted]";
+  }
+  return value;
+}
 // src/trace/wrappers/openai.ts
 function wrapOpenAI(client, sessionCtx) {
   const originalCreate = client.chat.completions.create.bind(
@@ -1102,6 +1151,13 @@ function wrapOpenAI(client, sessionCtx) {
       if (response?.usage) {
         attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          response,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const waterfallTimings = {
         requestStart: 0,
         requestEnd: endTime - startTime,
@@ -1228,6 +1284,13 @@ function wrapAnthropic(client, sessionCtx) {
       if (response?.usage) {
         attributes["fallom.raw.usage"] = JSON.stringify(response.usage);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          response,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const waterfallTimings = {
         requestStart: 0,
         requestEnd: endTime - startTime,
@@ -1348,6 +1411,13 @@ function wrapGoogleAI(model, sessionCtx) {
       if (result?.usageMetadata) {
         attributes["fallom.raw.usage"] = JSON.stringify(result.usageMetadata);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          result,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const waterfallTimings = {
         requestStart: 0,
         requestEnd: endTime - startTime,
@@ -1544,6 +1614,13 @@ function createGenerateTextWrapper(aiModule, sessionCtx, debug = false) {
           result.experimental_providerMetadata
         );
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          result,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       const totalDurationMs = endTime - startTime;
       const sortedToolTimings = Array.from(toolTimings.values()).sort(
         (a, b) => a.startTime - b.startTime
@@ -1872,6 +1949,10 @@ function createStreamTextWrapper(aiModule, sessionCtx, debug = false) {
           if (firstTokenTime) {
             attributes["fallom.time_to_first_token_ms"] = firstTokenTime - startTime;
           }
+          try {
+            attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+          } catch {
+          }
           const totalDurationMs = endTime - startTime;
           const sortedToolTimings = Array.from(toolTimings.values()).sort(
             (a, b) => a.startTime - b.startTime
@@ -2077,6 +2158,10 @@ function createGenerateObjectWrapper(aiModule, sessionCtx, debug = false) {
           result.experimental_providerMetadata
         );
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+      } catch {
+      }
       const promptCtx = getPromptContext();
       sendTrace({
         config_key: ctx.configKey,
@@ -2196,6 +2281,10 @@ function createStreamObjectWrapper(aiModule, sessionCtx, debug = false) {
         if (providerMetadata) {
           attributes["fallom.raw.providerMetadata"] = JSON.stringify(providerMetadata);
         }
+        try {
+          attributes["fallom.raw.metadata"] = JSON.stringify(result, sanitizeMetadataOnly);
+        } catch {
+        }
         const promptCtx = getPromptContext();
         sendTrace({
           config_key: ctx.configKey,
@@ -2292,6 +2381,13 @@ function wrapMastraAgent(agent, sessionCtx) {
         attributes["fallom.raw.request"] = JSON.stringify(input);
         attributes["fallom.raw.response"] = JSON.stringify(result);
       }
+      try {
+        attributes["fallom.raw.metadata"] = JSON.stringify(
+          result,
+          sanitizeMetadataOnly
+        );
+      } catch {
+      }
       sendTrace({
         config_key: ctx.configKey,
         session_id: ctx.sessionId,
@@ -2545,6 +2641,7 @@ var evals_exports = {};
 __export(evals_exports, {
   AVAILABLE_METRICS: () => AVAILABLE_METRICS,
   DEFAULT_JUDGE_MODEL: () => DEFAULT_JUDGE_MODEL,
+  EvaluationDataset: () => EvaluationDataset,
   METRIC_PROMPTS: () => METRIC_PROMPTS,
   compareModels: () => compareModels,
   createCustomModel: () => createCustomModel,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fallom/trace",
-  "version": "0.2.15",
+  "version": "0.2.17",
   "description": "Model A/B testing and tracing for LLM applications. Zero latency, production-ready.",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",