npm - @langfuse/client - Versions diffs - 4.0.0 → 4.1.0-alpha.1 - Mend

@langfuse/client 4.0.0 → 4.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs CHANGED Viewed

@@ -33,16 +33,18 @@ __export(index_exports, {
   ChatMessageType: () => ChatMessageType,
   ChatPromptClient: () => ChatPromptClient,
   DatasetManager: () => DatasetManager,
+  ExperimentManager: () => ExperimentManager,
   LangfuseClient: () => LangfuseClient,
   MediaManager: () => MediaManager,
   PromptManager: () => PromptManager,
   ScoreManager: () => ScoreManager,
-  TextPromptClient: () => TextPromptClient
+  TextPromptClient: () => TextPromptClient,
+  autoevalsToLangfuseEvaluator: () => autoevalsToLangfuseEvaluator
 });
 module.exports = __toCommonJS(index_exports);
 // src/LangfuseClient.ts
-var import_core5 = require("@langfuse/core");
+var import_core6 = require("@langfuse/core");
 // src/dataset/index.ts
 var DatasetManager = class {
@@ -53,44 +55,91 @@ var DatasetManager = class {
    * @internal
    */
   constructor(params) {
-    this.apiClient = params.apiClient;
+    this.langfuseClient = params.langfuseClient;
   }
   /**
-   * Retrieves a dataset by name along with all its items.
+   * Retrieves a dataset by name with all its items and experiment functionality.
    *
-   * This method automatically handles pagination to fetch all dataset items
-   * and enhances each item with a `link` function for easy experiment tracking.
+   * This method fetches a dataset and all its associated items, with support
+   * for automatic pagination to handle large datasets efficiently. The returned
+   * dataset object includes enhanced functionality for linking items to traces
+   * and running experiments directly on the dataset.
    *
    * @param name - The name of the dataset to retrieve
-   * @param options - Optional configuration for fetching
+   * @param options - Optional configuration for data fetching
    * @param options.fetchItemsPageSize - Number of items to fetch per page (default: 50)
+   * @returns Promise resolving to enhanced dataset with items, linking, and experiment capabilities
    *
-   * @returns Promise that resolves to the dataset with enhanced items
+   * @example Basic dataset retrieval
+   * ```typescript
+   * const dataset = await langfuse.dataset.get("my-evaluation-dataset");
+   * console.log(`Dataset ${dataset.name} has ${dataset.items.length} items`);
    *
-   * @example
+   * // Access dataset properties
+   * console.log(dataset.description);
+   * console.log(dataset.metadata);
+   * ```
+   *
+   * @example Working with dataset items
    * ```typescript
-   * const dataset = await langfuse.dataset.get("my-dataset");
+   * const dataset = await langfuse.dataset.get("qa-dataset");
    *
    * for (const item of dataset.items) {
-   *   // Use the item data for your experiment
-   *   const result = await processItem(item.input);
-   *
-   *   // Link the result to the dataset item
-   *   await item.link(
-   *     { otelSpan: currentSpan },
-   *     "experiment-run-1",
-   *     { description: "Testing new model" }
-   *   );
+   *   console.log("Question:", item.input);
+   *   console.log("Expected Answer:", item.expectedOutput);
+   *
+   *   // Each item has a link function for connecting to traces
+   *   // await item.link(span, "experiment-name");
    * }
    * ```
+   *
+   * @example Running experiments on datasets
+   * ```typescript
+   * const dataset = await langfuse.dataset.get("benchmark-dataset");
+   *
+   * const result = await dataset.runExperiment({
+   *   name: "GPT-4 Benchmark",
+   *   description: "Evaluating GPT-4 on our benchmark tasks",
+   *   task: async ({ input }) => {
+   *     const response = await openai.chat.completions.create({
+   *       model: "gpt-4",
+   *       messages: [{ role: "user", content: input }]
+   *     });
+   *     return response.choices[0].message.content;
+   *   },
+   *   evaluators: [
+   *     async ({ output, expectedOutput }) => ({
+   *       name: "exact_match",
+   *       value: output === expectedOutput ? 1 : 0
+   *     })
+   *   ]
+   * });
+   *
+   * console.log(await result.prettyPrint());
+   * ```
+   *
+   * @example Handling large datasets
+   * ```typescript
+   * // The default page size is 50; raise it to fetch large datasets in fewer requests
+   * const largeDataset = await langfuse.dataset.get(
+   *   "large-dataset",
+   *   { fetchItemsPageSize: 100 }
+   * );
+   * ```
+   *
+   * @throws {Error} If the dataset does not exist or cannot be accessed
+   * @see {@link FetchedDataset} for the complete return type specification
+   * @see {@link RunExperimentOnDataset} for experiment execution details
+   * @public
+   * @since 4.0.0
    */
   async get(name, options) {
     var _a;
-    const dataset = await this.apiClient.datasets.get(name);
+    const dataset = await this.langfuseClient.api.datasets.get(name);
     const items = [];
     let page = 1;
     while (true) {
-      const itemsResponse = await this.apiClient.datasetItems.list({
+      const itemsResponse = await this.langfuseClient.api.datasetItems.list({
         datasetName: name,
         limit: (_a = options == null ? void 0 : options.fetchItemsPageSize) != null ? _a : 50,
         page
@@ -101,12 +150,20 @@ var DatasetManager = class {
       }
       page++;
     }
+    const itemsWithLinkMethod = items.map((item) => ({
+      ...item,
+      link: this.createDatasetItemLinkFunction(item)
+    }));
+    const runExperiment = (params) => {
+      return this.langfuseClient.experiment.run({
+        data: items,
+        ...params
+      });
+    };
     const returnDataset = {
       ...dataset,
-      items: items.map((item) => ({
-        ...item,
-        link: this.createDatasetItemLinkFunction(item)
-      }))
+      items: itemsWithLinkMethod,
+      runExperiment
     };
     return returnDataset;
   }
@@ -119,7 +176,7 @@ var DatasetManager = class {
    */
   createDatasetItemLinkFunction(item) {
     const linkFunction = async (obj, runName, runArgs) => {
-      return await this.apiClient.datasetRunItems.create({
+      return await this.langfuseClient.api.datasetRunItems.create({
         runName,
         datasetItemId: item.id,
         traceId: obj.otelSpan.spanContext().traceId,
@@ -131,8 +188,508 @@ var DatasetManager = class {
   }
 };
-// src/media/index.ts
+// src/experiment/ExperimentManager.ts
 var import_core = require("@langfuse/core");
+var import_tracing = require("@langfuse/tracing");
+var import_api = require("@opentelemetry/api");
+var ExperimentManager = class {
+  /**
+   * Creates a new ExperimentManager instance.
+   *
+   * @param params - Configuration object
+   * @param params.langfuseClient - The Langfuse client instance; this class uses its
+   *   `api`, `score` and `getTraceUrl` members
+   * @internal
+   */
+  constructor(params) {
+    this.langfuseClient = params.langfuseClient;
+  }
+  /**
+   * Gets the global logger instance for experiment-related logging.
+   *
+   * @returns The global logger instance
+   * @internal
+   */
+  get logger() {
+    return (0, import_core.getGlobalLogger)();
+  }
+  /**
+   * Executes an experiment by running a task on each data item and evaluating the results.
+   *
+   * This method orchestrates the complete experiment lifecycle:
+   * 1. Executes the task function on each data item inside a traced observation
+   * 2. Runs item-level evaluators on each task output
+   * 3. Executes run-level evaluators on the complete result set
+   * 4. Links results to dataset runs (for items that carry an `id`)
+   * 5. Queues all scores on the client and flushes them before returning
+   *
+   * Items are processed in consecutive batches of at most `maxConcurrency` items;
+   * a batch must finish before the next one starts.
+   *
+   * @param config - The experiment configuration
+   * @param config.name - Human-readable name for the experiment
+   * @param config.description - Optional description of the experiment's purpose
+   * @param config.metadata - Optional metadata to attach to the experiment run
+   * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
+   * @param config.task - Function that processes each data item and returns output
+   * @param config.evaluators - Optional array of functions to evaluate each item's output
+   * @param config.runEvaluators - Optional array of functions to evaluate the entire run
+   * @param config.maxConcurrency - Maximum number of concurrent task executions
+   *   (default: Infinity). Must be a positive number; fractional values are rounded down
+   *   (but never below 1).
+   *
+   * @returns Promise that resolves to experiment results including:
+   *   - itemResults: Results for each processed data item
+   *   - runEvaluations: Results from run-level evaluators
+   *   - datasetRunId: ID of the dataset run (if at least one item was linked to one)
+   *   - datasetRunUrl: URL of the dataset run (if the data items carry a `datasetId`)
+   *   - prettyPrint: Function to format and display results
+   *
+   * @throws {RangeError} When `maxConcurrency` is not a positive number
+   * @throws {Error} When a task function fails (the rejection is propagated).
+   *   Evaluator failures do NOT throw: they are logged and the evaluator's
+   *   results are omitted.
+   *
+   * @example Simple experiment
+   * ```typescript
+   * const result = await langfuse.experiment.run({
+   *   name: "Translation Quality Test",
+   *   data: [
+   *     { input: "Hello world", expectedOutput: "Hola mundo" },
+   *     { input: "Good morning", expectedOutput: "Buenos días" }
+   *   ],
+   *   task: async ({ input }) => translateText(input, 'es'),
+   *   evaluators: [
+   *     async ({ output, expectedOutput }) => ({
+   *       name: "bleu_score",
+   *       value: calculateBleuScore(output, expectedOutput)
+   *     })
+   *   ]
+   * });
+   * ```
+   *
+   * @example Experiment with concurrency control
+   * ```typescript
+   * const result = await langfuse.experiment.run({
+   *   name: "Large Scale Evaluation",
+   *   data: largeBatchOfItems,
+   *   task: expensiveModelCall,
+   *   maxConcurrency: 5, // Process max 5 items simultaneously
+   *   evaluators: [myEvaluator],
+   *   runEvaluators: [
+   *     async ({ itemResults }) => ({
+   *       name: "average_score",
+   *       value: itemResults.reduce((acc, r) => acc + r.evaluations[0].value, 0) / itemResults.length
+   *     })
+   *   ]
+   * });
+   * ```
+   *
+   * @see {@link ExperimentParams} for detailed parameter documentation
+   * @see {@link ExperimentResult} for detailed return value documentation
+   * @see {@link Evaluator} for evaluator function specifications
+   * @see {@link RunEvaluator} for run evaluator function specifications
+   *
+   * @public
+   */
+  async run(config) {
+    const {
+      data,
+      evaluators,
+      task,
+      name,
+      description,
+      metadata,
+      maxConcurrency = Infinity,
+      runEvaluators
+    } = config;
+    // A zero, negative, NaN or non-numeric step would make the batching loop
+    // below spin forever or silently skip every item, so reject it up front.
+    if (typeof maxConcurrency !== "number" || Number.isNaN(maxConcurrency) || maxConcurrency <= 0) {
+      throw new RangeError(
+        `maxConcurrency must be a positive number, received: ${String(maxConcurrency)}`
+      );
+    }
+    // Math.floor(Infinity) stays Infinity, which yields a single batch.
+    const batchSize = Math.max(1, Math.floor(maxConcurrency));
+    if (!this.isOtelRegistered()) {
+      this.logger.warn(
+        "OpenTelemetry has not been set up. Traces will not be sent to Langfuse. See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
+      );
+    }
+    const itemResults = [];
+    for (let i = 0; i < data.length; i += batchSize) {
+      const batch = data.slice(i, i + batchSize);
+      const results = await Promise.all(
+        batch.map(
+          (item) => this.runItem({
+            item,
+            evaluators,
+            task,
+            experimentName: name,
+            experimentDescription: description,
+            experimentMetadata: metadata
+          })
+        )
+      );
+      // Plain loop instead of push(...results): spreading a very large batch
+      // as call arguments can exceed the engine's argument limit.
+      for (const result of results) {
+        itemResults.push(result);
+      }
+    }
+    // Linking is best-effort per item, so take the run id from the first item
+    // that was linked successfully rather than only from the first item.
+    const linkedResult = itemResults.find((result) => result.datasetRunId);
+    const datasetRunId = linkedResult ? linkedResult.datasetRunId : void 0;
+    let datasetRunUrl = void 0;
+    if (datasetRunId && data.length > 0 && "datasetId" in data[0]) {
+      const projectUrl = await this.getProjectUrl();
+      datasetRunUrl = `${projectUrl}/datasets/${data[0].datasetId}/runs/${datasetRunId}`;
+    }
+    let runEvaluations = [];
+    if (runEvaluators && runEvaluators.length > 0) {
+      const evaluationGroups = await Promise.all(
+        runEvaluators.map(
+          (runEvaluator) => this.collectEvaluations(runEvaluator, { itemResults }, (err) => {
+            this.logger.error("Run evaluator failed with error ", err);
+          })
+        )
+      );
+      // Drop nullish entries so an evaluator that returns nothing cannot
+      // break score creation or pretty printing.
+      runEvaluations = evaluationGroups.flat().filter((evaluation) => evaluation != null);
+      if (datasetRunId) {
+        for (const runEval of runEvaluations) {
+          this.langfuseClient.score.create({ datasetRunId, ...runEval });
+        }
+      }
+    }
+    await this.langfuseClient.score.flush();
+    return {
+      itemResults,
+      datasetRunId,
+      datasetRunUrl,
+      runEvaluations,
+      prettyPrint: async (options) => {
+        const includeItemResults = options != null && options.includeItemResults != null ? options.includeItemResults : false;
+        return await this.prettyPrintResults({
+          datasetRunUrl,
+          itemResults,
+          originalData: data,
+          runEvaluations,
+          name: config.name,
+          description: config.description,
+          includeItemResults
+        });
+      }
+    };
+  }
+  /**
+   * Executes the task and evaluators for a single data item.
+   *
+   * This method handles the complete processing pipeline for one data item:
+   * 1. Executes the task within an "experiment-item-run" observation and records
+   *    input, output and metadata on it
+   * 2. Links the trace to a dataset run when the item has an `id` (a failed link
+   *    is logged and otherwise ignored)
+   * 3. Runs all item-level evaluators on the output; a failing evaluator is
+   *    logged and skipped, the remaining evaluators still contribute
+   * 4. Queues one score per evaluation on the client
+   *
+   * @param params - Parameters for item execution
+   * @param params.experimentName - Name of the parent experiment
+   * @param params.experimentDescription - Description of the parent experiment
+   * @param params.experimentMetadata - Metadata for the parent experiment
+   * @param params.item - The data item to process
+   * @param params.task - The task function to execute
+   * @param params.evaluators - Optional evaluators to run on the output
+   *
+   * @returns Promise resolving to the item result with output, evaluations, and trace info
+   *
+   * @throws {Error} When task execution fails (propagated from task function)
+   *
+   * @internal
+   */
+  async runItem(params) {
+    const { item, evaluators = [], task, experimentMetadata = {} } = params;
+    const { output, traceId } = await (0, import_tracing.startActiveObservation)(
+      "experiment-item-run",
+      async (span) => {
+        const taskOutput = await task(item);
+        span.update({
+          input: item.input,
+          output: taskOutput,
+          // Later spreads win: item metadata overrides experiment metadata,
+          // and the dataset identifiers override both.
+          metadata: {
+            experimentName: params.experimentName,
+            ...experimentMetadata,
+            ...item.metadata,
+            ..."id" in item && "datasetId" in item ? {
+              datasetId: item["datasetId"],
+              datasetItemId: item["id"]
+            } : {}
+          }
+        });
+        return { output: taskOutput, traceId: span.traceId };
+      }
+    );
+    let datasetRunId = void 0;
+    if ("id" in item) {
+      try {
+        const runItem = await this.langfuseClient.api.datasetRunItems.create({
+          runName: params.experimentName,
+          runDescription: params.experimentDescription,
+          metadata: params.experimentMetadata,
+          datasetItemId: item.id,
+          traceId
+        });
+        datasetRunId = runItem.datasetRunId;
+      } catch (err) {
+        // Best effort: the item result is still returned without a run id.
+        this.logger.error("Linking dataset run item failed", err);
+      }
+    }
+    const evaluationGroups = await Promise.all(
+      evaluators.map((evaluator) => {
+        // Fresh object per evaluator so one evaluator cannot mutate another's input.
+        const evaluatorParams = {
+          input: item.input,
+          expectedOutput: item.expectedOutput,
+          output
+        };
+        return this.collectEvaluations(evaluator, evaluatorParams, (err) => {
+          this.logger.error(
+            `Evaluator '${evaluator.name}' failed for params\n${this.safeStringify(evaluatorParams)}\n with error: ${err}`
+          );
+        });
+      })
+    );
+    // flat(2): one level for the per-evaluator groups, one for evaluators that
+    // return nested arrays. Nullish entries are dropped before scoring.
+    const evals = evaluationGroups.flat(2).filter((evaluation) => evaluation != null);
+    for (const ev of evals) {
+      this.langfuseClient.score.create({
+        traceId,
+        name: ev.name,
+        comment: ev.comment,
+        value: ev.value,
+        metadata: ev.metadata,
+        dataType: ev.dataType
+      });
+    }
+    return {
+      output,
+      evaluations: evals,
+      traceId,
+      datasetRunId,
+      item
+    };
+  }
+  /**
+   * Invokes a single evaluator and normalizes its result to an array.
+   *
+   * The evaluator is awaited inside a try/catch, so synchronous evaluators,
+   * synchronous throws and rejected promises are all handled the same way.
+   * This method never rejects as long as `onError` does not throw.
+   *
+   * @param evaluator - Item-level or run-level evaluator function
+   * @param evaluatorParams - The single argument passed to the evaluator
+   * @param onError - Callback invoked with the error when the evaluator fails
+   * @returns Promise resolving to the evaluator's results, or `[]` on failure
+   *
+   * @internal
+   */
+  async collectEvaluations(evaluator, evaluatorParams, onError) {
+    try {
+      const result = await evaluator(evaluatorParams);
+      return Array.isArray(result) ? result : [result];
+    } catch (err) {
+      onError(err);
+      return [];
+    }
+  }
+  /**
+   * Derives the base URL of the current project in the Langfuse UI.
+   *
+   * Asks the client for the URL of a placeholder trace id and keeps everything
+   * before the "/traces" segment.
+   *
+   * @returns Promise resolving to the project URL prefix
+   *
+   * @internal
+   */
+  async getProjectUrl() {
+    const placeholderTraceUrl = await this.langfuseClient.getTraceUrl("mock");
+    return placeholderTraceUrl.split("/traces")[0];
+  }
+  /**
+   * Formats experiment results into a human-readable string representation.
+   *
+   * Creates a formatted summary of the experiment including:
+   * - Individual item results with inputs, outputs, expected values, and scores
+   *   (only when `includeItemResults` is true)
+   * - Dataset item and trace links (when available)
+   * - Experiment name, description and item count
+   * - Average of the numeric scores per evaluation name
+   * - Run-level evaluation results
+   * - Link to the dataset run in the Langfuse UI (when available)
+   *
+   * @param params - Formatting parameters
+   * @param params.datasetRunUrl - Optional URL to the dataset run in Langfuse UI
+   * @param params.itemResults - Results from processing each data item
+   * @param params.originalData - The original input data items
+   * @param params.runEvaluations - Results from run-level evaluators
+   * @param params.name - Name of the experiment
+   * @param params.description - Optional description of the experiment
+   * @param params.includeItemResults - Whether to include individual item details (default: false)
+   *
+   * @returns Promise resolving to formatted string representation
+   *
+   * @example Output format
+   * ```
+   * 1. Item 1:
+   *    Input:    What is the capital of France?
+   *    Expected: Paris
+   *    Actual:   Paris
+   *    Scores:
+   *      • exact_match: 1.000
+   *      • similarity: 0.950
+   *        💭 Very close match with expected output
+   *
+   *    Dataset Item:
+   *    https://cloud.langfuse.com/project/123/datasets/456/items/789
+   *
+   *    Trace:
+   *    https://cloud.langfuse.com/project/123/traces/abc123
+   *
+   * ──────────────────────────────────────────────────
+   * 📊 Translation Quality Test - Testing model accuracy
+   * 2 items
+   * Evaluations:
+   *   • exact_match
+   *   • similarity
+   *
+   * Average Scores:
+   *   • exact_match: 0.850
+   *   • similarity: 0.923
+   *
+   * Run Evaluations:
+   *   • overall_quality: 0.887
+   *     💭 Good performance with room for improvement
+   *
+   * 🔗 Dataset Run:
+   *    https://cloud.langfuse.com/project/123/datasets/456/runs/def456
+   * ```
+   *
+   * @internal
+   */
+  async prettyPrintResults(params) {
+    const {
+      itemResults,
+      originalData,
+      runEvaluations,
+      name,
+      description,
+      includeItemResults = false
+    } = params;
+    if (itemResults.length === 0) {
+      return "No experiment results to display.";
+    }
+    let output = "";
+    if (includeItemResults) {
+      // Resolved lazily and at most once: it is the same for every item.
+      let projectUrl = void 0;
+      for (let index = 0; index < itemResults.length; index++) {
+        const result = itemResults[index];
+        const originalItem = originalData[index];
+        output += `\n${index + 1}. Item ${index + 1}:\n`;
+        if (originalItem != null && originalItem.input !== void 0) {
+          output += `   Input:    ${this.formatValue(originalItem.input)}\n`;
+        }
+        let expectedOutput = originalItem != null ? originalItem.expectedOutput : void 0;
+        if (expectedOutput == null) {
+          expectedOutput = result.expectedOutput;
+        }
+        output += `   Expected: ${expectedOutput != null ? this.formatValue(expectedOutput) : "null"}\n`;
+        output += `   Actual:   ${this.formatValue(result.output)}\n`;
+        if (result.evaluations.length > 0) {
+          output += "   Scores:\n";
+          for (const evaluation of result.evaluations) {
+            const score = typeof evaluation.value === "number" ? evaluation.value.toFixed(3) : evaluation.value;
+            output += `     \u2022 ${evaluation.name}: ${score}`;
+            if (evaluation.comment) {
+              output += `\n       \u{1F4AD} ${evaluation.comment}`;
+            }
+            output += "\n";
+          }
+        }
+        if (originalItem && "id" in originalItem && "datasetId" in originalItem) {
+          if (projectUrl === void 0) {
+            projectUrl = await this.getProjectUrl();
+          }
+          const datasetItemUrl = `${projectUrl}/datasets/${originalItem.datasetId}/items/${originalItem.id}`;
+          output += `\n   Dataset Item:\n   ${datasetItemUrl}\n`;
+        }
+        if (result.traceId) {
+          const traceUrl = await this.langfuseClient.getTraceUrl(
+            result.traceId
+          );
+          output += `\n   Trace:\n   ${traceUrl}\n`;
+        }
+      }
+    } else {
+      output += `Individual Results: Hidden (${itemResults.length} items)\n`;
+      output += "\u{1F4A1} Call prettyPrint({ includeItemResults: true }) to view them\n";
+    }
+    // Single pass over all evaluations: remembers every evaluation name in
+    // order of first appearance and accumulates its numeric values.
+    const scoreTotals = new Map();
+    for (const result of itemResults) {
+      for (const evaluation of result.evaluations) {
+        let totals = scoreTotals.get(evaluation.name);
+        if (!totals) {
+          totals = { sum: 0, count: 0 };
+          scoreTotals.set(evaluation.name, totals);
+        }
+        if (typeof evaluation.value === "number") {
+          totals.sum += evaluation.value;
+          totals.count++;
+        }
+      }
+    }
+    output += `\n${"\u2500".repeat(50)}\n`;
+    output += `\u{1F4CA} ${name}`;
+    if (description) {
+      output += ` - ${description}`;
+    }
+    output += `\n${itemResults.length} items`;
+    if (scoreTotals.size > 0) {
+      output += "\nEvaluations:";
+      for (const evalName of scoreTotals.keys()) {
+        output += `\n  \u2022 ${evalName}`;
+      }
+      output += "\n";
+      output += "\nAverage Scores:";
+      for (const [evalName, totals] of scoreTotals) {
+        // Names that only ever produced non-numeric values have no average.
+        if (totals.count > 0) {
+          output += `\n  \u2022 ${evalName}: ${(totals.sum / totals.count).toFixed(3)}`;
+        }
+      }
+      output += "\n";
+    }
+    if (runEvaluations.length > 0) {
+      output += "\nRun Evaluations:";
+      for (const runEval of runEvaluations) {
+        const score = typeof runEval.value === "number" ? runEval.value.toFixed(3) : runEval.value;
+        output += `\n  \u2022 ${runEval.name}: ${score}`;
+        if (runEval.comment) {
+          output += `\n    \u{1F4AD} ${runEval.comment}`;
+        }
+      }
+      output += "\n";
+    }
+    if (params.datasetRunUrl) {
+      output += `\n\u{1F517} Dataset Run:\n   ${params.datasetRunUrl}`;
+    }
+    return output;
+  }
+  /**
+   * Formats a value for display in pretty-printed output.
+   *
+   * - Strings longer than 50 characters are cut to 47 characters plus "..."
+   * - Every other value is converted with JSON.stringify (so `undefined` and
+   *   functions yield `undefined`, which prints as "undefined" in a template)
+   * - Values JSON.stringify cannot handle fall back to String(value)
+   *
+   * @param value - The value to format
+   * @returns Formatted representation suitable for display
+   *
+   * @internal
+   */
+  formatValue(value) {
+    if (typeof value === "string") {
+      return value.length > 50 ? `${value.substring(0, 47)}...` : value;
+    }
+    return this.safeStringify(value);
+  }
+  /**
+   * JSON.stringify that does not throw (e.g. on circular structures or BigInt);
+   * falls back to String(value) in that case.
+   *
+   * @param value - The value to serialize
+   * @returns The JSON text, or the String() form when serialization fails
+   *
+   * @internal
+   */
+  safeStringify(value) {
+    try {
+      return JSON.stringify(value);
+    } catch (err) {
+      return String(value);
+    }
+  }
+  /**
+   * Reports whether a tracer provider other than the no-op one is registered.
+   *
+   * Unwraps a ProxyTracerProvider via getDelegate() and then compares the
+   * provider's constructor name with "NoopTracerProvider".
+   * NOTE(review): the check is name-based, so it presumably misreports if the
+   * OpenTelemetry class names are mangled by a bundler — confirm.
+   *
+   * @returns `true` when the registered provider is not the no-op provider
+   *
+   * @internal
+   */
+  isOtelRegistered() {
+    let tracerProvider = import_api.trace.getTracerProvider();
+    if (tracerProvider instanceof import_api.ProxyTracerProvider) {
+      tracerProvider = tracerProvider.getDelegate();
+    }
+    return tracerProvider.constructor.name !== "NoopTracerProvider";
+  }
+};
+// src/media/index.ts
+var import_core2 = require("@langfuse/core");
 var MediaManager = class _MediaManager {
   /**
    * Creates a new MediaManager instance.
@@ -210,14 +767,14 @@ var MediaManager = class _MediaManager {
               const uint8Content = new Uint8Array(
                 await mediaContent.arrayBuffer()
               );
-              const base64MediaContent = (0, import_core.bytesToBase64)(uint8Content);
+              const base64MediaContent = (0, import_core2.bytesToBase64)(uint8Content);
               const base64DataUri = `data:${mediaData.contentType};base64,${base64MediaContent}`;
               referenceStringToMediaContentMap.set(
                 referenceString,
                 base64DataUri
               );
             } catch (error) {
-              (0, import_core.getGlobalLogger)().warn(
+              (0, import_core2.getGlobalLogger)().warn(
                 "Error fetching media content for reference string",
                 referenceString,
                 error
@@ -293,10 +850,10 @@ var MediaManager = class _MediaManager {
 };
 // src/prompt/promptManager.ts
-var import_core3 = require("@langfuse/core");
+var import_core4 = require("@langfuse/core");
 // src/prompt/promptCache.ts
-var import_core2 = require("@langfuse/core");
+var import_core3 = require("@langfuse/core");
 var DEFAULT_PROMPT_CACHE_TTL_SECONDS = 60;
 var LangfusePromptCacheItem = class {
   constructor(value, ttlSeconds) {
@@ -348,7 +905,7 @@ var LangfusePromptCache = class {
     return this._refreshingKeys.has(key);
   }
   invalidate(promptName) {
-    (0, import_core2.getGlobalLogger)().debug(
+    (0, import_core3.getGlobalLogger)().debug(
       "Invalidating cache keys for",
       promptName,
       this._cache.keys()
@@ -692,7 +1249,7 @@ var PromptManager = class {
     this.cache = new LangfusePromptCache();
   }
   get logger() {
-    return (0, import_core3.getGlobalLogger)();
+    return (0, import_core4.getGlobalLogger)();
   }
   /**
    * Creates a new prompt in Langfuse.
@@ -919,8 +1476,8 @@ var PromptManager = class {
 };
 // src/score/index.ts
-var import_core4 = require("@langfuse/core");
-var import_api = require("@opentelemetry/api");
+var import_core5 = require("@langfuse/core");
+var import_api2 = require("@opentelemetry/api");
 var MAX_QUEUE_SIZE = 1e5;
 var MAX_BATCH_SIZE = 100;
 var ScoreManager = class {
@@ -935,13 +1492,13 @@ var ScoreManager = class {
     this.flushPromise = null;
     this.flushTimer = null;
     this.apiClient = params.apiClient;
-    const envFlushAtCount = (0, import_core4.getEnv)("LANGFUSE_FLUSH_AT");
-    const envFlushIntervalSeconds = (0, import_core4.getEnv)("LANGFUSE_FLUSH_INTERVAL");
+    const envFlushAtCount = (0, import_core5.getEnv)("LANGFUSE_FLUSH_AT");
+    const envFlushIntervalSeconds = (0, import_core5.getEnv)("LANGFUSE_FLUSH_INTERVAL");
     this.flushAtCount = envFlushAtCount ? Number(envFlushAtCount) : 10;
     this.flushIntervalSeconds = envFlushIntervalSeconds ? Number(envFlushIntervalSeconds) : 1;
   }
   get logger() {
-    return (0, import_core4.getGlobalLogger)();
+    return (0, import_core5.getGlobalLogger)();
   }
   /**
    * Creates a new score event and adds it to the processing queue.
@@ -966,11 +1523,11 @@ var ScoreManager = class {
     var _a, _b;
     const scoreData = {
       ...data,
-      id: (_a = data.id) != null ? _a : (0, import_core4.generateUUID)(),
-      environment: (_b = data.environment) != null ? _b : (0, import_core4.getEnv)("LANGFUSE_TRACING_ENVIRONMENT")
+      id: (_a = data.id) != null ? _a : (0, import_core5.generateUUID)(),
+      environment: (_b = data.environment) != null ? _b : (0, import_core5.getEnv)("LANGFUSE_TRACING_ENVIRONMENT")
     };
     const scoreIngestionEvent = {
-      id: (0, import_core4.generateUUID)(),
+      id: (0, import_core5.generateUUID)(),
       type: "score-create",
       timestamp: (/* @__PURE__ */ new Date()).toISOString(),
       body: scoreData
@@ -982,10 +1539,14 @@ var ScoreManager = class {
       return;
     }
     this.eventQueue.push(scoreIngestionEvent);
+    this.logger.debug(
+      "Added score event to queue:\n",
+      JSON.stringify(scoreIngestionEvent, null, 2)
+    );
     if (this.eventQueue.length >= this.flushAtCount) {
       this.flushPromise = this.flush();
     } else if (!this.flushTimer) {
-      this.flushTimer = (0, import_core4.safeSetTimeout)(() => {
+      this.flushTimer = (0, import_core5.safeSetTimeout)(() => {
         this.flushPromise = this.flush();
       }, this.flushIntervalSeconds * 1e3);
     }
@@ -1068,7 +1629,7 @@ var ScoreManager = class {
    * ```
    */
   activeObservation(data) {
-    const currentOtelSpan = import_api.trace.getActiveSpan();
+    const currentOtelSpan = import_api2.trace.getActiveSpan();
     if (!currentOtelSpan) {
       this.logger.warn("No active span in context to score.");
       return;
@@ -1104,7 +1665,7 @@ var ScoreManager = class {
    * ```
    */
   activeTrace(data) {
-    const currentOtelSpan = import_api.trace.getActiveSpan();
+    const currentOtelSpan = import_api2.trace.getActiveSpan();
     if (!currentOtelSpan) {
       this.logger.warn("No active span in context to score trace.");
       return;
@@ -1204,10 +1765,10 @@ var LangfuseClient = class {
   constructor(params) {
     this.projectId = null;
     var _a, _b, _c, _d, _e, _f, _g;
-    const logger = (0, import_core5.getGlobalLogger)();
-    const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : (0, import_core5.getEnv)("LANGFUSE_PUBLIC_KEY");
-    const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : (0, import_core5.getEnv)("LANGFUSE_SECRET_KEY");
-    this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : (0, import_core5.getEnv)("LANGFUSE_BASE_URL")) != null ? _d : (0, import_core5.getEnv)("LANGFUSE_BASEURL")) != null ? _e : (
+    const logger = (0, import_core6.getGlobalLogger)();
+    const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : (0, import_core6.getEnv)("LANGFUSE_PUBLIC_KEY");
+    const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : (0, import_core6.getEnv)("LANGFUSE_SECRET_KEY");
+    this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : (0, import_core6.getEnv)("LANGFUSE_BASE_URL")) != null ? _d : (0, import_core6.getEnv)("LANGFUSE_BASEURL")) != null ? _e : (
       // legacy v2
       "https://cloud.langfuse.com"
     );
@@ -1221,13 +1782,13 @@ var LangfuseClient = class {
         "No secret key provided in constructor or as LANGFUSE_SECRET_KEY env var. Client operations will fail."
       );
     }
-    const timeoutSeconds = (_g = params == null ? void 0 : params.timeout) != null ? _g : Number((_f = (0, import_core5.getEnv)("LANGFUSE_TIMEOUT")) != null ? _f : 5);
-    this.api = new import_core5.LangfuseAPIClient({
+    const timeoutSeconds = (_g = params == null ? void 0 : params.timeout) != null ? _g : Number((_f = (0, import_core6.getEnv)("LANGFUSE_TIMEOUT")) != null ? _f : 5);
+    this.api = new import_core6.LangfuseAPIClient({
       baseUrl: this.baseUrl,
       username: publicKey,
       password: secretKey,
       xLangfusePublicKey: publicKey,
-      xLangfuseSdkVersion: import_core5.LANGFUSE_SDK_VERSION,
+      xLangfuseSdkVersion: import_core6.LANGFUSE_SDK_VERSION,
       xLangfuseSdkName: "javascript",
       environment: "",
       // noop as baseUrl is set
@@ -1239,9 +1800,10 @@ var LangfuseClient = class {
       timeoutSeconds
     });
     this.prompt = new PromptManager({ apiClient: this.api });
-    this.dataset = new DatasetManager({ apiClient: this.api });
+    this.dataset = new DatasetManager({ langfuseClient: this });
     this.score = new ScoreManager({ apiClient: this.api });
     this.media = new MediaManager({ apiClient: this.api });
+    this.experiment = new ExperimentManager({ langfuseClient: this });
     this.getPrompt = this.prompt.get.bind(this.prompt);
     this.createPrompt = this.prompt.create.bind(this.prompt);
     this.updatePrompt = this.prompt.update.bind(this.prompt);
@@ -1316,15 +1878,36 @@ var LangfuseClient = class {
     return traceUrl;
   }
 };
+// src/experiment/adapters.ts
+/**
+ * Wraps an autoevals-style evaluator so it can be used as a Langfuse evaluator.
+ *
+ * The returned function forwards `input` and `output` unchanged, passes
+ * `expectedOutput` under the key `expected`, and merges in the optional static
+ * `params` (the per-call fields win on key collisions). The evaluator's result
+ * is mapped to `{ name, value, metadata }`, with a null/undefined `score`
+ * reported as `0`.
+ *
+ * @param autoevalEvaluator - Function taking `{ input, output, expected, ...params }`
+ *   and returning (a promise of) an object with `name`, `score` and `metadata`
+ * @param params - Optional extra arguments passed to every evaluator call
+ * @returns Async evaluator accepting `{ input, output, expectedOutput }`
+ */
+function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
+  const staticParams = params == null ? {} : params;
+  return async function langfuseEvaluator(langfuseEvaluatorParams) {
+    const autoevalArgs = {
+      ...staticParams,
+      input: langfuseEvaluatorParams.input,
+      output: langfuseEvaluatorParams.output,
+      expected: langfuseEvaluatorParams.expectedOutput
+    };
+    const { name, score, metadata } = await autoevalEvaluator(autoevalArgs);
+    return {
+      name,
+      value: score == null ? 0 : score,
+      metadata
+    };
+  };
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   ChatMessageType,
   ChatPromptClient,
   DatasetManager,
+  ExperimentManager,
   LangfuseClient,
   MediaManager,
   PromptManager,
   ScoreManager,
-  TextPromptClient
+  TextPromptClient,
+  autoevalsToLangfuseEvaluator
 });
 //# sourceMappingURL=index.cjs.map