@langfuse/client 4.0.0 → 4.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -33,16 +33,18 @@ __export(index_exports, {
33
33
  ChatMessageType: () => ChatMessageType,
34
34
  ChatPromptClient: () => ChatPromptClient,
35
35
  DatasetManager: () => DatasetManager,
36
+ ExperimentManager: () => ExperimentManager,
36
37
  LangfuseClient: () => LangfuseClient,
37
38
  MediaManager: () => MediaManager,
38
39
  PromptManager: () => PromptManager,
39
40
  ScoreManager: () => ScoreManager,
40
- TextPromptClient: () => TextPromptClient
41
+ TextPromptClient: () => TextPromptClient,
42
+ autoevalsToLangfuseEvaluator: () => autoevalsToLangfuseEvaluator
41
43
  });
42
44
  module.exports = __toCommonJS(index_exports);
43
45
 
44
46
  // src/LangfuseClient.ts
45
- var import_core5 = require("@langfuse/core");
47
+ var import_core6 = require("@langfuse/core");
46
48
 
47
49
  // src/dataset/index.ts
48
50
  var DatasetManager = class {
@@ -53,44 +55,91 @@ var DatasetManager = class {
53
55
  * @internal
54
56
  */
55
57
  constructor(params) {
56
- this.apiClient = params.apiClient;
58
+ this.langfuseClient = params.langfuseClient;
57
59
  }
58
60
  /**
59
- * Retrieves a dataset by name along with all its items.
61
+ * Retrieves a dataset by name with all its items and experiment functionality.
60
62
  *
61
- * This method automatically handles pagination to fetch all dataset items
62
- * and enhances each item with a `link` function for easy experiment tracking.
63
+ * This method fetches a dataset and all its associated items, with support
64
+ * for automatic pagination to handle large datasets efficiently. The returned
65
+ * dataset object includes enhanced functionality for linking items to traces
66
+ * and running experiments directly on the dataset.
63
67
  *
64
68
  * @param name - The name of the dataset to retrieve
65
- * @param options - Optional configuration for fetching
69
+ * @param options - Optional configuration for data fetching
66
70
  * @param options.fetchItemsPageSize - Number of items to fetch per page (default: 50)
71
+ * @returns Promise resolving to enhanced dataset with items, linking, and experiment capabilities
67
72
  *
68
- * @returns Promise that resolves to the dataset with enhanced items
73
+ * @example Basic dataset retrieval
74
+ * ```typescript
75
+ * const dataset = await langfuse.dataset.get("my-evaluation-dataset");
76
+ * console.log(`Dataset ${dataset.name} has ${dataset.items.length} items`);
69
77
  *
70
- * @example
78
+ * // Access dataset properties
79
+ * console.log(dataset.description);
80
+ * console.log(dataset.metadata);
81
+ * ```
82
+ *
83
+ * @example Working with dataset items
71
84
  * ```typescript
72
- * const dataset = await langfuse.dataset.get("my-dataset");
85
+ * const dataset = await langfuse.dataset.get("qa-dataset");
73
86
  *
74
87
  * for (const item of dataset.items) {
75
- * // Use the item data for your experiment
76
- * const result = await processItem(item.input);
77
- *
78
- * // Link the result to the dataset item
79
- * await item.link(
80
- * { otelSpan: currentSpan },
81
- * "experiment-run-1",
82
- * { description: "Testing new model" }
83
- * );
88
+ * console.log("Question:", item.input);
89
+ * console.log("Expected Answer:", item.expectedOutput);
90
+ *
91
+ * // Each item has a link function for connecting to traces
92
+ * // await item.link(span, "experiment-name");
84
93
  * }
85
94
  * ```
95
+ *
96
+ * @example Running experiments on datasets
97
+ * ```typescript
98
+ * const dataset = await langfuse.dataset.get("benchmark-dataset");
99
+ *
100
+ * const result = await dataset.runExperiment({
101
+ * name: "GPT-4 Benchmark",
102
+ * description: "Evaluating GPT-4 on our benchmark tasks",
103
+ * task: async ({ input }) => {
104
+ * const response = await openai.chat.completions.create({
105
+ * model: "gpt-4",
106
+ * messages: [{ role: "user", content: input }]
107
+ * });
108
+ * return response.choices[0].message.content;
109
+ * },
110
+ * evaluators: [
111
+ * async ({ output, expectedOutput }) => ({
112
+ * name: "exact_match",
113
+ * value: output === expectedOutput ? 1 : 0
114
+ * })
115
+ * ]
116
+ * });
117
+ *
118
+ * console.log(await result.prettyPrint());
119
+ * ```
120
+ *
121
+ * @example Handling large datasets
122
+ * ```typescript
123
+ * // For very large datasets, use smaller page sizes
124
+ * const largeDataset = await langfuse.dataset.get(
125
+ * "large-dataset",
126
+ * { fetchItemsPageSize: 100 }
127
+ * );
128
+ * ```
129
+ *
130
+ * @throws {Error} If the dataset does not exist or cannot be accessed
131
+ * @see {@link FetchedDataset} for the complete return type specification
132
+ * @see {@link RunExperimentOnDataset} for experiment execution details
133
+ * @public
134
+ * @since 4.0.0
86
135
  */
87
136
  async get(name, options) {
88
137
  var _a;
89
- const dataset = await this.apiClient.datasets.get(name);
138
+ const dataset = await this.langfuseClient.api.datasets.get(name);
90
139
  const items = [];
91
140
  let page = 1;
92
141
  while (true) {
93
- const itemsResponse = await this.apiClient.datasetItems.list({
142
+ const itemsResponse = await this.langfuseClient.api.datasetItems.list({
94
143
  datasetName: name,
95
144
  limit: (_a = options == null ? void 0 : options.fetchItemsPageSize) != null ? _a : 50,
96
145
  page
@@ -101,12 +150,20 @@ var DatasetManager = class {
101
150
  }
102
151
  page++;
103
152
  }
153
+ const itemsWithLinkMethod = items.map((item) => ({
154
+ ...item,
155
+ link: this.createDatasetItemLinkFunction(item)
156
+ }));
157
+ const runExperiment = (params) => {
158
+ return this.langfuseClient.experiment.run({
159
+ data: items,
160
+ ...params
161
+ });
162
+ };
104
163
  const returnDataset = {
105
164
  ...dataset,
106
- items: items.map((item) => ({
107
- ...item,
108
- link: this.createDatasetItemLinkFunction(item)
109
- }))
165
+ items: itemsWithLinkMethod,
166
+ runExperiment
110
167
  };
111
168
  return returnDataset;
112
169
  }
@@ -119,7 +176,7 @@ var DatasetManager = class {
119
176
  */
120
177
  createDatasetItemLinkFunction(item) {
121
178
  const linkFunction = async (obj, runName, runArgs) => {
122
- return await this.apiClient.datasetRunItems.create({
179
+ return await this.langfuseClient.api.datasetRunItems.create({
123
180
  runName,
124
181
  datasetItemId: item.id,
125
182
  traceId: obj.otelSpan.spanContext().traceId,
@@ -131,8 +188,498 @@ var DatasetManager = class {
131
188
  }
132
189
  };
133
190
 
134
- // src/media/index.ts
191
+ // src/experiment/ExperimentManager.ts
135
192
  var import_core = require("@langfuse/core");
193
+ var import_tracing = require("@langfuse/tracing");
194
+ var import_api = require("@opentelemetry/api");
195
var ExperimentManager = class {
  /**
   * Creates a new ExperimentManager instance.
   *
   * @param params - Configuration object
   * @param params.langfuseClient - The Langfuse client instance for API communication
   * @internal
   */
  constructor(params) {
    this.langfuseClient = params.langfuseClient;
  }
  /**
   * Gets the global logger instance for experiment-related logging.
   *
   * @returns The global logger instance
   * @internal
   */
  get logger() {
    return (0, import_core.getGlobalLogger)();
  }
  /**
   * Executes an experiment by running a task on each data item and evaluating the results.
   *
   * This method orchestrates the complete experiment lifecycle:
   * 1. Executes the task function on each data item with proper tracing
   * 2. Runs item-level evaluators on each task output
   * 3. Executes run-level evaluators on the complete result set
   * 4. Links results to dataset runs (for Langfuse datasets)
   * 5. Stores all scores and traces in Langfuse
   *
   * Items are processed in consecutive batches of `maxConcurrency` items; a batch
   * must settle completely before the next one starts.
   *
   * @param config - The experiment configuration
   * @param config.name - Human-readable name for the experiment
   * @param config.description - Optional description of the experiment's purpose
   * @param config.metadata - Optional metadata to attach to the experiment run
   * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
   * @param config.task - Function that processes each data item and returns output
   * @param config.evaluators - Optional array of functions to evaluate each item's output
   * @param config.runEvaluators - Optional array of functions to evaluate the entire run
   * @param config.maxConcurrency - Maximum number of concurrent task executions
   *   (default: Infinity). Values that are not a number >= 1 are replaced by 1
   *   and a warning is logged.
   *
   * @returns Promise that resolves to experiment results including:
   * - itemResults: Results for each processed data item
   * - runEvaluations: Results from run-level evaluators
   * - datasetRunId: ID of the dataset run (if using Langfuse datasets)
   * - prettyPrint: Function to format and display results
   *
   * @throws {Error} When the task function fails for any item (the rejection is propagated).
   *   Failing evaluators and run evaluators do NOT throw: they are logged and their
   *   results are omitted.
   *
   * @example Simple experiment
   * ```typescript
   * const result = await langfuse.experiment.run({
   *   name: "Translation Quality Test",
   *   data: [
   *     { input: "Hello world", expectedOutput: "Hola mundo" },
   *     { input: "Good morning", expectedOutput: "Buenos días" }
   *   ],
   *   task: async ({ input }) => translateText(input, 'es'),
   *   evaluators: [
   *     async ({ output, expectedOutput }) => ({
   *       name: "bleu_score",
   *       value: calculateBleuScore(output, expectedOutput)
   *     })
   *   ]
   * });
   * ```
   *
   * @example Experiment with concurrency control
   * ```typescript
   * const result = await langfuse.experiment.run({
   *   name: "Large Scale Evaluation",
   *   data: largeBatchOfItems,
   *   task: expensiveModelCall,
   *   maxConcurrency: 5, // Process max 5 items simultaneously
   *   evaluators: [myEvaluator],
   *   runEvaluators: [
   *     async ({ itemResults }) => ({
   *       name: "average_score",
   *       value: itemResults.reduce((acc, r) => acc + r.evaluations[0].value, 0) / itemResults.length
   *     })
   *   ]
   * });
   * ```
   *
   * @see {@link ExperimentParams} for detailed parameter documentation
   * @see {@link ExperimentResult} for detailed return value documentation
   * @see {@link Evaluator} for evaluator function specifications
   * @see {@link RunEvaluator} for run evaluator function specifications
   *
   * @public
   */
  async run(config) {
    const {
      data,
      evaluators,
      task,
      name,
      description,
      metadata,
      maxConcurrency,
      runEvaluators
    } = config;
    if (!this.isOtelRegistered()) {
      this.logger.warn(
        "OpenTelemetry has not been set up. Traces will not be sent to Langfuse. See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
      );
    }
    // A batch size of 0, null or NaN would otherwise never advance the loop
    // index (or would silently process nothing), so it is normalized first.
    const batchSize = this.resolveBatchSize(maxConcurrency);
    const itemResults = [];
    for (let i = 0; i < data.length; i += batchSize) {
      const batch = data.slice(i, i + batchSize);
      const results = await Promise.all(
        batch.map(
          (item) => this.runItem({
            item,
            evaluators,
            task,
            experimentName: name,
            experimentDescription: description,
            experimentMetadata: metadata
          })
        )
      );
      // Appended one by one: spreading a very large batch into push() can
      // exceed the engine's argument limit when the batch is unbounded.
      for (const result of results) {
        itemResults.push(result);
      }
    }
    // Linking a single item may fail (it is only logged), so take the run id
    // from the first item that was linked rather than from index 0 only.
    const linkedResult = itemResults.find((r) => r.datasetRunId != null);
    const datasetRunId = linkedResult ? linkedResult.datasetRunId : void 0;
    let datasetRunUrl = void 0;
    if (datasetRunId && data.length > 0 && "datasetId" in data[0]) {
      const projectUrl = await this.getProjectUrl();
      datasetRunUrl = `${projectUrl}/datasets/${data[0].datasetId}/runs/${datasetRunId}`;
    }
    let runEvaluations = [];
    if (runEvaluators && runEvaluators.length > 0) {
      const pendingRunEvaluations = runEvaluators.map(async (runEvaluator) => {
        // `await` inside try/catch supports synchronous evaluators as well and
        // makes sure synchronous throws are logged instead of silently dropped.
        try {
          const result = await runEvaluator({ itemResults });
          return Array.isArray(result) ? result : [result];
        } catch (err) {
          this.logger.error("Run evaluator failed with error ", err);
          throw err;
        }
      });
      for (const settled of await Promise.allSettled(pendingRunEvaluations)) {
        if (settled.status === "fulfilled") {
          runEvaluations.push(...settled.value);
        }
      }
      if (datasetRunId) {
        for (const runEval of runEvaluations) {
          this.langfuseClient.score.create({ datasetRunId, ...runEval });
        }
      }
    }
    await this.langfuseClient.score.flush();
    return {
      itemResults,
      datasetRunId,
      runEvaluations,
      prettyPrint: async (options) => {
        const includeItemResults = options != null && options.includeItemResults != null ? options.includeItemResults : true;
        return await this.prettyPrintResults({
          datasetRunUrl,
          itemResults,
          originalData: data,
          runEvaluations,
          name: config.name,
          description: config.description,
          includeItemResults
        });
      }
    };
  }
  /**
   * Normalizes the user-supplied `maxConcurrency` into a usable batch size.
   *
   * @param maxConcurrency - Raw value from the experiment configuration
   * @returns `Infinity` when no value was given, the floored value when it is a
   *   number >= 1, and `1` (with a logged warning) for anything else
   *
   * @internal
   */
  resolveBatchSize(maxConcurrency) {
    if (maxConcurrency == null) {
      return Infinity;
    }
    if (typeof maxConcurrency !== "number" || Number.isNaN(maxConcurrency) || maxConcurrency < 1) {
      this.logger.warn(
        `Invalid maxConcurrency '${String(maxConcurrency)}' provided. Expected a number >= 1; processing items one at a time.`
      );
      return 1;
    }
    // Math.floor(Infinity) is Infinity, so the unbounded case is preserved.
    return Math.floor(maxConcurrency);
  }
  /**
   * Derives the project base URL in the Langfuse UI.
   *
   * The client only exposes a trace URL builder, so a placeholder trace id is
   * used and everything from "/traces" onwards is cut off.
   *
   * @returns Promise resolving to the project URL without a trailing "/traces/..." part
   *
   * @internal
   */
  async getProjectUrl() {
    const placeholderTraceUrl = await this.langfuseClient.getTraceUrl("mock");
    return placeholderTraceUrl.split("/traces")[0];
  }
  /**
   * Executes the task and evaluators for a single data item.
   *
   * This method handles the complete processing pipeline for one data item:
   * 1. Executes the task within a traced observation span
   * 2. Links the result to a dataset run (if the item has an `id`)
   * 3. Runs all item-level evaluators on the output
   * 4. Stores evaluation scores in Langfuse
   * 5. Handles evaluator errors gracefully by continuing with remaining evaluators
   *
   * @param params - Parameters for item execution
   * @param params.experimentName - Name of the parent experiment
   * @param params.experimentDescription - Description of the parent experiment
   * @param params.experimentMetadata - Metadata for the parent experiment
   * @param params.item - The data item to process
   * @param params.task - The task function to execute
   * @param params.evaluators - Optional evaluators to run on the output
   *
   * @returns Promise resolving to the item result with output, evaluations, and trace info
   *
   * @throws {Error} When task execution fails (propagated from task function)
   *
   * @internal
   */
  async runItem(params) {
    const { item, task } = params;
    // Treat both undefined and null as "no evaluators".
    const evaluators = params.evaluators != null ? params.evaluators : [];
    const { output, traceId } = await (0, import_tracing.startActiveObservation)(
      "experiment-item-run",
      async (span) => {
        const taskOutput = await task(item);
        span.update({
          input: item.input,
          output: taskOutput
        });
        return { output: taskOutput, traceId: span.traceId };
      }
    );
    let datasetRunId = void 0;
    if ("id" in item) {
      // Linking is best effort: a failure is logged and the item result is
      // still returned, just without a dataset run id.
      try {
        const linked = await this.langfuseClient.api.datasetRunItems.create({
          runName: params.experimentName,
          runDescription: params.experimentDescription,
          metadata: params.experimentMetadata,
          datasetItemId: item.id,
          traceId
        });
        datasetRunId = linked.datasetRunId;
      } catch (err) {
        this.logger.error("Linking dataset run item failed", err);
      }
    }
    const pendingEvaluations = evaluators.map(async (evaluator) => {
      const evaluatorParams = {
        input: item.input,
        expectedOutput: item.expectedOutput,
        output
      };
      try {
        const result = await evaluator(evaluatorParams);
        return Array.isArray(result) ? result : [result];
      } catch (err) {
        // safeStringify keeps the log call itself from throwing on
        // circular or BigInt-containing params.
        this.logger.error(
          `Evaluator '${evaluator.name}' failed for params\n\n${this.safeStringify(evaluatorParams)}\n\nwith error: ${err}`
        );
        throw err;
      }
    });
    const evals = [];
    for (const settled of await Promise.allSettled(pendingEvaluations)) {
      if (settled.status === "fulfilled") {
        evals.push(...settled.value.flat());
      }
    }
    for (const ev of evals) {
      this.langfuseClient.score.create({
        traceId,
        name: ev.name,
        comment: ev.comment,
        value: ev.value,
        metadata: ev.metadata,
        dataType: ev.dataType
      });
    }
    return {
      output,
      evaluations: evals,
      traceId,
      datasetRunId
    };
  }
  /**
   * Formats experiment results into a human-readable string representation.
   *
   * Creates a comprehensive, nicely formatted summary of the experiment including:
   * - Individual item results with inputs, outputs, expected values, and scores
   * - Dataset item and trace links (when available)
   * - Experiment overview with aggregate statistics
   * - Average scores across all evaluations
   * - Run-level evaluation results
   * - Links to dataset runs in the Langfuse UI
   *
   * @param params - Formatting parameters
   * @param params.datasetRunUrl - Optional URL to the dataset run in Langfuse UI
   * @param params.itemResults - Results from processing each data item
   * @param params.originalData - The original input data items
   * @param params.runEvaluations - Results from run-level evaluators
   * @param params.name - Name of the experiment
   * @param params.description - Optional description of the experiment
   * @param params.includeItemResults - Whether to include individual item details (default: true)
   *
   * @returns Promise resolving to formatted string representation
   *
   * @example Output format
   * ```
   * 1. Item 1:
   * Input: What is the capital of France?
   * Expected: Paris
   * Actual: Paris
   * Scores:
   * • exact_match: 1.000
   * • similarity: 0.95
   * 💭 Very close match with expected output
   *
   * Dataset Item:
   * https://cloud.langfuse.com/project/123/datasets/456/items/789
   *
   * Trace:
   * https://cloud.langfuse.com/project/123/traces/abc123
   *
   * ──────────────────────────────────────────────────
   * 📊 Translation Quality Test - Testing model accuracy
   * 2 items
   * Evaluations:
   * • exact_match
   * • similarity
   *
   * Average Scores:
   * • exact_match: 0.850
   * • similarity: 0.923
   *
   * Run Evaluations:
   * • overall_quality: 0.887
   * 💭 Good performance with room for improvement
   *
   * 🔗 Dataset Run:
   * https://cloud.langfuse.com/project/123/datasets/456/runs/def456
   * ```
   *
   * @internal
   */
  async prettyPrintResults(params) {
    const {
      itemResults,
      originalData,
      runEvaluations,
      name,
      description,
      includeItemResults = true
    } = params;
    if (itemResults.length === 0) {
      return "No experiment results to display.";
    }
    let output = "";
    if (includeItemResults) {
      // The project URL is the same for every item; resolve it at most once,
      // and only if some item actually belongs to a dataset.
      let projectUrl = void 0;
      for (let index = 0; index < itemResults.length; index++) {
        const result = itemResults[index];
        const originalItem = originalData[index];
        output += `\n${index + 1}. Item ${index + 1}:\n`;
        if (originalItem != null && originalItem.input !== void 0) {
          output += ` Input: ${this.formatValue(originalItem.input)}\n`;
        }
        let expectedOutput = originalItem != null ? originalItem.expectedOutput : void 0;
        if (expectedOutput == null) {
          expectedOutput = result.expectedOutput;
        }
        if (expectedOutput == null) {
          expectedOutput = null;
        }
        output += ` Expected: ${expectedOutput !== null ? this.formatValue(expectedOutput) : "null"}\n`;
        output += ` Actual: ${this.formatValue(result.output)}\n`;
        if (result.evaluations.length > 0) {
          output += ` Scores:\n`;
          for (const evaluation of result.evaluations) {
            const score = typeof evaluation.value === "number" ? evaluation.value.toFixed(3) : evaluation.value;
            output += ` \u2022 ${evaluation.name}: ${score}`;
            if (evaluation.comment) {
              output += `\n\u{1F4AD} ${evaluation.comment}`;
            }
            output += "\n";
          }
        }
        if (originalItem && "id" in originalItem && "datasetId" in originalItem) {
          if (projectUrl === void 0) {
            projectUrl = await this.getProjectUrl();
          }
          const datasetItemUrl = `${projectUrl}/datasets/${originalItem.datasetId}/items/${originalItem.id}`;
          output += `\nDataset Item:\n${datasetItemUrl}\n`;
        }
        if (result.traceId) {
          const traceUrl = await this.langfuseClient.getTraceUrl(
            result.traceId
          );
          output += `\nTrace:\n${traceUrl}\n`;
        }
      }
    } else {
      output += `Individual Results: Hidden (${itemResults.length} items)\n`;
      output += "\u{1F4A1} Call prettyPrint({ includeItemResults: true }) to view them\n";
    }
    const totalItems = itemResults.length;
    const evaluationNames = new Set(
      itemResults.flatMap((r) => r.evaluations.map((e) => e.name))
    );
    output += `\n${"\u2500".repeat(50)}\n`;
    output += `\u{1F4CA} ${name}`;
    if (description) {
      output += ` - ${description}`;
    }
    output += `\n${totalItems} items`;
    if (evaluationNames.size > 0) {
      output += `\nEvaluations:`;
      for (const evalName of evaluationNames) {
        output += `\n\u2022 ${evalName}`;
      }
      output += "\n";
      output += `\nAverage Scores:`;
      for (const evalName of evaluationNames) {
        // Only numeric values take part in the average; other score types are skipped.
        const scores = itemResults.flatMap((r) => r.evaluations).filter((e) => e.name === evalName && typeof e.value === "number").map((e) => e.value);
        if (scores.length > 0) {
          const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
          output += `\n\u2022 ${evalName}: ${avg.toFixed(3)}`;
        }
      }
      output += "\n";
    }
    if (runEvaluations.length > 0) {
      output += `\nRun Evaluations:`;
      for (const runEval of runEvaluations) {
        const score = typeof runEval.value === "number" ? runEval.value.toFixed(3) : runEval.value;
        output += `\n\u2022 ${runEval.name}: ${score}`;
        if (runEval.comment) {
          output += `\n\u{1F4AD} ${runEval.comment}`;
        }
      }
      output += "\n";
    }
    if (params.datasetRunUrl) {
      output += `\n\u{1F517} Dataset Run:\n${params.datasetRunUrl}`;
    }
    return output;
  }
  /**
   * Formats a value for display in pretty-printed output.
   *
   * Handles different value types appropriately:
   * - Strings: returned as-is, or cut to 47 characters plus "..." when longer than 50
   * - Everything else: JSON string representation (not truncated); values JSON
   *   cannot represent fall back to a plain string conversion instead of throwing
   *
   * @param value - The value to format
   * @returns Formatted string representation suitable for display
   *
   * @internal
   */
  formatValue(value) {
    if (typeof value === "string") {
      return value.length > 50 ? `${value.substring(0, 47)}...` : value;
    }
    return this.safeStringify(value);
  }
  /**
   * JSON-serializes a value without ever throwing.
   *
   * @param value - The value to serialize
   * @returns The JSON text, or a plain string conversion when JSON.stringify
   *   throws (circular structures, BigInt) or yields undefined (undefined,
   *   functions, symbols)
   *
   * @internal
   */
  safeStringify(value) {
    try {
      const json = JSON.stringify(value);
      return json === void 0 ? String(value) : json;
    } catch (err) {
      // Object.prototype.toString is used for objects because String() throws
      // for objects without a prototype.
      return typeof value === "object" ? Object.prototype.toString.call(value) : String(value);
    }
  }
  /**
   * Checks whether a real OpenTelemetry tracer provider has been registered.
   *
   * The globally registered provider is unwrapped from its proxy (if any) and
   * compared by constructor name against "NoopTracerProvider".
   * NOTE(review): a name-based check can misreport if the OpenTelemetry API
   * class names are minified or renamed — confirm for bundled builds.
   *
   * @returns `false` when the effective provider is the no-op provider, `true` otherwise
   *
   * @internal
   */
  isOtelRegistered() {
    let tracerProvider = import_api.trace.getTracerProvider();
    if (tracerProvider instanceof import_api.ProxyTracerProvider) {
      tracerProvider = tracerProvider.getDelegate();
    }
    return tracerProvider.constructor.name !== "NoopTracerProvider";
  }
};
680
+
681
+ // src/media/index.ts
682
+ var import_core2 = require("@langfuse/core");
136
683
  var MediaManager = class _MediaManager {
137
684
  /**
138
685
  * Creates a new MediaManager instance.
@@ -210,14 +757,14 @@ var MediaManager = class _MediaManager {
210
757
  const uint8Content = new Uint8Array(
211
758
  await mediaContent.arrayBuffer()
212
759
  );
213
- const base64MediaContent = (0, import_core.bytesToBase64)(uint8Content);
760
+ const base64MediaContent = (0, import_core2.bytesToBase64)(uint8Content);
214
761
  const base64DataUri = `data:${mediaData.contentType};base64,${base64MediaContent}`;
215
762
  referenceStringToMediaContentMap.set(
216
763
  referenceString,
217
764
  base64DataUri
218
765
  );
219
766
  } catch (error) {
220
- (0, import_core.getGlobalLogger)().warn(
767
+ (0, import_core2.getGlobalLogger)().warn(
221
768
  "Error fetching media content for reference string",
222
769
  referenceString,
223
770
  error
@@ -293,10 +840,10 @@ var MediaManager = class _MediaManager {
293
840
  };
294
841
 
295
842
  // src/prompt/promptManager.ts
296
- var import_core3 = require("@langfuse/core");
843
+ var import_core4 = require("@langfuse/core");
297
844
 
298
845
  // src/prompt/promptCache.ts
299
- var import_core2 = require("@langfuse/core");
846
+ var import_core3 = require("@langfuse/core");
300
847
  var DEFAULT_PROMPT_CACHE_TTL_SECONDS = 60;
301
848
  var LangfusePromptCacheItem = class {
302
849
  constructor(value, ttlSeconds) {
@@ -348,7 +895,7 @@ var LangfusePromptCache = class {
348
895
  return this._refreshingKeys.has(key);
349
896
  }
350
897
  invalidate(promptName) {
351
- (0, import_core2.getGlobalLogger)().debug(
898
+ (0, import_core3.getGlobalLogger)().debug(
352
899
  "Invalidating cache keys for",
353
900
  promptName,
354
901
  this._cache.keys()
@@ -692,7 +1239,7 @@ var PromptManager = class {
692
1239
  this.cache = new LangfusePromptCache();
693
1240
  }
694
1241
  get logger() {
695
- return (0, import_core3.getGlobalLogger)();
1242
+ return (0, import_core4.getGlobalLogger)();
696
1243
  }
697
1244
  /**
698
1245
  * Creates a new prompt in Langfuse.
@@ -919,8 +1466,8 @@ var PromptManager = class {
919
1466
  };
920
1467
 
921
1468
  // src/score/index.ts
922
- var import_core4 = require("@langfuse/core");
923
- var import_api = require("@opentelemetry/api");
1469
+ var import_core5 = require("@langfuse/core");
1470
+ var import_api2 = require("@opentelemetry/api");
924
1471
  var MAX_QUEUE_SIZE = 1e5;
925
1472
  var MAX_BATCH_SIZE = 100;
926
1473
  var ScoreManager = class {
@@ -935,13 +1482,13 @@ var ScoreManager = class {
935
1482
  this.flushPromise = null;
936
1483
  this.flushTimer = null;
937
1484
  this.apiClient = params.apiClient;
938
- const envFlushAtCount = (0, import_core4.getEnv)("LANGFUSE_FLUSH_AT");
939
- const envFlushIntervalSeconds = (0, import_core4.getEnv)("LANGFUSE_FLUSH_INTERVAL");
1485
+ const envFlushAtCount = (0, import_core5.getEnv)("LANGFUSE_FLUSH_AT");
1486
+ const envFlushIntervalSeconds = (0, import_core5.getEnv)("LANGFUSE_FLUSH_INTERVAL");
940
1487
  this.flushAtCount = envFlushAtCount ? Number(envFlushAtCount) : 10;
941
1488
  this.flushIntervalSeconds = envFlushIntervalSeconds ? Number(envFlushIntervalSeconds) : 1;
942
1489
  }
943
1490
  get logger() {
944
- return (0, import_core4.getGlobalLogger)();
1491
+ return (0, import_core5.getGlobalLogger)();
945
1492
  }
946
1493
  /**
947
1494
  * Creates a new score event and adds it to the processing queue.
@@ -966,11 +1513,11 @@ var ScoreManager = class {
966
1513
  var _a, _b;
967
1514
  const scoreData = {
968
1515
  ...data,
969
- id: (_a = data.id) != null ? _a : (0, import_core4.generateUUID)(),
970
- environment: (_b = data.environment) != null ? _b : (0, import_core4.getEnv)("LANGFUSE_TRACING_ENVIRONMENT")
1516
+ id: (_a = data.id) != null ? _a : (0, import_core5.generateUUID)(),
1517
+ environment: (_b = data.environment) != null ? _b : (0, import_core5.getEnv)("LANGFUSE_TRACING_ENVIRONMENT")
971
1518
  };
972
1519
  const scoreIngestionEvent = {
973
- id: (0, import_core4.generateUUID)(),
1520
+ id: (0, import_core5.generateUUID)(),
974
1521
  type: "score-create",
975
1522
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
976
1523
  body: scoreData
@@ -982,10 +1529,14 @@ var ScoreManager = class {
982
1529
  return;
983
1530
  }
984
1531
  this.eventQueue.push(scoreIngestionEvent);
1532
+ this.logger.debug(
1533
+ "Added score event to queue:\n",
1534
+ JSON.stringify(scoreIngestionEvent, null, 2)
1535
+ );
985
1536
  if (this.eventQueue.length >= this.flushAtCount) {
986
1537
  this.flushPromise = this.flush();
987
1538
  } else if (!this.flushTimer) {
988
- this.flushTimer = (0, import_core4.safeSetTimeout)(() => {
1539
+ this.flushTimer = (0, import_core5.safeSetTimeout)(() => {
989
1540
  this.flushPromise = this.flush();
990
1541
  }, this.flushIntervalSeconds * 1e3);
991
1542
  }
@@ -1068,7 +1619,7 @@ var ScoreManager = class {
1068
1619
  * ```
1069
1620
  */
1070
1621
  activeObservation(data) {
1071
- const currentOtelSpan = import_api.trace.getActiveSpan();
1622
+ const currentOtelSpan = import_api2.trace.getActiveSpan();
1072
1623
  if (!currentOtelSpan) {
1073
1624
  this.logger.warn("No active span in context to score.");
1074
1625
  return;
@@ -1104,7 +1655,7 @@ var ScoreManager = class {
1104
1655
  * ```
1105
1656
  */
1106
1657
  activeTrace(data) {
1107
- const currentOtelSpan = import_api.trace.getActiveSpan();
1658
+ const currentOtelSpan = import_api2.trace.getActiveSpan();
1108
1659
  if (!currentOtelSpan) {
1109
1660
  this.logger.warn("No active span in context to score trace.");
1110
1661
  return;
@@ -1204,10 +1755,10 @@ var LangfuseClient = class {
1204
1755
  constructor(params) {
1205
1756
  this.projectId = null;
1206
1757
  var _a, _b, _c, _d, _e, _f, _g;
1207
- const logger = (0, import_core5.getGlobalLogger)();
1208
- const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : (0, import_core5.getEnv)("LANGFUSE_PUBLIC_KEY");
1209
- const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : (0, import_core5.getEnv)("LANGFUSE_SECRET_KEY");
1210
- this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : (0, import_core5.getEnv)("LANGFUSE_BASE_URL")) != null ? _d : (0, import_core5.getEnv)("LANGFUSE_BASEURL")) != null ? _e : (
1758
+ const logger = (0, import_core6.getGlobalLogger)();
1759
+ const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : (0, import_core6.getEnv)("LANGFUSE_PUBLIC_KEY");
1760
+ const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : (0, import_core6.getEnv)("LANGFUSE_SECRET_KEY");
1761
+ this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : (0, import_core6.getEnv)("LANGFUSE_BASE_URL")) != null ? _d : (0, import_core6.getEnv)("LANGFUSE_BASEURL")) != null ? _e : (
1211
1762
  // legacy v2
1212
1763
  "https://cloud.langfuse.com"
1213
1764
  );
@@ -1221,13 +1772,13 @@ var LangfuseClient = class {
1221
1772
  "No secret key provided in constructor or as LANGFUSE_SECRET_KEY env var. Client operations will fail."
1222
1773
  );
1223
1774
  }
1224
- const timeoutSeconds = (_g = params == null ? void 0 : params.timeout) != null ? _g : Number((_f = (0, import_core5.getEnv)("LANGFUSE_TIMEOUT")) != null ? _f : 5);
1225
- this.api = new import_core5.LangfuseAPIClient({
1775
+ const timeoutSeconds = (_g = params == null ? void 0 : params.timeout) != null ? _g : Number((_f = (0, import_core6.getEnv)("LANGFUSE_TIMEOUT")) != null ? _f : 5);
1776
+ this.api = new import_core6.LangfuseAPIClient({
1226
1777
  baseUrl: this.baseUrl,
1227
1778
  username: publicKey,
1228
1779
  password: secretKey,
1229
1780
  xLangfusePublicKey: publicKey,
1230
- xLangfuseSdkVersion: import_core5.LANGFUSE_SDK_VERSION,
1781
+ xLangfuseSdkVersion: import_core6.LANGFUSE_SDK_VERSION,
1231
1782
  xLangfuseSdkName: "javascript",
1232
1783
  environment: "",
1233
1784
  // noop as baseUrl is set
@@ -1239,9 +1790,10 @@ var LangfuseClient = class {
1239
1790
  timeoutSeconds
1240
1791
  });
1241
1792
  this.prompt = new PromptManager({ apiClient: this.api });
1242
- this.dataset = new DatasetManager({ apiClient: this.api });
1793
+ this.dataset = new DatasetManager({ langfuseClient: this });
1243
1794
  this.score = new ScoreManager({ apiClient: this.api });
1244
1795
  this.media = new MediaManager({ apiClient: this.api });
1796
+ this.experiment = new ExperimentManager({ langfuseClient: this });
1245
1797
  this.getPrompt = this.prompt.get.bind(this.prompt);
1246
1798
  this.createPrompt = this.prompt.create.bind(this.prompt);
1247
1799
  this.updatePrompt = this.prompt.update.bind(this.prompt);
@@ -1316,15 +1868,36 @@ var LangfuseClient = class {
1316
1868
  return traceUrl;
1317
1869
  }
1318
1870
  };
1871
+
1872
+ // src/experiment/adapters.ts
1873
/**
 * Adapts an evaluator that follows the autoevals calling convention so it can
 * be used as a Langfuse experiment evaluator.
 *
 * The returned evaluator calls `autoevalEvaluator` with a single object built
 * from `params` plus `input`, `output` and `expected` (taken from the Langfuse
 * evaluator params `input`, `output` and `expectedOutput`; these three always
 * override same-named keys in `params`). The resolved score object is mapped to
 * `{ name, value, metadata }`, where a `null`/`undefined` `score` becomes `0`.
 *
 * @param autoevalEvaluator - Function returning (a promise of) an object with
 *   `name`, `score` and optional `metadata`
 * @param params - Optional extra arguments forwarded to every evaluator call
 * @returns Async Langfuse evaluator function
 * @throws {TypeError} If `autoevalEvaluator` is not a function. Failing here is
 *   deliberate: an invalid adapter would otherwise only fail once it is invoked
 *   for each item, far away from the misconfiguration.
 */
function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
  if (typeof autoevalEvaluator !== "function") {
    throw new TypeError(
      `autoevalsToLangfuseEvaluator expects a function as evaluator, received ${typeof autoevalEvaluator}`
    );
  }
  const langfuseEvaluator = async (langfuseEvaluatorParams) => {
    const score = await autoevalEvaluator({
      // Spreading null/undefined is a no-op, so no explicit fallback object is needed.
      ...params,
      input: langfuseEvaluatorParams.input,
      output: langfuseEvaluatorParams.output,
      expected: langfuseEvaluatorParams.expectedOutput
    });
    return {
      name: score.name,
      value: score.score != null ? score.score : 0,
      metadata: score.metadata
    };
  };
  return langfuseEvaluator;
}
1319
1890
  // Annotate the CommonJS export names for ESM import in node:
1320
1891
  0 && (module.exports = {
1321
1892
  ChatMessageType,
1322
1893
  ChatPromptClient,
1323
1894
  DatasetManager,
1895
+ ExperimentManager,
1324
1896
  LangfuseClient,
1325
1897
  MediaManager,
1326
1898
  PromptManager,
1327
1899
  ScoreManager,
1328
- TextPromptClient
1900
+ TextPromptClient,
1901
+ autoevalsToLangfuseEvaluator
1329
1902
  });
1330
1903
  //# sourceMappingURL=index.cjs.map