@langfuse/client 4.0.0 → 4.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -33,16 +33,18 @@ __export(index_exports, {
33
33
  ChatMessageType: () => ChatMessageType,
34
34
  ChatPromptClient: () => ChatPromptClient,
35
35
  DatasetManager: () => DatasetManager,
36
+ ExperimentManager: () => ExperimentManager,
36
37
  LangfuseClient: () => LangfuseClient,
37
38
  MediaManager: () => MediaManager,
38
39
  PromptManager: () => PromptManager,
39
40
  ScoreManager: () => ScoreManager,
40
- TextPromptClient: () => TextPromptClient
41
+ TextPromptClient: () => TextPromptClient,
42
+ autoevalsToLangfuseEvaluator: () => autoevalsToLangfuseEvaluator
41
43
  });
42
44
  module.exports = __toCommonJS(index_exports);
43
45
 
44
46
  // src/LangfuseClient.ts
45
- var import_core5 = require("@langfuse/core");
47
+ var import_core6 = require("@langfuse/core");
46
48
 
47
49
  // src/dataset/index.ts
48
50
  var DatasetManager = class {
@@ -53,44 +55,91 @@ var DatasetManager = class {
53
55
  * @internal
54
56
  */
55
57
  constructor(params) {
56
- this.apiClient = params.apiClient;
58
+ this.langfuseClient = params.langfuseClient;
57
59
  }
58
60
  /**
59
- * Retrieves a dataset by name along with all its items.
61
+ * Retrieves a dataset by name with all its items and experiment functionality.
60
62
  *
61
- * This method automatically handles pagination to fetch all dataset items
62
- * and enhances each item with a `link` function for easy experiment tracking.
63
+ * This method fetches a dataset and all its associated items, with support
64
+ * for automatic pagination to handle large datasets efficiently. The returned
65
+ * dataset object includes enhanced functionality for linking items to traces
66
+ * and running experiments directly on the dataset.
63
67
  *
64
68
  * @param name - The name of the dataset to retrieve
65
- * @param options - Optional configuration for fetching
69
+ * @param options - Optional configuration for data fetching
66
70
  * @param options.fetchItemsPageSize - Number of items to fetch per page (default: 50)
71
+ * @returns Promise resolving to enhanced dataset with items, linking, and experiment capabilities
67
72
  *
68
- * @returns Promise that resolves to the dataset with enhanced items
73
+ * @example Basic dataset retrieval
74
+ * ```typescript
75
+ * const dataset = await langfuse.dataset.get("my-evaluation-dataset");
76
+ * console.log(`Dataset ${dataset.name} has ${dataset.items.length} items`);
69
77
  *
70
- * @example
78
+ * // Access dataset properties
79
+ * console.log(dataset.description);
80
+ * console.log(dataset.metadata);
81
+ * ```
82
+ *
83
+ * @example Working with dataset items
71
84
  * ```typescript
72
- * const dataset = await langfuse.dataset.get("my-dataset");
85
+ * const dataset = await langfuse.dataset.get("qa-dataset");
73
86
  *
74
87
  * for (const item of dataset.items) {
75
- * // Use the item data for your experiment
76
- * const result = await processItem(item.input);
77
- *
78
- * // Link the result to the dataset item
79
- * await item.link(
80
- * { otelSpan: currentSpan },
81
- * "experiment-run-1",
82
- * { description: "Testing new model" }
83
- * );
88
+ * console.log("Question:", item.input);
89
+ * console.log("Expected Answer:", item.expectedOutput);
90
+ *
91
+ * // Each item has a link function for connecting to traces
92
+ * // await item.link(span, "experiment-name");
84
93
  * }
85
94
  * ```
95
+ *
96
+ * @example Running experiments on datasets
97
+ * ```typescript
98
+ * const dataset = await langfuse.dataset.get("benchmark-dataset");
99
+ *
100
+ * const result = await dataset.runExperiment({
101
+ * name: "GPT-4 Benchmark",
102
+ * description: "Evaluating GPT-4 on our benchmark tasks",
103
+ * task: async ({ input }) => {
104
+ * const response = await openai.chat.completions.create({
105
+ * model: "gpt-4",
106
+ * messages: [{ role: "user", content: input }]
107
+ * });
108
+ * return response.choices[0].message.content;
109
+ * },
110
+ * evaluators: [
111
+ * async ({ output, expectedOutput }) => ({
112
+ * name: "exact_match",
113
+ * value: output === expectedOutput ? 1 : 0
114
+ * })
115
+ * ]
116
+ * });
117
+ *
118
+ * console.log(await result.prettyPrint());
119
+ * ```
120
+ *
121
+ * @example Handling large datasets
122
+ * ```typescript
123
+ * // For very large datasets, use smaller page sizes
124
+ * const largeDataset = await langfuse.dataset.get(
125
+ * "large-dataset",
126
+ * { fetchItemsPageSize: 100 }
127
+ * );
128
+ * ```
129
+ *
130
+ * @throws {Error} If the dataset does not exist or cannot be accessed
131
+ * @see {@link FetchedDataset} for the complete return type specification
132
+ * @see {@link RunExperimentOnDataset} for experiment execution details
133
+ * @public
134
+ * @since 4.0.0
86
135
  */
87
136
  async get(name, options) {
88
137
  var _a;
89
- const dataset = await this.apiClient.datasets.get(name);
138
+ const dataset = await this.langfuseClient.api.datasets.get(name);
90
139
  const items = [];
91
140
  let page = 1;
92
141
  while (true) {
93
- const itemsResponse = await this.apiClient.datasetItems.list({
142
+ const itemsResponse = await this.langfuseClient.api.datasetItems.list({
94
143
  datasetName: name,
95
144
  limit: (_a = options == null ? void 0 : options.fetchItemsPageSize) != null ? _a : 50,
96
145
  page
@@ -101,12 +150,20 @@ var DatasetManager = class {
101
150
  }
102
151
  page++;
103
152
  }
153
+ const itemsWithLinkMethod = items.map((item) => ({
154
+ ...item,
155
+ link: this.createDatasetItemLinkFunction(item)
156
+ }));
157
+ const runExperiment = (params) => {
158
+ return this.langfuseClient.experiment.run({
159
+ data: items,
160
+ ...params
161
+ });
162
+ };
104
163
  const returnDataset = {
105
164
  ...dataset,
106
- items: items.map((item) => ({
107
- ...item,
108
- link: this.createDatasetItemLinkFunction(item)
109
- }))
165
+ items: itemsWithLinkMethod,
166
+ runExperiment
110
167
  };
111
168
  return returnDataset;
112
169
  }
@@ -119,7 +176,7 @@ var DatasetManager = class {
119
176
  */
120
177
  createDatasetItemLinkFunction(item) {
121
178
  const linkFunction = async (obj, runName, runArgs) => {
122
- return await this.apiClient.datasetRunItems.create({
179
+ return await this.langfuseClient.api.datasetRunItems.create({
123
180
  runName,
124
181
  datasetItemId: item.id,
125
182
  traceId: obj.otelSpan.spanContext().traceId,
@@ -131,8 +188,508 @@ var DatasetManager = class {
131
188
  }
132
189
  };
133
190
 
134
- // src/media/index.ts
191
+ // src/experiment/ExperimentManager.ts
135
192
  var import_core = require("@langfuse/core");
193
+ var import_tracing = require("@langfuse/tracing");
194
+ var import_api = require("@opentelemetry/api");
195
var ExperimentManager = class {
  /**
   * Creates a new ExperimentManager instance.
   *
   * @param params - Configuration object
   * @param params.langfuseClient - The Langfuse client instance used for API
   *   calls, score creation and URL generation
   * @internal
   */
  constructor(params) {
    this.langfuseClient = params.langfuseClient;
  }
  /**
   * Gets the global logger instance for experiment-related logging.
   *
   * @returns The global logger instance
   * @internal
   */
  get logger() {
    return (0, import_core.getGlobalLogger)();
  }
  /**
   * Executes an experiment by running a task on each data item and evaluating the results.
   *
   * This method orchestrates the complete experiment lifecycle:
   * 1. Executes the task function on each data item inside a traced observation
   * 2. Runs item-level evaluators on each task output
   * 3. Executes run-level evaluators on the complete result set
   * 4. Links results to dataset runs (for items that carry an `id`)
   * 5. Queues all scores on the client's score manager and flushes it
   *
   * Items are processed in consecutive batches of at most `maxConcurrency`
   * items; a batch must settle completely before the next one starts.
   *
   * @param config - The experiment configuration
   * @param config.name - Human-readable name for the experiment
   * @param config.description - Optional description of the experiment's purpose
   * @param config.metadata - Optional metadata to attach to the experiment run
   * @param config.data - Array of data items to process
   * @param config.task - Function that processes each data item and returns output
   * @param config.evaluators - Optional array of functions to evaluate each item's output
   * @param config.runEvaluators - Optional array of functions to evaluate the entire run
   * @param config.maxConcurrency - Maximum number of concurrent task executions
   *   (default: Infinity). Must be a number >= 1; fractional values are rounded down.
   *
   * @returns Promise that resolves to experiment results including:
   *   - itemResults: Results for each processed data item
   *   - runEvaluations: Results from run-level evaluators that succeeded
   *   - datasetRunId: ID of the dataset run (if any item was linked to one)
   *   - datasetRunUrl: URL of the dataset run (if a run id and dataset id are known)
   *   - prettyPrint: Function to format the results as a string
   *
   * @throws {Error} When `maxConcurrency` is not a number >= 1
   * @throws {Error} When the task function fails for any item (propagated as-is)
   *
   * @example Simple experiment
   * ```typescript
   * const result = await langfuse.experiment.run({
   *   name: "Translation Quality Test",
   *   data: [
   *     { input: "Hello world", expectedOutput: "Hola mundo" },
   *     { input: "Good morning", expectedOutput: "Buenos días" }
   *   ],
   *   task: async ({ input }) => translateText(input, 'es'),
   *   evaluators: [
   *     async ({ output, expectedOutput }) => ({
   *       name: "bleu_score",
   *       value: calculateBleuScore(output, expectedOutput)
   *     })
   *   ]
   * });
   * ```
   *
   * @example Experiment with concurrency control
   * ```typescript
   * const result = await langfuse.experiment.run({
   *   name: "Large Scale Evaluation",
   *   data: largeBatchOfItems,
   *   task: expensiveModelCall,
   *   maxConcurrency: 5, // Process max 5 items simultaneously
   *   evaluators: [myEvaluator],
   *   runEvaluators: [
   *     async ({ itemResults }) => ({
   *       name: "average_score",
   *       value: itemResults.reduce((acc, r) => acc + r.evaluations[0].value, 0) / itemResults.length
   *     })
   *   ]
   * });
   * ```
   *
   * @see {@link ExperimentParams} for detailed parameter documentation
   * @see {@link ExperimentResult} for detailed return value documentation
   * @see {@link Evaluator} for evaluator function specifications
   * @see {@link RunEvaluator} for run evaluator function specifications
   *
   * @public
   */
  async run(config) {
    const {
      data,
      evaluators,
      task,
      name,
      description,
      metadata,
      maxConcurrency = Infinity,
      runEvaluators
    } = config;
    // A batch size of 0, a negative number or null would never advance the
    // batching loop below, and NaN would silently process nothing.
    if (typeof maxConcurrency !== "number" || Number.isNaN(maxConcurrency) || maxConcurrency < 1) {
      throw new Error(
        `maxConcurrency must be a number >= 1, received: ${String(maxConcurrency)}`
      );
    }
    const batchSize = Math.floor(maxConcurrency);
    if (!this.isOtelRegistered()) {
      this.logger.warn(
        "OpenTelemetry has not been set up. Traces will not be sent to Langfuse. See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
      );
    }
    const itemResults = [];
    for (let start = 0; start < data.length; start += batchSize) {
      const batch = data.slice(start, start + batchSize);
      const batchResults = await Promise.all(
        batch.map(
          (item) => this.runItem({
            item,
            evaluators,
            task,
            experimentName: name,
            experimentDescription: description,
            experimentMetadata: metadata
          })
        )
      );
      itemResults.push(...batchResults);
    }
    // Linking is best-effort per item, so the first item may lack a run id
    // even though later items were linked successfully.
    const linkedResult = itemResults.find((result) => result.datasetRunId);
    const datasetRunId = linkedResult ? linkedResult.datasetRunId : void 0;
    let datasetRunUrl = void 0;
    if (datasetRunId && data.length > 0 && "datasetId" in data[0]) {
      const projectUrl = await this.getProjectUrl();
      datasetRunUrl = `${projectUrl}/datasets/${data[0].datasetId}/runs/${datasetRunId}`;
    }
    const runEvaluations = [];
    if (runEvaluators && runEvaluators.length > 0) {
      const settled = await Promise.allSettled(
        runEvaluators.map(async (runEvaluator) => {
          try {
            // `await` accepts both promise-returning and plain-value evaluators.
            const result = await runEvaluator({ itemResults });
            return Array.isArray(result) ? result : [result];
          } catch (err) {
            this.logger.error("Run evaluator failed with error ", err);
            throw err;
          }
        })
      );
      for (const outcome of settled) {
        // Failed run evaluators were already logged; only successes are kept.
        if (outcome.status === "fulfilled") {
          runEvaluations.push(...outcome.value);
        }
      }
      if (datasetRunId) {
        for (const runEval of runEvaluations) {
          this.langfuseClient.score.create({ datasetRunId, ...runEval });
        }
      }
    }
    await this.langfuseClient.score.flush();
    return {
      itemResults,
      datasetRunId,
      datasetRunUrl,
      runEvaluations,
      prettyPrint: async (options) => {
        const includeItemResults = options != null && options.includeItemResults != null ? options.includeItemResults : false;
        return await this.prettyPrintResults({
          datasetRunUrl,
          itemResults,
          originalData: data,
          runEvaluations,
          name: config.name,
          description: config.description,
          includeItemResults
        });
      }
    };
  }
  /**
   * Executes the task and evaluators for a single data item.
   *
   * Processing pipeline for one data item:
   * 1. Executes the task within an "experiment-item-run" observation and
   *    records input, output and metadata on it
   * 2. If the item has an `id`, creates a dataset run item linking the trace
   *    to the run named after the experiment (failures are logged, not thrown)
   * 3. Runs all item-level evaluators on the output; an evaluator that fails
   *    is logged and skipped, the remaining evaluators still contribute
   * 4. Queues one score per evaluation on the client's score manager
   *
   * @param params - Parameters for item execution
   * @param params.experimentName - Name of the parent experiment
   * @param params.experimentDescription - Description of the parent experiment
   * @param params.experimentMetadata - Metadata for the parent experiment
   * @param params.item - The data item to process
   * @param params.task - The task function to execute
   * @param params.evaluators - Optional evaluators to run on the output
   *
   * @returns Promise resolving to `{ output, evaluations, traceId, datasetRunId, item }`
   *
   * @throws {Error} When task execution fails (propagated from task function)
   *
   * @internal
   */
  async runItem(params) {
    const { item, evaluators = [], task, experimentMetadata = {} } = params;
    const { output, traceId } = await (0, import_tracing.startActiveObservation)(
      "experiment-item-run",
      async (span) => {
        const taskOutput = await task(item);
        // Dataset identifiers are only recorded for items that carry both fields.
        const datasetMetadata = "id" in item && "datasetId" in item ? {
          datasetId: item["datasetId"],
          datasetItemId: item["id"]
        } : {};
        span.update({
          input: item.input,
          output: taskOutput,
          metadata: {
            experimentName: params.experimentName,
            ...experimentMetadata,
            ...item.metadata,
            ...datasetMetadata
          }
        });
        return { output: taskOutput, traceId: span.traceId };
      }
    );
    let datasetRunId = void 0;
    if ("id" in item) {
      try {
        const runItem = await this.langfuseClient.api.datasetRunItems.create({
          runName: params.experimentName,
          runDescription: params.experimentDescription,
          metadata: params.experimentMetadata,
          datasetItemId: item.id,
          traceId
        });
        datasetRunId = runItem.datasetRunId;
      } catch (err) {
        // Linking is best-effort: the item result is still returned.
        this.logger.error("Linking dataset run item failed", err);
      }
    }
    const settled = await Promise.allSettled(
      evaluators.map(async (evaluator) => {
        const evaluatorParams = {
          input: item.input,
          expectedOutput: item.expectedOutput,
          output
        };
        try {
          // `await` accepts both promise-returning and plain-value evaluators.
          const result = await evaluator(evaluatorParams);
          return Array.isArray(result) ? result : [result];
        } catch (err) {
          this.logger.error(
            `Evaluator '${evaluator.name}' failed for params \n\n${JSON.stringify(evaluatorParams)}\n\n with error: ${err}`
          );
          throw err;
        }
      })
    );
    const evals = [];
    for (const outcome of settled) {
      if (outcome.status === "fulfilled") {
        evals.push(...outcome.value.flat());
      }
    }
    for (const ev of evals) {
      this.langfuseClient.score.create({
        traceId,
        name: ev.name,
        comment: ev.comment,
        value: ev.value,
        metadata: ev.metadata,
        dataType: ev.dataType
      });
    }
    return {
      output,
      evaluations: evals,
      traceId,
      datasetRunId,
      item
    };
  }
  /**
   * Derives the project base URL from the client's trace URL format.
   *
   * Requests the URL of a placeholder trace id and keeps everything before
   * the "/traces" segment.
   *
   * @returns Promise resolving to the project base URL
   * @internal
   */
  async getProjectUrl() {
    const placeholderTraceUrl = await this.langfuseClient.getTraceUrl("mock");
    return placeholderTraceUrl.split("/traces")[0];
  }
  /**
   * Formats experiment results into a human-readable string representation.
   *
   * The summary contains:
   * - Individual item results with inputs, outputs, expected values, and scores
   *   (only when `includeItemResults` is true)
   * - Dataset item and trace links (when available)
   * - Experiment overview with the item count and evaluation names
   * - Average of the numeric scores per evaluation name
   * - Run-level evaluation results
   * - Link to the dataset run (when available)
   *
   * @param params - Formatting parameters
   * @param params.datasetRunUrl - Optional URL to the dataset run in Langfuse UI
   * @param params.itemResults - Results from processing each data item
   * @param params.originalData - The original input data items
   * @param params.runEvaluations - Results from run-level evaluators
   * @param params.name - Name of the experiment
   * @param params.description - Optional description of the experiment
   * @param params.includeItemResults - Whether to include individual item details (default: false)
   *
   * @returns Promise resolving to formatted string representation
   *
   * @example Output format
   * ```
   * 1. Item 1:
   *    Input:    What is the capital of France?
   *    Expected: Paris
   *    Actual:   Paris
   *    Scores:
   *      • exact_match: 1.000
   *      • similarity: 0.950
   *        💭 Very close match with expected output
   *
   *    Dataset Item:
   *    https://cloud.langfuse.com/project/123/datasets/456/items/789
   *
   *    Trace:
   *    https://cloud.langfuse.com/project/123/traces/abc123
   *
   * ──────────────────────────────────────────────────
   * 📊 Translation Quality Test - Testing model accuracy
   * 2 items
   * Evaluations:
   *   • exact_match
   *   • similarity
   *
   * Average Scores:
   *   • exact_match: 0.850
   *   • similarity: 0.923
   *
   * Run Evaluations:
   *   • overall_quality: 0.887
   *     💭 Good performance with room for improvement
   *
   * 🔗 Dataset Run:
   *    https://cloud.langfuse.com/project/123/datasets/456/runs/def456
   * ```
   *
   * @internal
   */
  async prettyPrintResults(params) {
    const {
      itemResults,
      originalData,
      runEvaluations,
      name,
      description,
      includeItemResults = false
    } = params;
    if (itemResults.length === 0) {
      return "No experiment results to display.";
    }
    const formatScore = (value) => typeof value === "number" ? value.toFixed(3) : value;
    let output = "";
    if (includeItemResults) {
      // Resolved at most once, and only if some item needs a dataset link.
      let projectUrl = void 0;
      for (let index = 0; index < itemResults.length; index++) {
        const result = itemResults[index];
        const originalItem = originalData[index];
        output += `\n${index + 1}. Item ${index + 1}:\n`;
        if (originalItem != null && originalItem.input !== void 0) {
          output += `   Input:    ${this.formatValue(originalItem.input)}\n`;
        }
        let expectedOutput = originalItem != null ? originalItem.expectedOutput : void 0;
        if (expectedOutput == null) {
          expectedOutput = result.expectedOutput;
        }
        output += `   Expected: ${expectedOutput != null ? this.formatValue(expectedOutput) : "null"}\n`;
        output += `   Actual:   ${this.formatValue(result.output)}\n`;
        if (result.evaluations.length > 0) {
          output += `   Scores:\n`;
          for (const evaluation of result.evaluations) {
            output += `     \u2022 ${evaluation.name}: ${formatScore(evaluation.value)}`;
            if (evaluation.comment) {
              output += `\n       \u{1F4AD} ${evaluation.comment}`;
            }
            output += "\n";
          }
        }
        if (originalItem && "id" in originalItem && "datasetId" in originalItem) {
          if (projectUrl === void 0) {
            projectUrl = await this.getProjectUrl();
          }
          const datasetItemUrl = `${projectUrl}/datasets/${originalItem.datasetId}/items/${originalItem.id}`;
          output += `\n   Dataset Item:\n   ${datasetItemUrl}\n`;
        }
        if (result.traceId) {
          const traceUrl = await this.langfuseClient.getTraceUrl(
            result.traceId
          );
          output += `\n   Trace:\n   ${traceUrl}\n`;
        }
      }
    } else {
      output += `Individual Results: Hidden (${itemResults.length} items)\n`;
      output += "\u{1F4A1} Call prettyPrint({ includeItemResults: true }) to view them\n";
    }
    const totalItems = itemResults.length;
    const allEvaluations = itemResults.flatMap((r) => r.evaluations);
    const evaluationNames = new Set(allEvaluations.map((e) => e.name));
    output += `\n${"\u2500".repeat(50)}\n`;
    output += `\u{1F4CA} ${name}`;
    if (description) {
      output += ` - ${description}`;
    }
    output += `\n${totalItems} items`;
    if (evaluationNames.size > 0) {
      output += `\nEvaluations:`;
      for (const evalName of evaluationNames) {
        output += `\n  \u2022 ${evalName}`;
      }
      output += "\n";
      output += `\nAverage Scores:`;
      for (const evalName of evaluationNames) {
        // Only numeric values take part in the average.
        const scores = allEvaluations.filter((e) => e.name === evalName && typeof e.value === "number").map((e) => e.value);
        if (scores.length > 0) {
          const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
          output += `\n  \u2022 ${evalName}: ${avg.toFixed(3)}`;
        }
      }
      output += "\n";
    }
    if (runEvaluations.length > 0) {
      output += `\nRun Evaluations:`;
      for (const runEval of runEvaluations) {
        output += `\n  \u2022 ${runEval.name}: ${formatScore(runEval.value)}`;
        if (runEval.comment) {
          output += `\n    \u{1F4AD} ${runEval.comment}`;
        }
      }
      output += "\n";
    }
    if (params.datasetRunUrl) {
      output += `\n\u{1F517} Dataset Run:\n   ${params.datasetRunUrl}`;
    }
    return output;
  }
  /**
   * Formats a value for display in pretty-printed output.
   *
   * - Strings longer than 50 characters are cut to 47 characters plus "..."
   * - Other values are serialized with JSON.stringify
   * - Values JSON cannot represent (undefined, functions, symbols, BigInt,
   *   circular structures) fall back to String(value) so that formatting
   *   never throws and always yields a string
   *
   * @param value - The value to format
   * @returns Formatted string representation suitable for display
   *
   * @internal
   */
  formatValue(value) {
    if (typeof value === "string") {
      return value.length > 50 ? `${value.substring(0, 47)}...` : value;
    }
    let serialized = void 0;
    try {
      serialized = JSON.stringify(value);
    } catch (_err) {
      // Circular structures and BigInt make JSON.stringify throw.
      serialized = void 0;
    }
    return serialized === void 0 ? String(value) : serialized;
  }
  /**
   * Checks whether a tracer provider other than the no-op one is registered.
   *
   * Unwraps a ProxyTracerProvider to its delegate and compares the provider's
   * constructor name against "NoopTracerProvider".
   *
   * @returns `false` when the effective provider is named "NoopTracerProvider", `true` otherwise
   * @internal
   */
  isOtelRegistered() {
    let tracerProvider = import_api.trace.getTracerProvider();
    if (tracerProvider instanceof import_api.ProxyTracerProvider) {
      tracerProvider = tracerProvider.getDelegate();
    }
    return tracerProvider.constructor.name !== "NoopTracerProvider";
  }
};
690
+
691
+ // src/media/index.ts
692
+ var import_core2 = require("@langfuse/core");
136
693
  var MediaManager = class _MediaManager {
137
694
  /**
138
695
  * Creates a new MediaManager instance.
@@ -210,14 +767,14 @@ var MediaManager = class _MediaManager {
210
767
  const uint8Content = new Uint8Array(
211
768
  await mediaContent.arrayBuffer()
212
769
  );
213
- const base64MediaContent = (0, import_core.bytesToBase64)(uint8Content);
770
+ const base64MediaContent = (0, import_core2.bytesToBase64)(uint8Content);
214
771
  const base64DataUri = `data:${mediaData.contentType};base64,${base64MediaContent}`;
215
772
  referenceStringToMediaContentMap.set(
216
773
  referenceString,
217
774
  base64DataUri
218
775
  );
219
776
  } catch (error) {
220
- (0, import_core.getGlobalLogger)().warn(
777
+ (0, import_core2.getGlobalLogger)().warn(
221
778
  "Error fetching media content for reference string",
222
779
  referenceString,
223
780
  error
@@ -293,10 +850,10 @@ var MediaManager = class _MediaManager {
293
850
  };
294
851
 
295
852
  // src/prompt/promptManager.ts
296
- var import_core3 = require("@langfuse/core");
853
+ var import_core4 = require("@langfuse/core");
297
854
 
298
855
  // src/prompt/promptCache.ts
299
- var import_core2 = require("@langfuse/core");
856
+ var import_core3 = require("@langfuse/core");
300
857
  var DEFAULT_PROMPT_CACHE_TTL_SECONDS = 60;
301
858
  var LangfusePromptCacheItem = class {
302
859
  constructor(value, ttlSeconds) {
@@ -348,7 +905,7 @@ var LangfusePromptCache = class {
348
905
  return this._refreshingKeys.has(key);
349
906
  }
350
907
  invalidate(promptName) {
351
- (0, import_core2.getGlobalLogger)().debug(
908
+ (0, import_core3.getGlobalLogger)().debug(
352
909
  "Invalidating cache keys for",
353
910
  promptName,
354
911
  this._cache.keys()
@@ -692,7 +1249,7 @@ var PromptManager = class {
692
1249
  this.cache = new LangfusePromptCache();
693
1250
  }
694
1251
  get logger() {
695
- return (0, import_core3.getGlobalLogger)();
1252
+ return (0, import_core4.getGlobalLogger)();
696
1253
  }
697
1254
  /**
698
1255
  * Creates a new prompt in Langfuse.
@@ -919,8 +1476,8 @@ var PromptManager = class {
919
1476
  };
920
1477
 
921
1478
  // src/score/index.ts
922
- var import_core4 = require("@langfuse/core");
923
- var import_api = require("@opentelemetry/api");
1479
+ var import_core5 = require("@langfuse/core");
1480
+ var import_api2 = require("@opentelemetry/api");
924
1481
  var MAX_QUEUE_SIZE = 1e5;
925
1482
  var MAX_BATCH_SIZE = 100;
926
1483
  var ScoreManager = class {
@@ -935,13 +1492,13 @@ var ScoreManager = class {
935
1492
  this.flushPromise = null;
936
1493
  this.flushTimer = null;
937
1494
  this.apiClient = params.apiClient;
938
- const envFlushAtCount = (0, import_core4.getEnv)("LANGFUSE_FLUSH_AT");
939
- const envFlushIntervalSeconds = (0, import_core4.getEnv)("LANGFUSE_FLUSH_INTERVAL");
1495
+ const envFlushAtCount = (0, import_core5.getEnv)("LANGFUSE_FLUSH_AT");
1496
+ const envFlushIntervalSeconds = (0, import_core5.getEnv)("LANGFUSE_FLUSH_INTERVAL");
940
1497
  this.flushAtCount = envFlushAtCount ? Number(envFlushAtCount) : 10;
941
1498
  this.flushIntervalSeconds = envFlushIntervalSeconds ? Number(envFlushIntervalSeconds) : 1;
942
1499
  }
943
1500
  get logger() {
944
- return (0, import_core4.getGlobalLogger)();
1501
+ return (0, import_core5.getGlobalLogger)();
945
1502
  }
946
1503
  /**
947
1504
  * Creates a new score event and adds it to the processing queue.
@@ -966,11 +1523,11 @@ var ScoreManager = class {
966
1523
  var _a, _b;
967
1524
  const scoreData = {
968
1525
  ...data,
969
- id: (_a = data.id) != null ? _a : (0, import_core4.generateUUID)(),
970
- environment: (_b = data.environment) != null ? _b : (0, import_core4.getEnv)("LANGFUSE_TRACING_ENVIRONMENT")
1526
+ id: (_a = data.id) != null ? _a : (0, import_core5.generateUUID)(),
1527
+ environment: (_b = data.environment) != null ? _b : (0, import_core5.getEnv)("LANGFUSE_TRACING_ENVIRONMENT")
971
1528
  };
972
1529
  const scoreIngestionEvent = {
973
- id: (0, import_core4.generateUUID)(),
1530
+ id: (0, import_core5.generateUUID)(),
974
1531
  type: "score-create",
975
1532
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
976
1533
  body: scoreData
@@ -982,10 +1539,14 @@ var ScoreManager = class {
982
1539
  return;
983
1540
  }
984
1541
  this.eventQueue.push(scoreIngestionEvent);
1542
+ this.logger.debug(
1543
+ "Added score event to queue:\n",
1544
+ JSON.stringify(scoreIngestionEvent, null, 2)
1545
+ );
985
1546
  if (this.eventQueue.length >= this.flushAtCount) {
986
1547
  this.flushPromise = this.flush();
987
1548
  } else if (!this.flushTimer) {
988
- this.flushTimer = (0, import_core4.safeSetTimeout)(() => {
1549
+ this.flushTimer = (0, import_core5.safeSetTimeout)(() => {
989
1550
  this.flushPromise = this.flush();
990
1551
  }, this.flushIntervalSeconds * 1e3);
991
1552
  }
@@ -1068,7 +1629,7 @@ var ScoreManager = class {
1068
1629
  * ```
1069
1630
  */
1070
1631
  activeObservation(data) {
1071
- const currentOtelSpan = import_api.trace.getActiveSpan();
1632
+ const currentOtelSpan = import_api2.trace.getActiveSpan();
1072
1633
  if (!currentOtelSpan) {
1073
1634
  this.logger.warn("No active span in context to score.");
1074
1635
  return;
@@ -1104,7 +1665,7 @@ var ScoreManager = class {
1104
1665
  * ```
1105
1666
  */
1106
1667
  activeTrace(data) {
1107
- const currentOtelSpan = import_api.trace.getActiveSpan();
1668
+ const currentOtelSpan = import_api2.trace.getActiveSpan();
1108
1669
  if (!currentOtelSpan) {
1109
1670
  this.logger.warn("No active span in context to score trace.");
1110
1671
  return;
@@ -1204,10 +1765,10 @@ var LangfuseClient = class {
1204
1765
  constructor(params) {
1205
1766
  this.projectId = null;
1206
1767
  var _a, _b, _c, _d, _e, _f, _g;
1207
- const logger = (0, import_core5.getGlobalLogger)();
1208
- const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : (0, import_core5.getEnv)("LANGFUSE_PUBLIC_KEY");
1209
- const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : (0, import_core5.getEnv)("LANGFUSE_SECRET_KEY");
1210
- this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : (0, import_core5.getEnv)("LANGFUSE_BASE_URL")) != null ? _d : (0, import_core5.getEnv)("LANGFUSE_BASEURL")) != null ? _e : (
1768
+ const logger = (0, import_core6.getGlobalLogger)();
1769
+ const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : (0, import_core6.getEnv)("LANGFUSE_PUBLIC_KEY");
1770
+ const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : (0, import_core6.getEnv)("LANGFUSE_SECRET_KEY");
1771
+ this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : (0, import_core6.getEnv)("LANGFUSE_BASE_URL")) != null ? _d : (0, import_core6.getEnv)("LANGFUSE_BASEURL")) != null ? _e : (
1211
1772
  // legacy v2
1212
1773
  "https://cloud.langfuse.com"
1213
1774
  );
@@ -1221,13 +1782,13 @@ var LangfuseClient = class {
1221
1782
  "No secret key provided in constructor or as LANGFUSE_SECRET_KEY env var. Client operations will fail."
1222
1783
  );
1223
1784
  }
1224
- const timeoutSeconds = (_g = params == null ? void 0 : params.timeout) != null ? _g : Number((_f = (0, import_core5.getEnv)("LANGFUSE_TIMEOUT")) != null ? _f : 5);
1225
- this.api = new import_core5.LangfuseAPIClient({
1785
+ const timeoutSeconds = (_g = params == null ? void 0 : params.timeout) != null ? _g : Number((_f = (0, import_core6.getEnv)("LANGFUSE_TIMEOUT")) != null ? _f : 5);
1786
+ this.api = new import_core6.LangfuseAPIClient({
1226
1787
  baseUrl: this.baseUrl,
1227
1788
  username: publicKey,
1228
1789
  password: secretKey,
1229
1790
  xLangfusePublicKey: publicKey,
1230
- xLangfuseSdkVersion: import_core5.LANGFUSE_SDK_VERSION,
1791
+ xLangfuseSdkVersion: import_core6.LANGFUSE_SDK_VERSION,
1231
1792
  xLangfuseSdkName: "javascript",
1232
1793
  environment: "",
1233
1794
  // noop as baseUrl is set
@@ -1239,9 +1800,10 @@ var LangfuseClient = class {
1239
1800
  timeoutSeconds
1240
1801
  });
1241
1802
  this.prompt = new PromptManager({ apiClient: this.api });
1242
- this.dataset = new DatasetManager({ apiClient: this.api });
1803
+ this.dataset = new DatasetManager({ langfuseClient: this });
1243
1804
  this.score = new ScoreManager({ apiClient: this.api });
1244
1805
  this.media = new MediaManager({ apiClient: this.api });
1806
+ this.experiment = new ExperimentManager({ langfuseClient: this });
1245
1807
  this.getPrompt = this.prompt.get.bind(this.prompt);
1246
1808
  this.createPrompt = this.prompt.create.bind(this.prompt);
1247
1809
  this.updatePrompt = this.prompt.update.bind(this.prompt);
@@ -1316,15 +1878,36 @@ var LangfuseClient = class {
1316
1878
  return traceUrl;
1317
1879
  }
1318
1880
  };
1881
+
1882
+ // src/experiment/adapters.ts
1883
/**
 * Wraps an evaluator that uses the `{ input, output, expected }` calling
 * convention so it can be used as a Langfuse evaluator.
 *
 * The returned function forwards the Langfuse evaluator parameters to
 * `autoevalEvaluator`, mapping `expectedOutput` to `expected`. Any extra
 * `params` are passed along as well, but `input`, `output` and `expected`
 * always come from the Langfuse side.
 *
 * @param autoevalEvaluator - Function called with `{ ...params, input, output, expected }`;
 *   its (awaited) result is read for `name`, `score` and `metadata`
 * @param params - Optional additional arguments forwarded on every call
 * @returns Async evaluator resolving to `{ name, value, metadata }`, where
 *   `value` is the returned score, or 0 when the score is null or undefined
 */
function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
  const langfuseEvaluator = async ({ input, output, expectedOutput }) => {
    const evaluatorArgs = {
      ...params,
      input,
      output,
      expected: expectedOutput
    };
    const { name, score: rawScore, metadata } = await autoevalEvaluator(evaluatorArgs);
    return {
      name,
      value: rawScore == null ? 0 : rawScore,
      metadata
    };
  };
  return langfuseEvaluator;
}
1319
1900
  // Annotate the CommonJS export names for ESM import in node:
1320
1901
  0 && (module.exports = {
1321
1902
  ChatMessageType,
1322
1903
  ChatPromptClient,
1323
1904
  DatasetManager,
1905
+ ExperimentManager,
1324
1906
  LangfuseClient,
1325
1907
  MediaManager,
1326
1908
  PromptManager,
1327
1909
  ScoreManager,
1328
- TextPromptClient
1910
+ TextPromptClient,
1911
+ autoevalsToLangfuseEvaluator
1329
1912
  });
1330
1913
  //# sourceMappingURL=index.cjs.map