@langfuse/client 4.0.0 → 4.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  LangfuseAPIClient as LangfuseAPIClient4,
4
4
  LANGFUSE_SDK_VERSION,
5
- getGlobalLogger as getGlobalLogger5,
5
+ getGlobalLogger as getGlobalLogger6,
6
6
  getEnv as getEnv2
7
7
  } from "@langfuse/core";
8
8
 
@@ -15,44 +15,91 @@ var DatasetManager = class {
15
15
  * @internal
16
16
  */
17
17
  constructor(params) {
18
- this.apiClient = params.apiClient;
18
+ this.langfuseClient = params.langfuseClient;
19
19
  }
20
20
  /**
21
- * Retrieves a dataset by name along with all its items.
21
+ * Retrieves a dataset by name with all its items and experiment functionality.
22
22
  *
23
- * This method automatically handles pagination to fetch all dataset items
24
- * and enhances each item with a `link` function for easy experiment tracking.
23
+ * This method fetches a dataset and all its associated items, with support
24
+ * for automatic pagination to handle large datasets efficiently. The returned
25
+ * dataset object includes enhanced functionality for linking items to traces
26
+ * and running experiments directly on the dataset.
25
27
  *
26
28
  * @param name - The name of the dataset to retrieve
27
- * @param options - Optional configuration for fetching
29
+ * @param options - Optional configuration for data fetching
28
30
  * @param options.fetchItemsPageSize - Number of items to fetch per page (default: 50)
31
+ * @returns Promise resolving to enhanced dataset with items, linking, and experiment capabilities
29
32
  *
30
- * @returns Promise that resolves to the dataset with enhanced items
33
+ * @example Basic dataset retrieval
34
+ * ```typescript
35
+ * const dataset = await langfuse.dataset.get("my-evaluation-dataset");
36
+ * console.log(`Dataset ${dataset.name} has ${dataset.items.length} items`);
31
37
  *
32
- * @example
38
+ * // Access dataset properties
39
+ * console.log(dataset.description);
40
+ * console.log(dataset.metadata);
41
+ * ```
42
+ *
43
+ * @example Working with dataset items
33
44
  * ```typescript
34
- * const dataset = await langfuse.dataset.get("my-dataset");
45
+ * const dataset = await langfuse.dataset.get("qa-dataset");
35
46
  *
36
47
  * for (const item of dataset.items) {
37
- * // Use the item data for your experiment
38
- * const result = await processItem(item.input);
39
- *
40
- * // Link the result to the dataset item
41
- * await item.link(
42
- * { otelSpan: currentSpan },
43
- * "experiment-run-1",
44
- * { description: "Testing new model" }
45
- * );
48
+ * console.log("Question:", item.input);
49
+ * console.log("Expected Answer:", item.expectedOutput);
50
+ *
51
+ * // Each item has a link function for connecting to traces
52
+ * // await item.link(span, "experiment-name");
46
53
  * }
47
54
  * ```
55
+ *
56
+ * @example Running experiments on datasets
57
+ * ```typescript
58
+ * const dataset = await langfuse.dataset.get("benchmark-dataset");
59
+ *
60
+ * const result = await dataset.runExperiment({
61
+ * name: "GPT-4 Benchmark",
62
+ * description: "Evaluating GPT-4 on our benchmark tasks",
63
+ * task: async ({ input }) => {
64
+ * const response = await openai.chat.completions.create({
65
+ * model: "gpt-4",
66
+ * messages: [{ role: "user", content: input }]
67
+ * });
68
+ * return response.choices[0].message.content;
69
+ * },
70
+ * evaluators: [
71
+ * async ({ output, expectedOutput }) => ({
72
+ * name: "exact_match",
73
+ * value: output === expectedOutput ? 1 : 0
74
+ * })
75
+ * ]
76
+ * });
77
+ *
78
+ * console.log(await result.prettyPrint());
79
+ * ```
80
+ *
81
+ * @example Handling large datasets
82
+ * ```typescript
83
+ * // For very large datasets, use a larger page size (default: 50) to reduce the number of requests
84
+ * const largeDataset = await langfuse.dataset.get(
85
+ * "large-dataset",
86
+ * { fetchItemsPageSize: 100 }
87
+ * );
88
+ * ```
89
+ *
90
+ * @throws {Error} If the dataset does not exist or cannot be accessed
91
+ * @see {@link FetchedDataset} for the complete return type specification
92
+ * @see {@link RunExperimentOnDataset} for experiment execution details
93
+ * @public
94
+ * @since 4.0.0
48
95
  */
49
96
  async get(name, options) {
50
97
  var _a;
51
- const dataset = await this.apiClient.datasets.get(name);
98
+ const dataset = await this.langfuseClient.api.datasets.get(name);
52
99
  const items = [];
53
100
  let page = 1;
54
101
  while (true) {
55
- const itemsResponse = await this.apiClient.datasetItems.list({
102
+ const itemsResponse = await this.langfuseClient.api.datasetItems.list({
56
103
  datasetName: name,
57
104
  limit: (_a = options == null ? void 0 : options.fetchItemsPageSize) != null ? _a : 50,
58
105
  page
@@ -63,12 +110,20 @@ var DatasetManager = class {
63
110
  }
64
111
  page++;
65
112
  }
113
+ const itemsWithLinkMethod = items.map((item) => ({
114
+ ...item,
115
+ link: this.createDatasetItemLinkFunction(item)
116
+ }));
117
+ const runExperiment = (params) => {
118
+ return this.langfuseClient.experiment.run({
119
+ data: items,
120
+ ...params
121
+ });
122
+ };
66
123
  const returnDataset = {
67
124
  ...dataset,
68
- items: items.map((item) => ({
69
- ...item,
70
- link: this.createDatasetItemLinkFunction(item)
71
- }))
125
+ items: itemsWithLinkMethod,
126
+ runExperiment
72
127
  };
73
128
  return returnDataset;
74
129
  }
@@ -81,7 +136,7 @@ var DatasetManager = class {
81
136
  */
82
137
  createDatasetItemLinkFunction(item) {
83
138
  const linkFunction = async (obj, runName, runArgs) => {
84
- return await this.apiClient.datasetRunItems.create({
139
+ return await this.langfuseClient.api.datasetRunItems.create({
85
140
  runName,
86
141
  datasetItemId: item.id,
87
142
  traceId: obj.otelSpan.spanContext().traceId,
@@ -93,9 +148,499 @@ var DatasetManager = class {
93
148
  }
94
149
  };
95
150
 
151
+ // src/experiment/ExperimentManager.ts
152
+ import { getGlobalLogger } from "@langfuse/core";
153
+ import { startActiveObservation } from "@langfuse/tracing";
154
+ import { ProxyTracerProvider, trace } from "@opentelemetry/api";
155
+ var ExperimentManager = class {
156
+ /**
157
+ * Creates a new ExperimentManager instance.
158
+ *
159
+ * @param params - Configuration object
160
+ * @param params.langfuseClient - The Langfuse client instance for API communication
161
+ * @internal
162
+ */
163
+ constructor(params) {
164
+ this.langfuseClient = params.langfuseClient;
165
+ }
166
+ /**
167
+ * Gets the global logger instance for experiment-related logging.
168
+ *
169
+ * @returns The global logger instance
170
+ * @internal
171
+ */
172
+ get logger() {
173
+ return getGlobalLogger();
174
+ }
175
+ /**
176
+ * Executes an experiment by running a task on each data item and evaluating the results.
177
+ *
178
+ * This method orchestrates the complete experiment lifecycle:
179
+ * 1. Executes the task function on each data item with proper tracing
180
+ * 2. Runs item-level evaluators on each task output
181
+ * 3. Executes run-level evaluators on the complete result set
182
+ * 4. Links results to dataset runs (for Langfuse datasets)
183
+ * 5. Stores all scores and traces in Langfuse
184
+ *
185
+ * @param config - The experiment configuration
186
+ * @param config.name - Human-readable name for the experiment
187
+ * @param config.description - Optional description of the experiment's purpose
188
+ * @param config.metadata - Optional metadata to attach to the experiment run
189
+ * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
190
+ * @param config.task - Function that processes each data item and returns output
191
+ * @param config.evaluators - Optional array of functions to evaluate each item's output
192
+ * @param config.runEvaluators - Optional array of functions to evaluate the entire run
193
+ * @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
194
+ *
195
+ * @returns Promise that resolves to experiment results including:
196
+ * - itemResults: Results for each processed data item
197
+ * - runEvaluations: Results from run-level evaluators
198
+ * - datasetRunId: ID of the dataset run (if using Langfuse datasets)
199
+ * - prettyPrint: Function to format and display results
200
+ *
201
+ * @throws {Error} When task execution fails and cannot be handled gracefully
202
+ * Failures in evaluators or run evaluators do not throw: they are logged and their results are omitted.
203
+ *
204
+ * @example Simple experiment
205
+ * ```typescript
206
+ * const result = await langfuse.experiment.run({
207
+ * name: "Translation Quality Test",
208
+ * data: [
209
+ * { input: "Hello world", expectedOutput: "Hola mundo" },
210
+ * { input: "Good morning", expectedOutput: "Buenos días" }
211
+ * ],
212
+ * task: async ({ input }) => translateText(input, 'es'),
213
+ * evaluators: [
214
+ * async ({ output, expectedOutput }) => ({
215
+ * name: "bleu_score",
216
+ * value: calculateBleuScore(output, expectedOutput)
217
+ * })
218
+ * ]
219
+ * });
220
+ * ```
221
+ *
222
+ * @example Experiment with concurrency control
223
+ * ```typescript
224
+ * const result = await langfuse.experiment.run({
225
+ * name: "Large Scale Evaluation",
226
+ * data: largeBatchOfItems,
227
+ * task: expensiveModelCall,
228
+ * maxConcurrency: 5, // Process max 5 items simultaneously
229
+ * evaluators: [myEvaluator],
230
+ * runEvaluators: [
231
+ * async ({ itemResults }) => ({
232
+ * name: "average_score",
233
+ * value: itemResults.reduce((acc, r) => acc + r.evaluations[0].value, 0) / itemResults.length
234
+ * })
235
+ * ]
236
+ * });
237
+ * ```
238
+ *
239
+ * @see {@link ExperimentParams} for detailed parameter documentation
240
+ * @see {@link ExperimentResult} for detailed return value documentation
241
+ * @see {@link Evaluator} for evaluator function specifications
242
+ * @see {@link RunEvaluator} for run evaluator function specifications
243
+ *
244
+ * @public
245
+ */
246
+ async run(config) {
247
+ const {
248
+ data,
249
+ evaluators,
250
+ task,
251
+ name,
252
+ description,
253
+ metadata,
254
+ maxConcurrency: batchSize = Infinity,
255
+ runEvaluators
256
+ } = config;
257
+ if (!this.isOtelRegistered()) {
258
+ this.logger.warn(
259
+ "OpenTelemetry has not been set up. Traces will not be sent to Langfuse.See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
260
+ );
261
+ }
262
+ const itemResults = [];
263
+ for (let i = 0; i < data.length; i += batchSize) {
264
+ const batch = data.slice(i, i + batchSize);
265
+ const promises = batch.map(
266
+ async (item) => {
267
+ return this.runItem({
268
+ item,
269
+ evaluators,
270
+ task,
271
+ experimentName: name,
272
+ experimentDescription: description,
273
+ experimentMetadata: metadata
274
+ });
275
+ }
276
+ );
277
+ const results = await Promise.all(promises);
278
+ itemResults.push(...results);
279
+ }
280
+ const datasetRunId = itemResults.length > 0 ? itemResults[0].datasetRunId : void 0;
281
+ let datasetRunUrl = void 0;
282
+ if (datasetRunId && data.length > 0 && "datasetId" in data[0]) {
283
+ const datasetId = data[0].datasetId;
284
+ const projectUrl = (await this.langfuseClient.getTraceUrl("mock")).split(
285
+ "/traces"
286
+ )[0];
287
+ datasetRunUrl = `${projectUrl}/datasets/${datasetId}/runs/${datasetRunId}`;
288
+ }
289
+ let runEvaluations = [];
290
+ if (runEvaluators && (runEvaluators == null ? void 0 : runEvaluators.length) > 0) {
291
+ const promises = runEvaluators.map(async (runEvaluator) => {
292
+ return runEvaluator({ itemResults }).then((result) => {
293
+ return Array.isArray(result) ? result : [result];
294
+ }).catch((err) => {
295
+ this.logger.error("Run evaluator failed with error ", err);
296
+ throw err;
297
+ });
298
+ });
299
+ runEvaluations = (await Promise.allSettled(promises)).reduce(
300
+ (acc, settledPromise) => {
301
+ if (settledPromise.status === "fulfilled") {
302
+ acc.push(...settledPromise.value);
303
+ }
304
+ return acc;
305
+ },
306
+ []
307
+ );
308
+ if (datasetRunId) {
309
+ runEvaluations.forEach(
310
+ (runEval) => this.langfuseClient.score.create({ datasetRunId, ...runEval })
311
+ );
312
+ }
313
+ }
314
+ await this.langfuseClient.score.flush();
315
+ return {
316
+ itemResults,
317
+ datasetRunId,
318
+ runEvaluations,
319
+ prettyPrint: async (options) => {
320
+ var _a;
321
+ return await this.prettyPrintResults({
322
+ datasetRunUrl,
323
+ itemResults,
324
+ originalData: data,
325
+ runEvaluations,
326
+ name: config.name,
327
+ description: config.description,
328
+ includeItemResults: (_a = options == null ? void 0 : options.includeItemResults) != null ? _a : true
329
+ });
330
+ }
331
+ };
332
+ }
333
+ /**
334
+ * Executes the task and evaluators for a single data item.
335
+ *
336
+ * This method handles the complete processing pipeline for one data item:
337
+ * 1. Executes the task within a traced observation span
338
+ * 2. Links the result to a dataset run (if applicable)
339
+ * 3. Runs all item-level evaluators on the output
340
+ * 4. Stores evaluation scores in Langfuse
341
+ * 5. Handles errors gracefully by continuing with remaining evaluators
342
+ *
343
+ * @param params - Parameters for item execution
344
+ * @param params.experimentName - Name of the parent experiment
345
+ * @param params.experimentDescription - Description of the parent experiment
346
+ * @param params.experimentMetadata - Metadata for the parent experiment
347
+ * @param params.item - The data item to process
348
+ * @param params.task - The task function to execute
349
+ * @param params.evaluators - Optional evaluators to run on the output
350
+ *
351
+ * @returns Promise resolving to the item result with output, evaluations, and trace info
352
+ *
353
+ * @throws {Error} When task execution fails (propagated from task function)
354
+ *
355
+ * @internal
356
+ */
357
+ async runItem(params) {
358
+ const { item, evaluators = [], task } = params;
359
+ const { output, traceId } = await startActiveObservation(
360
+ "experiment-item-run",
361
+ async (span) => {
362
+ const output2 = await task(item);
363
+ span.update({
364
+ input: item.input,
365
+ output: output2
366
+ });
367
+ return { output: output2, traceId: span.traceId };
368
+ }
369
+ );
370
+ let datasetRunId = void 0;
371
+ if ("id" in item) {
372
+ await this.langfuseClient.api.datasetRunItems.create({
373
+ runName: params.experimentName,
374
+ runDescription: params.experimentDescription,
375
+ metadata: params.experimentMetadata,
376
+ datasetItemId: item.id,
377
+ traceId
378
+ }).then((result) => {
379
+ datasetRunId = result.datasetRunId;
380
+ }).catch(
381
+ (err) => this.logger.error("Linking dataset run item failed", err)
382
+ );
383
+ }
384
+ const evalPromises = evaluators.map(
385
+ async (evaluator) => {
386
+ const params2 = {
387
+ input: item.input,
388
+ expectedOutput: item.expectedOutput,
389
+ output
390
+ };
391
+ return evaluator(params2).then((result) => {
392
+ return Array.isArray(result) ? result : [result];
393
+ }).catch((err) => {
394
+ this.logger.error(
395
+ `Evaluator '${evaluator.name}' failed for params
396
+
397
+ ${JSON.stringify(params2)}
398
+
399
+ with error: ${err}`
400
+ );
401
+ throw err;
402
+ });
403
+ }
404
+ );
405
+ const evals = (await Promise.allSettled(evalPromises)).reduce(
406
+ (acc, promiseResult) => {
407
+ if (promiseResult.status === "fulfilled") {
408
+ acc.push(...promiseResult.value.flat());
409
+ }
410
+ return acc;
411
+ },
412
+ []
413
+ );
414
+ for (const ev of evals) {
415
+ this.langfuseClient.score.create({
416
+ traceId,
417
+ name: ev.name,
418
+ comment: ev.comment,
419
+ value: ev.value,
420
+ metadata: ev.metadata,
421
+ dataType: ev.dataType
422
+ });
423
+ }
424
+ return {
425
+ output,
426
+ evaluations: evals,
427
+ traceId,
428
+ datasetRunId
429
+ };
430
+ }
431
+ /**
432
+ * Formats experiment results into a human-readable string representation.
433
+ *
434
+ * Creates a comprehensive, nicely formatted summary of the experiment including:
435
+ * - Individual item results with inputs, outputs, expected values, and scores
436
+ * - Dataset item and trace links (when available)
437
+ * - Experiment overview with aggregate statistics
438
+ * - Average scores across all evaluations
439
+ * - Run-level evaluation results
440
+ * - Links to dataset runs in the Langfuse UI
441
+ *
442
+ * @param params - Formatting parameters
443
+ * @param params.datasetRunUrl - Optional URL to the dataset run in Langfuse UI
444
+ * @param params.itemResults - Results from processing each data item
445
+ * @param params.originalData - The original input data items
446
+ * @param params.runEvaluations - Results from run-level evaluators
447
+ * @param params.name - Name of the experiment
448
+ * @param params.description - Optional description of the experiment
449
+ * @param params.includeItemResults - Whether to include individual item details (default: true)
450
+ *
451
+ * @returns Promise resolving to formatted string representation
452
+ *
453
+ * @example Output format
454
+ * ```
455
+ * 1. Item 1:
456
+ * Input: What is the capital of France?
457
+ * Expected: Paris
458
+ * Actual: Paris
459
+ * Scores:
460
+ * • exact_match: 1.000
461
+ * • similarity: 0.950
462
+ * 💭 Very close match with expected output
463
+ *
464
+ * Dataset Item:
465
+ * https://cloud.langfuse.com/project/123/datasets/456/items/789
466
+ *
467
+ * Trace:
468
+ * https://cloud.langfuse.com/project/123/traces/abc123
469
+ *
470
+ * ──────────────────────────────────────────────────
471
+ * 📊 Translation Quality Test - Testing model accuracy
472
+ * 2 items
473
+ * Evaluations:
474
+ * • exact_match
475
+ * • similarity
476
+ *
477
+ * Average Scores:
478
+ * • exact_match: 0.850
479
+ * • similarity: 0.923
480
+ *
481
+ * Run Evaluations:
482
+ * • overall_quality: 0.887
483
+ * 💭 Good performance with room for improvement
484
+ *
485
+ * 🔗 Dataset Run:
486
+ * https://cloud.langfuse.com/project/123/datasets/456/runs/def456
487
+ * ```
488
+ *
489
+ * @internal
490
+ */
491
+ async prettyPrintResults(params) {
492
+ var _a, _b;
493
+ const {
494
+ itemResults,
495
+ originalData,
496
+ runEvaluations,
497
+ name,
498
+ description,
499
+ includeItemResults = true
500
+ } = params;
501
+ if (itemResults.length === 0) {
502
+ return "No experiment results to display.";
503
+ }
504
+ let output = "";
505
+ if (includeItemResults) {
506
+ for (let index = 0; index < itemResults.length; index++) {
507
+ const result = itemResults[index];
508
+ const originalItem = originalData[index];
509
+ output += `
510
+ ${index + 1}. Item ${index + 1}:
511
+ `;
512
+ if ((originalItem == null ? void 0 : originalItem.input) !== void 0) {
513
+ output += ` Input: ${this.formatValue(originalItem.input)}
514
+ `;
515
+ }
516
+ const expectedOutput = (_b = (_a = originalItem == null ? void 0 : originalItem.expectedOutput) != null ? _a : result.expectedOutput) != null ? _b : null;
517
+ output += ` Expected: ${expectedOutput !== null ? this.formatValue(expectedOutput) : "null"}
518
+ `;
519
+ output += ` Actual: ${this.formatValue(result.output)}
520
+ `;
521
+ if (result.evaluations.length > 0) {
522
+ output += ` Scores:
523
+ `;
524
+ result.evaluations.forEach((evaluation) => {
525
+ const score = typeof evaluation.value === "number" ? evaluation.value.toFixed(3) : evaluation.value;
526
+ output += ` \u2022 ${evaluation.name}: ${score}`;
527
+ if (evaluation.comment) {
528
+ output += `
529
+ \u{1F4AD} ${evaluation.comment}`;
530
+ }
531
+ output += "\n";
532
+ });
533
+ }
534
+ if (originalItem && "id" in originalItem && "datasetId" in originalItem) {
535
+ const projectUrl = (await this.langfuseClient.getTraceUrl("mock")).split("/traces")[0];
536
+ const datasetItemUrl = `${projectUrl}/datasets/${originalItem.datasetId}/items/${originalItem.id}`;
537
+ output += `
538
+ Dataset Item:
539
+ ${datasetItemUrl}
540
+ `;
541
+ }
542
+ if (result.traceId) {
543
+ const traceUrl = await this.langfuseClient.getTraceUrl(
544
+ result.traceId
545
+ );
546
+ output += `
547
+ Trace:
548
+ ${traceUrl}
549
+ `;
550
+ }
551
+ }
552
+ } else {
553
+ output += `Individual Results: Hidden (${itemResults.length} items)
554
+ `;
555
+ output += "\u{1F4A1} Call prettyPrint({ includeItemResults: true }) to view them\n";
556
+ }
557
+ const totalItems = itemResults.length;
558
+ const evaluationNames = new Set(
559
+ itemResults.flatMap((r) => r.evaluations.map((e) => e.name))
560
+ );
561
+ output += `
562
+ ${"\u2500".repeat(50)}
563
+ `;
564
+ output += `\u{1F4CA} ${name}`;
565
+ if (description) {
566
+ output += ` - ${description}`;
567
+ }
568
+ output += `
569
+ ${totalItems} items`;
570
+ if (evaluationNames.size > 0) {
571
+ output += `
572
+ Evaluations:`;
573
+ Array.from(evaluationNames).forEach((evalName) => {
574
+ output += `
575
+ \u2022 ${evalName}`;
576
+ });
577
+ output += "\n";
578
+ }
579
+ if (evaluationNames.size > 0) {
580
+ output += `
581
+ Average Scores:`;
582
+ for (const evalName of evaluationNames) {
583
+ const scores = itemResults.flatMap((r) => r.evaluations).filter((e) => e.name === evalName && typeof e.value === "number").map((e) => e.value);
584
+ if (scores.length > 0) {
585
+ const avg = scores.reduce((a, b) => a + b, 0) / scores.length;
586
+ output += `
587
+ \u2022 ${evalName}: ${avg.toFixed(3)}`;
588
+ }
589
+ }
590
+ output += "\n";
591
+ }
592
+ if (runEvaluations.length > 0) {
593
+ output += `
594
+ Run Evaluations:`;
595
+ runEvaluations.forEach((runEval) => {
596
+ const score = typeof runEval.value === "number" ? runEval.value.toFixed(3) : runEval.value;
597
+ output += `
598
+ \u2022 ${runEval.name}: ${score}`;
599
+ if (runEval.comment) {
600
+ output += `
601
+ \u{1F4AD} ${runEval.comment}`;
602
+ }
603
+ });
604
+ output += "\n";
605
+ }
606
+ if (params.datasetRunUrl) {
607
+ output += `
608
+ \u{1F517} Dataset Run:
609
+ ${params.datasetRunUrl}`;
610
+ }
611
+ return output;
612
+ }
613
+ /**
614
+ * Formats a value for display in pretty-printed output.
615
+ *
616
+ * Handles different value types appropriately:
617
+ * - Strings: Truncates long strings to 50 characters with "..."
618
+ * - Objects/Arrays: Converts to JSON string representation
619
+ * - Other values (numbers, booleans, null): Also serialized with JSON.stringify
620
+ *
621
+ * @param value - The value to format
622
+ * @returns Formatted string representation suitable for display
623
+ *
624
+ * @internal
625
+ */
626
+ formatValue(value) {
627
+ if (typeof value === "string") {
628
+ return value.length > 50 ? `${value.substring(0, 47)}...` : value;
629
+ }
630
+ return JSON.stringify(value);
631
+ }
632
+ isOtelRegistered() {
633
+ let tracerProvider = trace.getTracerProvider();
634
+ if (tracerProvider instanceof ProxyTracerProvider) {
635
+ tracerProvider = tracerProvider.getDelegate();
636
+ }
637
+ return tracerProvider.constructor.name !== "NoopTracerProvider";
638
+ }
639
+ };
640
+
96
641
  // src/media/index.ts
97
642
  import {
98
- getGlobalLogger,
643
+ getGlobalLogger as getGlobalLogger2,
99
644
  bytesToBase64
100
645
  } from "@langfuse/core";
101
646
  var MediaManager = class _MediaManager {
@@ -182,7 +727,7 @@ var MediaManager = class _MediaManager {
182
727
  base64DataUri
183
728
  );
184
729
  } catch (error) {
185
- getGlobalLogger().warn(
730
+ getGlobalLogger2().warn(
186
731
  "Error fetching media content for reference string",
187
732
  referenceString,
188
733
  error
@@ -259,11 +804,11 @@ var MediaManager = class _MediaManager {
259
804
 
260
805
  // src/prompt/promptManager.ts
261
806
  import {
262
- getGlobalLogger as getGlobalLogger3
807
+ getGlobalLogger as getGlobalLogger4
263
808
  } from "@langfuse/core";
264
809
 
265
810
  // src/prompt/promptCache.ts
266
- import { getGlobalLogger as getGlobalLogger2 } from "@langfuse/core";
811
+ import { getGlobalLogger as getGlobalLogger3 } from "@langfuse/core";
267
812
  var DEFAULT_PROMPT_CACHE_TTL_SECONDS = 60;
268
813
  var LangfusePromptCacheItem = class {
269
814
  constructor(value, ttlSeconds) {
@@ -315,7 +860,7 @@ var LangfusePromptCache = class {
315
860
  return this._refreshingKeys.has(key);
316
861
  }
317
862
  invalidate(promptName) {
318
- getGlobalLogger2().debug(
863
+ getGlobalLogger3().debug(
319
864
  "Invalidating cache keys for",
320
865
  promptName,
321
866
  this._cache.keys()
@@ -659,7 +1204,7 @@ var PromptManager = class {
659
1204
  this.cache = new LangfusePromptCache();
660
1205
  }
661
1206
  get logger() {
662
- return getGlobalLogger3();
1207
+ return getGlobalLogger4();
663
1208
  }
664
1209
  /**
665
1210
  * Creates a new prompt in Langfuse.
@@ -889,10 +1434,10 @@ var PromptManager = class {
889
1434
  import {
890
1435
  getEnv,
891
1436
  generateUUID,
892
- getGlobalLogger as getGlobalLogger4,
1437
+ getGlobalLogger as getGlobalLogger5,
893
1438
  safeSetTimeout
894
1439
  } from "@langfuse/core";
895
- import { trace } from "@opentelemetry/api";
1440
+ import { trace as trace2 } from "@opentelemetry/api";
896
1441
  var MAX_QUEUE_SIZE = 1e5;
897
1442
  var MAX_BATCH_SIZE = 100;
898
1443
  var ScoreManager = class {
@@ -913,7 +1458,7 @@ var ScoreManager = class {
913
1458
  this.flushIntervalSeconds = envFlushIntervalSeconds ? Number(envFlushIntervalSeconds) : 1;
914
1459
  }
915
1460
  get logger() {
916
- return getGlobalLogger4();
1461
+ return getGlobalLogger5();
917
1462
  }
918
1463
  /**
919
1464
  * Creates a new score event and adds it to the processing queue.
@@ -954,6 +1499,10 @@ var ScoreManager = class {
954
1499
  return;
955
1500
  }
956
1501
  this.eventQueue.push(scoreIngestionEvent);
1502
+ this.logger.debug(
1503
+ "Added score event to queue:\n",
1504
+ JSON.stringify(scoreIngestionEvent, null, 2)
1505
+ );
957
1506
  if (this.eventQueue.length >= this.flushAtCount) {
958
1507
  this.flushPromise = this.flush();
959
1508
  } else if (!this.flushTimer) {
@@ -1040,7 +1589,7 @@ var ScoreManager = class {
1040
1589
  * ```
1041
1590
  */
1042
1591
  activeObservation(data) {
1043
- const currentOtelSpan = trace.getActiveSpan();
1592
+ const currentOtelSpan = trace2.getActiveSpan();
1044
1593
  if (!currentOtelSpan) {
1045
1594
  this.logger.warn("No active span in context to score.");
1046
1595
  return;
@@ -1076,7 +1625,7 @@ var ScoreManager = class {
1076
1625
  * ```
1077
1626
  */
1078
1627
  activeTrace(data) {
1079
- const currentOtelSpan = trace.getActiveSpan();
1628
+ const currentOtelSpan = trace2.getActiveSpan();
1080
1629
  if (!currentOtelSpan) {
1081
1630
  this.logger.warn("No active span in context to score trace.");
1082
1631
  return;
@@ -1176,7 +1725,7 @@ var LangfuseClient = class {
1176
1725
  constructor(params) {
1177
1726
  this.projectId = null;
1178
1727
  var _a, _b, _c, _d, _e, _f, _g;
1179
- const logger = getGlobalLogger5();
1728
+ const logger = getGlobalLogger6();
1180
1729
  const publicKey = (_a = params == null ? void 0 : params.publicKey) != null ? _a : getEnv2("LANGFUSE_PUBLIC_KEY");
1181
1730
  const secretKey = (_b = params == null ? void 0 : params.secretKey) != null ? _b : getEnv2("LANGFUSE_SECRET_KEY");
1182
1731
  this.baseUrl = (_e = (_d = (_c = params == null ? void 0 : params.baseUrl) != null ? _c : getEnv2("LANGFUSE_BASE_URL")) != null ? _d : getEnv2("LANGFUSE_BASEURL")) != null ? _e : (
@@ -1211,9 +1760,10 @@ var LangfuseClient = class {
1211
1760
  timeoutSeconds
1212
1761
  });
1213
1762
  this.prompt = new PromptManager({ apiClient: this.api });
1214
- this.dataset = new DatasetManager({ apiClient: this.api });
1763
+ this.dataset = new DatasetManager({ langfuseClient: this });
1215
1764
  this.score = new ScoreManager({ apiClient: this.api });
1216
1765
  this.media = new MediaManager({ apiClient: this.api });
1766
+ this.experiment = new ExperimentManager({ langfuseClient: this });
1217
1767
  this.getPrompt = this.prompt.get.bind(this.prompt);
1218
1768
  this.createPrompt = this.prompt.create.bind(this.prompt);
1219
1769
  this.updatePrompt = this.prompt.update.bind(this.prompt);
@@ -1288,14 +1838,35 @@ var LangfuseClient = class {
1288
1838
  return traceUrl;
1289
1839
  }
1290
1840
  };
1841
+
1842
+ // src/experiment/adapters.ts
1843
+ function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
1844
+ const langfuseEvaluator = async (langfuseEvaluatorParams) => {
1845
+ var _a;
1846
+ const score = await autoevalEvaluator({
1847
+ ...params != null ? params : {},
1848
+ input: langfuseEvaluatorParams.input,
1849
+ output: langfuseEvaluatorParams.output,
1850
+ expected: langfuseEvaluatorParams.expectedOutput
1851
+ });
1852
+ return {
1853
+ name: score.name,
1854
+ value: (_a = score.score) != null ? _a : 0,
1855
+ metadata: score.metadata
1856
+ };
1857
+ };
1858
+ return langfuseEvaluator;
1859
+ }
1291
1860
  export {
1292
1861
  ChatMessageType,
1293
1862
  ChatPromptClient,
1294
1863
  DatasetManager,
1864
+ ExperimentManager,
1295
1865
  LangfuseClient,
1296
1866
  MediaManager,
1297
1867
  PromptManager,
1298
1868
  ScoreManager,
1299
- TextPromptClient
1869
+ TextPromptClient,
1870
+ autoevalsToLangfuseEvaluator
1300
1871
  };
1301
1872
  //# sourceMappingURL=index.mjs.map