@langfuse/client 4.1.0-alpha.2 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -120,6 +120,14 @@ type ExperimentParams<Input = any, ExpectedOutput = any, Metadata extends Record
120
120
  * Choose a descriptive name that identifies the experiment's purpose.
121
121
  */
122
122
  name: string;
123
+ /**
124
+ * Optional exact name for the experiment run.
125
+ *
126
+ * If provided, this will be used as the exact dataset run name if the data
127
+ * contains Langfuse dataset items. If not provided, this will default to
128
+ * the experiment name appended with an ISO timestamp.
129
+ */
130
+ runName?: string;
123
131
  /**
124
132
  * Optional description explaining the experiment's purpose.
125
133
  *
@@ -227,10 +235,10 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
227
235
  * console.log(`Average score: ${avgScore?.value}`);
228
236
  *
229
237
  * // Print formatted results
230
- * console.log(await result.prettyPrint());
238
+ * console.log(await result.format());
231
239
  *
232
240
  * // Print summary with individual item results
233
- * console.log(await result.prettyPrint({ includeItemResults: true }));
241
+ * console.log(await result.format({ includeItemResults: true }));
234
242
  *
235
243
  * // Link to dataset run (if available)
236
244
  * if (result.datasetRunUrl) {
@@ -241,6 +249,13 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
241
249
  * @public
242
250
  */
243
251
  type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record<string, any> = Record<string, any>> = {
252
+ /**
253
+ * The experiment run name.
254
+ *
255
+ * This is equal to the dataset run name if experiment was on Langfuse dataset.
256
+ * Either the provided runName parameter or generated name (experiment name + timestamp).
257
+ */
258
+ runName: string;
244
259
  /**
245
260
  * ID of the dataset run in Langfuse (only for experiments on Langfuse datasets).
246
261
  *
@@ -273,7 +288,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
273
288
  */
274
289
  runEvaluations: Evaluation[];
275
290
  /**
276
- * Function to format and display experiment results in a human-readable format.
291
+ * Function to format experiment results in a human-readable format.
277
292
  *
278
293
  * Generates a comprehensive, nicely formatted summary including individual results,
279
294
  * aggregate statistics, evaluation scores, and links to traces and dataset runs.
@@ -282,7 +297,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
282
297
  * @param options.includeItemResults - Whether to include individual item details (default: false)
283
298
  * @returns Promise resolving to formatted string representation
284
299
  */
285
- prettyPrint: (options?: {
300
+ format: (options?: {
286
301
  includeItemResults?: boolean;
287
302
  }) => Promise<string>;
288
303
  };
@@ -301,6 +316,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
301
316
  * const dataset = await langfuse.dataset.get("my-dataset");
302
317
  * const result = await dataset.runExperiment({
303
318
  * name: "Model Evaluation",
319
+ * runName: "Model Evaluation Run 1", // optional
304
320
  * task: myTask,
305
321
  * evaluators: [myEvaluator]
306
322
  * });
@@ -469,6 +485,7 @@ declare class DatasetManager {
469
485
  *
470
486
  * const result = await dataset.runExperiment({
471
487
  * name: "GPT-4 Benchmark",
488
+ * runName: "GPT-4 Benchmark v1.2", // optional exact run name
472
489
  * description: "Evaluating GPT-4 on our benchmark tasks",
473
490
  * task: async ({ input }) => {
474
491
  * const response = await openai.chat.completions.create({
@@ -485,7 +502,7 @@ declare class DatasetManager {
485
502
  * ]
486
503
  * });
487
504
  *
488
- * console.log(await result.prettyPrint());
505
+ * console.log(await result.format());
489
506
  * ```
490
507
  *
491
508
  * @example Handling large datasets
@@ -549,7 +566,7 @@ declare class DatasetManager {
549
566
  * ]
550
567
  * });
551
568
  *
552
- * console.log(await result.prettyPrint());
569
+ * console.log(await result.format());
553
570
  * ```
554
571
  *
555
572
  * @example Using with Langfuse datasets
@@ -597,6 +614,7 @@ declare class ExperimentManager {
597
614
  *
598
615
  * @param config - The experiment configuration
599
616
  * @param config.name - Human-readable name for the experiment
617
+ * @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
600
618
  * @param config.description - Optional description of the experiment's purpose
601
619
  * @param config.metadata - Optional metadata to attach to the experiment run
602
620
  * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
@@ -606,10 +624,11 @@ declare class ExperimentManager {
606
624
  * @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
607
625
  *
608
626
  * @returns Promise that resolves to experiment results including:
627
+ * - runName: The experiment run name (either provided or generated)
609
628
  * - itemResults: Results for each processed data item
610
629
  * - runEvaluations: Results from run-level evaluators
611
630
  * - datasetRunId: ID of the dataset run (if using Langfuse datasets)
612
- * - prettyPrint: Function to format and display results
631
+ * - format: Function to format results for display
613
632
  *
614
633
  * @throws {Error} When task execution fails and cannot be handled gracefully
615
634
  * @throws {Error} When required evaluators fail critically
@@ -669,6 +688,7 @@ declare class ExperimentManager {
669
688
  *
670
689
  * @param params - Parameters for item execution
671
690
  * @param params.experimentName - Name of the parent experiment
691
+ * @param params.experimentRunName - Run name for the parent experiment
672
692
  * @param params.experimentDescription - Description of the parent experiment
673
693
  * @param params.experimentMetadata - Metadata for the parent experiment
674
694
  * @param params.item - The data item to process
@@ -758,6 +778,20 @@ declare class ExperimentManager {
758
778
  */
759
779
  private formatValue;
760
780
  private isOtelRegistered;
781
+ /**
782
+ * Creates an experiment run name based on provided parameters.
783
+ *
784
+ * If runName is provided, returns it directly. Otherwise, generates
785
+ * a name by combining the experiment name with an ISO timestamp.
786
+ *
787
+ * @param params - Parameters for run name creation
788
+ * @param params.name - The experiment name
789
+ * @param params.runName - Optional provided run name
790
+ * @returns The final run name to use
791
+ *
792
+ * @internal
793
+ */
794
+ private createExperimentRunName;
761
795
  }
762
796
 
763
797
  /**
@@ -1501,7 +1535,7 @@ declare class LangfuseClient {
1501
1535
  * ]
1502
1536
  * });
1503
1537
  *
1504
- * console.log(await result.prettyPrint());
1538
+ * console.log(await result.format());
1505
1539
  * ```
1506
1540
  *
1507
1541
  * @example Using with datasets
package/dist/index.d.ts CHANGED
@@ -120,6 +120,14 @@ type ExperimentParams<Input = any, ExpectedOutput = any, Metadata extends Record
120
120
  * Choose a descriptive name that identifies the experiment's purpose.
121
121
  */
122
122
  name: string;
123
+ /**
124
+ * Optional exact name for the experiment run.
125
+ *
126
+ * If provided, this will be used as the exact dataset run name if the data
127
+ * contains Langfuse dataset items. If not provided, this will default to
128
+ * the experiment name appended with an ISO timestamp.
129
+ */
130
+ runName?: string;
123
131
  /**
124
132
  * Optional description explaining the experiment's purpose.
125
133
  *
@@ -227,10 +235,10 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
227
235
  * console.log(`Average score: ${avgScore?.value}`);
228
236
  *
229
237
  * // Print formatted results
230
- * console.log(await result.prettyPrint());
238
+ * console.log(await result.format());
231
239
  *
232
240
  * // Print summary with individual item results
233
- * console.log(await result.prettyPrint({ includeItemResults: true }));
241
+ * console.log(await result.format({ includeItemResults: true }));
234
242
  *
235
243
  * // Link to dataset run (if available)
236
244
  * if (result.datasetRunUrl) {
@@ -241,6 +249,13 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
241
249
  * @public
242
250
  */
243
251
  type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record<string, any> = Record<string, any>> = {
252
+ /**
253
+ * The experiment run name.
254
+ *
255
+ * This is equal to the dataset run name if experiment was on Langfuse dataset.
256
+ * Either the provided runName parameter or generated name (experiment name + timestamp).
257
+ */
258
+ runName: string;
244
259
  /**
245
260
  * ID of the dataset run in Langfuse (only for experiments on Langfuse datasets).
246
261
  *
@@ -273,7 +288,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
273
288
  */
274
289
  runEvaluations: Evaluation[];
275
290
  /**
276
- * Function to format and display experiment results in a human-readable format.
291
+ * Function to format experiment results in a human-readable format.
277
292
  *
278
293
  * Generates a comprehensive, nicely formatted summary including individual results,
279
294
  * aggregate statistics, evaluation scores, and links to traces and dataset runs.
@@ -282,7 +297,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
282
297
  * @param options.includeItemResults - Whether to include individual item details (default: false)
283
298
  * @returns Promise resolving to formatted string representation
284
299
  */
285
- prettyPrint: (options?: {
300
+ format: (options?: {
286
301
  includeItemResults?: boolean;
287
302
  }) => Promise<string>;
288
303
  };
@@ -301,6 +316,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
301
316
  * const dataset = await langfuse.dataset.get("my-dataset");
302
317
  * const result = await dataset.runExperiment({
303
318
  * name: "Model Evaluation",
319
+ * runName: "Model Evaluation Run 1", // optional
304
320
  * task: myTask,
305
321
  * evaluators: [myEvaluator]
306
322
  * });
@@ -469,6 +485,7 @@ declare class DatasetManager {
469
485
  *
470
486
  * const result = await dataset.runExperiment({
471
487
  * name: "GPT-4 Benchmark",
488
+ * runName: "GPT-4 Benchmark v1.2", // optional exact run name
472
489
  * description: "Evaluating GPT-4 on our benchmark tasks",
473
490
  * task: async ({ input }) => {
474
491
  * const response = await openai.chat.completions.create({
@@ -485,7 +502,7 @@ declare class DatasetManager {
485
502
  * ]
486
503
  * });
487
504
  *
488
- * console.log(await result.prettyPrint());
505
+ * console.log(await result.format());
489
506
  * ```
490
507
  *
491
508
  * @example Handling large datasets
@@ -549,7 +566,7 @@ declare class DatasetManager {
549
566
  * ]
550
567
  * });
551
568
  *
552
- * console.log(await result.prettyPrint());
569
+ * console.log(await result.format());
553
570
  * ```
554
571
  *
555
572
  * @example Using with Langfuse datasets
@@ -597,6 +614,7 @@ declare class ExperimentManager {
597
614
  *
598
615
  * @param config - The experiment configuration
599
616
  * @param config.name - Human-readable name for the experiment
617
+ * @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
600
618
  * @param config.description - Optional description of the experiment's purpose
601
619
  * @param config.metadata - Optional metadata to attach to the experiment run
602
620
  * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
@@ -606,10 +624,11 @@ declare class ExperimentManager {
606
624
  * @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
607
625
  *
608
626
  * @returns Promise that resolves to experiment results including:
627
+ * - runName: The experiment run name (either provided or generated)
609
628
  * - itemResults: Results for each processed data item
610
629
  * - runEvaluations: Results from run-level evaluators
611
630
  * - datasetRunId: ID of the dataset run (if using Langfuse datasets)
612
- * - prettyPrint: Function to format and display results
631
+ * - format: Function to format results for display
613
632
  *
614
633
  * @throws {Error} When task execution fails and cannot be handled gracefully
615
634
  * @throws {Error} When required evaluators fail critically
@@ -669,6 +688,7 @@ declare class ExperimentManager {
669
688
  *
670
689
  * @param params - Parameters for item execution
671
690
  * @param params.experimentName - Name of the parent experiment
691
+ * @param params.experimentRunName - Run name for the parent experiment
672
692
  * @param params.experimentDescription - Description of the parent experiment
673
693
  * @param params.experimentMetadata - Metadata for the parent experiment
674
694
  * @param params.item - The data item to process
@@ -758,6 +778,20 @@ declare class ExperimentManager {
758
778
  */
759
779
  private formatValue;
760
780
  private isOtelRegistered;
781
+ /**
782
+ * Creates an experiment run name based on provided parameters.
783
+ *
784
+ * If runName is provided, returns it directly. Otherwise, generates
785
+ * a name by combining the experiment name with an ISO timestamp.
786
+ *
787
+ * @param params - Parameters for run name creation
788
+ * @param params.name - The experiment name
789
+ * @param params.runName - Optional provided run name
790
+ * @returns The final run name to use
791
+ *
792
+ * @internal
793
+ */
794
+ private createExperimentRunName;
761
795
  }
762
796
 
763
797
  /**
@@ -1501,7 +1535,7 @@ declare class LangfuseClient {
1501
1535
  * ]
1502
1536
  * });
1503
1537
  *
1504
- * console.log(await result.prettyPrint());
1538
+ * console.log(await result.format());
1505
1539
  * ```
1506
1540
  *
1507
1541
  * @example Using with datasets
package/dist/index.mjs CHANGED
@@ -59,6 +59,7 @@ var DatasetManager = class {
59
59
  *
60
60
  * const result = await dataset.runExperiment({
61
61
  * name: "GPT-4 Benchmark",
62
+ * runName: "GPT-4 Benchmark v1.2", // optional exact run name
62
63
  * description: "Evaluating GPT-4 on our benchmark tasks",
63
64
  * task: async ({ input }) => {
64
65
  * const response = await openai.chat.completions.create({
@@ -75,7 +76,7 @@ var DatasetManager = class {
75
76
  * ]
76
77
  * });
77
78
  *
78
- * console.log(await result.prettyPrint());
79
+ * console.log(await result.format());
79
80
  * ```
80
81
  *
81
82
  * @example Handling large datasets
@@ -184,6 +185,7 @@ var ExperimentManager = class {
184
185
  *
185
186
  * @param config - The experiment configuration
186
187
  * @param config.name - Human-readable name for the experiment
188
+ * @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
187
189
  * @param config.description - Optional description of the experiment's purpose
188
190
  * @param config.metadata - Optional metadata to attach to the experiment run
189
191
  * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
@@ -193,10 +195,11 @@ var ExperimentManager = class {
193
195
  * @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
194
196
  *
195
197
  * @returns Promise that resolves to experiment results including:
198
+ * - runName: The experiment run name (either provided or generated)
196
199
  * - itemResults: Results for each processed data item
197
200
  * - runEvaluations: Results from run-level evaluators
198
201
  * - datasetRunId: ID of the dataset run (if using Langfuse datasets)
199
- * - prettyPrint: Function to format and display results
202
+ * - format: Function to format results for display
200
203
  *
201
204
  * @throws {Error} When task execution fails and cannot be handled gracefully
202
205
  * @throws {Error} When required evaluators fail critically
@@ -249,11 +252,16 @@ var ExperimentManager = class {
249
252
  evaluators,
250
253
  task,
251
254
  name,
255
+ runName: providedRunName,
252
256
  description,
253
257
  metadata,
254
258
  maxConcurrency: batchSize = Infinity,
255
259
  runEvaluators
256
260
  } = config;
261
+ const runName = this.createExperimentRunName({
262
+ name,
263
+ runName: providedRunName
264
+ });
257
265
  if (!this.isOtelRegistered()) {
258
266
  this.logger.warn(
259
267
  "OpenTelemetry has not been set up. Traces will not be sent to Langfuse.See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
@@ -268,6 +276,7 @@ var ExperimentManager = class {
268
276
  evaluators,
269
277
  task,
270
278
  experimentName: name,
279
+ experimentRunName: runName,
271
280
  experimentDescription: description,
272
281
  experimentMetadata: metadata
273
282
  });
@@ -325,11 +334,12 @@ var ExperimentManager = class {
325
334
  }
326
335
  await this.langfuseClient.score.flush();
327
336
  return {
337
+ runName,
328
338
  itemResults,
329
339
  datasetRunId,
330
340
  datasetRunUrl,
331
341
  runEvaluations,
332
- prettyPrint: async (options) => {
342
+ format: async (options) => {
333
343
  var _a;
334
344
  return await this.prettyPrintResults({
335
345
  datasetRunUrl,
@@ -337,6 +347,7 @@ var ExperimentManager = class {
337
347
  originalData: data,
338
348
  runEvaluations,
339
349
  name: config.name,
350
+ runName,
340
351
  description: config.description,
341
352
  includeItemResults: (_a = options == null ? void 0 : options.includeItemResults) != null ? _a : false
342
353
  });
@@ -355,6 +366,7 @@ var ExperimentManager = class {
355
366
  *
356
367
  * @param params - Parameters for item execution
357
368
  * @param params.experimentName - Name of the parent experiment
369
+ * @param params.experimentRunName - Run name for the parent experiment
358
370
  * @param params.experimentDescription - Description of the parent experiment
359
371
  * @param params.experimentMetadata - Metadata for the parent experiment
360
372
  * @param params.item - The data item to process
@@ -369,7 +381,7 @@ var ExperimentManager = class {
369
381
  */
370
382
  async runItem(params) {
371
383
  const { item, evaluators = [], task, experimentMetadata = {} } = params;
372
- const { output, traceId } = await startActiveObservation(
384
+ const { output, traceId, observationId } = await startActiveObservation(
373
385
  "experiment-item-run",
374
386
  async (span) => {
375
387
  var _a;
@@ -378,7 +390,8 @@ var ExperimentManager = class {
378
390
  input: item.input,
379
391
  output: output2,
380
392
  metadata: {
381
- experimentName: params.experimentName,
393
+ experiment_name: params.experimentName,
394
+ experiment_run_name: params.experimentRunName,
382
395
  ...experimentMetadata,
383
396
  ...(_a = item.metadata) != null ? _a : {},
384
397
  ..."id" in item && "datasetId" in item ? {
@@ -387,17 +400,18 @@ var ExperimentManager = class {
387
400
  } : {}
388
401
  }
389
402
  });
390
- return { output: output2, traceId: span.traceId };
403
+ return { output: output2, traceId: span.traceId, observationId: span.id };
391
404
  }
392
405
  );
393
406
  let datasetRunId = void 0;
394
407
  if ("id" in item) {
395
408
  await this.langfuseClient.api.datasetRunItems.create({
396
- runName: params.experimentName,
409
+ runName: params.experimentRunName,
397
410
  runDescription: params.experimentDescription,
398
411
  metadata: params.experimentMetadata,
399
412
  datasetItemId: item.id,
400
- traceId
413
+ traceId,
414
+ observationId
401
415
  }).then((result) => {
402
416
  datasetRunId = result.datasetRunId;
403
417
  }).catch(
@@ -519,6 +533,7 @@ ${JSON.stringify(params2)}
519
533
  originalData,
520
534
  runEvaluations,
521
535
  name,
536
+ runName,
522
537
  description,
523
538
  includeItemResults = false
524
539
  } = params;
@@ -576,7 +591,7 @@ ${index + 1}. Item ${index + 1}:
576
591
  } else {
577
592
  output += `Individual Results: Hidden (${itemResults.length} items)
578
593
  `;
579
- output += "\u{1F4A1} Call prettyPrint({ includeItemResults: true }) to view them\n";
594
+ output += "\u{1F4A1} Call format({ includeItemResults: true }) to view them\n";
580
595
  }
581
596
  const totalItems = itemResults.length;
582
597
  const evaluationNames = new Set(
@@ -585,7 +600,9 @@ ${index + 1}. Item ${index + 1}:
585
600
  output += `
586
601
  ${"\u2500".repeat(50)}
587
602
  `;
588
- output += `\u{1F4CA} ${name}`;
603
+ output += `\u{1F9EA} Experiment: ${name}`;
604
+ output += `
605
+ \u{1F4CB} Run name: ${runName}`;
589
606
  if (description) {
590
607
  output += ` - ${description}`;
591
608
  }
@@ -660,6 +677,26 @@ Run Evaluations:`;
660
677
  }
661
678
  return tracerProvider.constructor.name !== "NoopTracerProvider";
662
679
  }
680
+ /**
681
+ * Creates an experiment run name based on provided parameters.
682
+ *
683
+ * If runName is provided, returns it directly. Otherwise, generates
684
+ * a name by combining the experiment name with an ISO timestamp.
685
+ *
686
+ * @param params - Parameters for run name creation
687
+ * @param params.name - The experiment name
688
+ * @param params.runName - Optional provided run name
689
+ * @returns The final run name to use
690
+ *
691
+ * @internal
692
+ */
693
+ createExperimentRunName(params) {
694
+ if (params.runName) {
695
+ return params.runName;
696
+ }
697
+ const isoTimestamp = (/* @__PURE__ */ new Date()).toISOString();
698
+ return `${params.name} - ${isoTimestamp}`;
699
+ }
663
700
  };
664
701
 
665
702
  // src/media/index.ts
@@ -1180,10 +1217,10 @@ var ChatPromptClient = class _ChatPromptClient extends BasePromptClient {
1180
1217
  JSON.stringify(placeholderValue)
1181
1218
  );
1182
1219
  } else {
1183
- messagesWithPlaceholdersReplaced.push({
1184
- variableName: item.name,
1185
- optional: false
1186
- });
1220
+ messagesWithPlaceholdersReplaced.push([
1221
+ "placeholder",
1222
+ `{${item.name}}`
1223
+ ]);
1187
1224
  }
1188
1225
  } else if ("role" in item && "content" in item && item.type === "chatmessage" /* ChatMessage */) {
1189
1226
  messagesWithPlaceholdersReplaced.push({