@langfuse/client 4.1.0-alpha.1 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -39,7 +39,7 @@ __export(index_exports, {
39
39
  PromptManager: () => PromptManager,
40
40
  ScoreManager: () => ScoreManager,
41
41
  TextPromptClient: () => TextPromptClient,
42
- autoevalsToLangfuseEvaluator: () => autoevalsToLangfuseEvaluator
42
+ createEvaluatorFromAutoevals: () => createEvaluatorFromAutoevals
43
43
  });
44
44
  module.exports = __toCommonJS(index_exports);
45
45
 
@@ -99,6 +99,7 @@ var DatasetManager = class {
99
99
  *
100
100
  * const result = await dataset.runExperiment({
101
101
  * name: "GPT-4 Benchmark",
102
+ * runName: "GPT-4 Benchmark v1.2", // optional exact run name
102
103
  * description: "Evaluating GPT-4 on our benchmark tasks",
103
104
  * task: async ({ input }) => {
104
105
  * const response = await openai.chat.completions.create({
@@ -115,7 +116,7 @@ var DatasetManager = class {
115
116
  * ]
116
117
  * });
117
118
  *
118
- * console.log(await result.prettyPrint());
119
+ * console.log(await result.format());
119
120
  * ```
120
121
  *
121
122
  * @example Handling large datasets
@@ -224,6 +225,7 @@ var ExperimentManager = class {
224
225
  *
225
226
  * @param config - The experiment configuration
226
227
  * @param config.name - Human-readable name for the experiment
228
+ * @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
227
229
  * @param config.description - Optional description of the experiment's purpose
228
230
  * @param config.metadata - Optional metadata to attach to the experiment run
229
231
  * @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
@@ -233,10 +235,11 @@ var ExperimentManager = class {
233
235
  * @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
234
236
  *
235
237
  * @returns Promise that resolves to experiment results including:
238
+ * - runName: The experiment run name (either provided or generated)
236
239
  * - itemResults: Results for each processed data item
237
240
  * - runEvaluations: Results from run-level evaluators
238
241
  * - datasetRunId: ID of the dataset run (if using Langfuse datasets)
239
- * - prettyPrint: Function to format and display results
242
+ * - format: Function to format results for display
240
243
  *
241
244
  * @throws {Error} When task execution fails and cannot be handled gracefully
242
245
  * @throws {Error} When required evaluators fail critically
@@ -289,11 +292,16 @@ var ExperimentManager = class {
289
292
  evaluators,
290
293
  task,
291
294
  name,
295
+ runName: providedRunName,
292
296
  description,
293
297
  metadata,
294
298
  maxConcurrency: batchSize = Infinity,
295
299
  runEvaluators
296
300
  } = config;
301
+ const runName = this.createExperimentRunName({
302
+ name,
303
+ runName: providedRunName
304
+ });
297
305
  if (!this.isOtelRegistered()) {
298
306
  this.logger.warn(
299
307
  "OpenTelemetry has not been set up. Traces will not be sent to Langfuse.See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
@@ -308,11 +316,26 @@ var ExperimentManager = class {
308
316
  evaluators,
309
317
  task,
310
318
  experimentName: name,
319
+ experimentRunName: runName,
311
320
  experimentDescription: description,
312
321
  experimentMetadata: metadata
313
322
  });
314
323
  });
315
- const results = await Promise.all(promises);
324
+ const settledResults = await Promise.allSettled(promises);
325
+ const results = settledResults.reduce(
326
+ (acc, settledResult) => {
327
+ if (settledResult.status === "fulfilled") {
328
+ acc.push(settledResult.value);
329
+ } else {
330
+ const errorMessage = settledResult.reason instanceof Error ? settledResult.reason.message : String(settledResult.reason);
331
+ this.logger.error(
332
+ `Task failed with error: ${errorMessage}. Skipping item.`
333
+ );
334
+ }
335
+ return acc;
336
+ },
337
+ []
338
+ );
316
339
  itemResults.push(...results);
317
340
  }
318
341
  const datasetRunId = itemResults.length > 0 ? itemResults[0].datasetRunId : void 0;
@@ -351,11 +374,12 @@ var ExperimentManager = class {
351
374
  }
352
375
  await this.langfuseClient.score.flush();
353
376
  return {
377
+ runName,
354
378
  itemResults,
355
379
  datasetRunId,
356
380
  datasetRunUrl,
357
381
  runEvaluations,
358
- prettyPrint: async (options) => {
382
+ format: async (options) => {
359
383
  var _a;
360
384
  return await this.prettyPrintResults({
361
385
  datasetRunUrl,
@@ -363,6 +387,7 @@ var ExperimentManager = class {
363
387
  originalData: data,
364
388
  runEvaluations,
365
389
  name: config.name,
390
+ runName,
366
391
  description: config.description,
367
392
  includeItemResults: (_a = options == null ? void 0 : options.includeItemResults) != null ? _a : false
368
393
  });
@@ -381,6 +406,7 @@ var ExperimentManager = class {
381
406
  *
382
407
  * @param params - Parameters for item execution
383
408
  * @param params.experimentName - Name of the parent experiment
409
+ * @param params.experimentRunName - Run name for the parent experiment
384
410
  * @param params.experimentDescription - Description of the parent experiment
385
411
  * @param params.experimentMetadata - Metadata for the parent experiment
386
412
  * @param params.item - The data item to process
@@ -395,7 +421,7 @@ var ExperimentManager = class {
395
421
  */
396
422
  async runItem(params) {
397
423
  const { item, evaluators = [], task, experimentMetadata = {} } = params;
398
- const { output, traceId } = await (0, import_tracing.startActiveObservation)(
424
+ const { output, traceId, observationId } = await (0, import_tracing.startActiveObservation)(
399
425
  "experiment-item-run",
400
426
  async (span) => {
401
427
  var _a;
@@ -404,26 +430,28 @@ var ExperimentManager = class {
404
430
  input: item.input,
405
431
  output: output2,
406
432
  metadata: {
407
- experimentName: params.experimentName,
433
+ experiment_name: params.experimentName,
434
+ experiment_run_name: params.experimentRunName,
408
435
  ...experimentMetadata,
409
436
  ...(_a = item.metadata) != null ? _a : {},
410
437
  ..."id" in item && "datasetId" in item ? {
411
- datasetId: item["datasetId"],
412
- datasetItemId: item["id"]
438
+ dataset_id: item["datasetId"],
439
+ dataset_item_id: item["id"]
413
440
  } : {}
414
441
  }
415
442
  });
416
- return { output: output2, traceId: span.traceId };
443
+ return { output: output2, traceId: span.traceId, observationId: span.id };
417
444
  }
418
445
  );
419
446
  let datasetRunId = void 0;
420
447
  if ("id" in item) {
421
448
  await this.langfuseClient.api.datasetRunItems.create({
422
- runName: params.experimentName,
449
+ runName: params.experimentRunName,
423
450
  runDescription: params.experimentDescription,
424
451
  metadata: params.experimentMetadata,
425
452
  datasetItemId: item.id,
426
- traceId
453
+ traceId,
454
+ observationId
427
455
  }).then((result) => {
428
456
  datasetRunId = result.datasetRunId;
429
457
  }).catch(
@@ -545,6 +573,7 @@ ${JSON.stringify(params2)}
545
573
  originalData,
546
574
  runEvaluations,
547
575
  name,
576
+ runName,
548
577
  description,
549
578
  includeItemResults = false
550
579
  } = params;
@@ -602,7 +631,7 @@ ${index + 1}. Item ${index + 1}:
602
631
  } else {
603
632
  output += `Individual Results: Hidden (${itemResults.length} items)
604
633
  `;
605
- output += "\u{1F4A1} Call prettyPrint({ includeItemResults: true }) to view them\n";
634
+ output += "\u{1F4A1} Call format({ includeItemResults: true }) to view them\n";
606
635
  }
607
636
  const totalItems = itemResults.length;
608
637
  const evaluationNames = new Set(
@@ -611,7 +640,9 @@ ${index + 1}. Item ${index + 1}:
611
640
  output += `
612
641
  ${"\u2500".repeat(50)}
613
642
  `;
614
- output += `\u{1F4CA} ${name}`;
643
+ output += `\u{1F9EA} Experiment: ${name}`;
644
+ output += `
645
+ \u{1F4CB} Run name: ${runName}`;
615
646
  if (description) {
616
647
  output += ` - ${description}`;
617
648
  }
@@ -686,6 +717,26 @@ Run Evaluations:`;
686
717
  }
687
718
  return tracerProvider.constructor.name !== "NoopTracerProvider";
688
719
  }
720
+ /**
721
+ * Creates an experiment run name based on provided parameters.
722
+ *
723
+ * If runName is provided, returns it directly. Otherwise, generates
724
+ * a name by combining the experiment name with an ISO timestamp.
725
+ *
726
+ * @param params - Parameters for run name creation
727
+ * @param params.name - The experiment name
728
+ * @param params.runName - Optional provided run name
729
+ * @returns The final run name to use
730
+ *
731
+ * @internal
732
+ */
733
+ createExperimentRunName(params) {
734
+ if (params.runName) {
735
+ return params.runName;
736
+ }
737
+ const isoTimestamp = (/* @__PURE__ */ new Date()).toISOString();
738
+ return `${params.name} - ${isoTimestamp}`;
739
+ }
689
740
  };
690
741
 
691
742
  // src/media/index.ts
@@ -1880,7 +1931,7 @@ var LangfuseClient = class {
1880
1931
  };
1881
1932
 
1882
1933
  // src/experiment/adapters.ts
1883
- function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
1934
+ function createEvaluatorFromAutoevals(autoevalEvaluator, params) {
1884
1935
  const langfuseEvaluator = async (langfuseEvaluatorParams) => {
1885
1936
  var _a;
1886
1937
  const score = await autoevalEvaluator({
@@ -1908,6 +1959,6 @@ function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
1908
1959
  PromptManager,
1909
1960
  ScoreManager,
1910
1961
  TextPromptClient,
1911
- autoevalsToLangfuseEvaluator
1962
+ createEvaluatorFromAutoevals
1912
1963
  });
1913
1964
  //# sourceMappingURL=index.cjs.map