@langfuse/client 4.1.0-alpha.1 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +67 -16
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +48 -14
- package/dist/index.d.ts +48 -14
- package/dist/index.mjs +66 -15
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
package/dist/index.cjs
CHANGED
|
@@ -39,7 +39,7 @@ __export(index_exports, {
|
|
|
39
39
|
PromptManager: () => PromptManager,
|
|
40
40
|
ScoreManager: () => ScoreManager,
|
|
41
41
|
TextPromptClient: () => TextPromptClient,
|
|
42
|
-
|
|
42
|
+
createEvaluatorFromAutoevals: () => createEvaluatorFromAutoevals
|
|
43
43
|
});
|
|
44
44
|
module.exports = __toCommonJS(index_exports);
|
|
45
45
|
|
|
@@ -99,6 +99,7 @@ var DatasetManager = class {
|
|
|
99
99
|
*
|
|
100
100
|
* const result = await dataset.runExperiment({
|
|
101
101
|
* name: "GPT-4 Benchmark",
|
|
102
|
+
* runName: "GPT-4 Benchmark v1.2", // optional exact run name
|
|
102
103
|
* description: "Evaluating GPT-4 on our benchmark tasks",
|
|
103
104
|
* task: async ({ input }) => {
|
|
104
105
|
* const response = await openai.chat.completions.create({
|
|
@@ -115,7 +116,7 @@ var DatasetManager = class {
|
|
|
115
116
|
* ]
|
|
116
117
|
* });
|
|
117
118
|
*
|
|
118
|
-
* console.log(await result.
|
|
119
|
+
* console.log(await result.format());
|
|
119
120
|
* ```
|
|
120
121
|
*
|
|
121
122
|
* @example Handling large datasets
|
|
@@ -224,6 +225,7 @@ var ExperimentManager = class {
|
|
|
224
225
|
*
|
|
225
226
|
* @param config - The experiment configuration
|
|
226
227
|
* @param config.name - Human-readable name for the experiment
|
|
228
|
+
* @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
|
|
227
229
|
* @param config.description - Optional description of the experiment's purpose
|
|
228
230
|
* @param config.metadata - Optional metadata to attach to the experiment run
|
|
229
231
|
* @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
|
|
@@ -233,10 +235,11 @@ var ExperimentManager = class {
|
|
|
233
235
|
* @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
|
|
234
236
|
*
|
|
235
237
|
* @returns Promise that resolves to experiment results including:
|
|
238
|
+
* - runName: The experiment run name (either provided or generated)
|
|
236
239
|
* - itemResults: Results for each processed data item
|
|
237
240
|
* - runEvaluations: Results from run-level evaluators
|
|
238
241
|
* - datasetRunId: ID of the dataset run (if using Langfuse datasets)
|
|
239
|
-
* -
|
|
242
|
+
* - format: Function to format results for display
|
|
240
243
|
*
|
|
241
244
|
* @throws {Error} When task execution fails and cannot be handled gracefully
|
|
242
245
|
* @throws {Error} When required evaluators fail critically
|
|
@@ -289,11 +292,16 @@ var ExperimentManager = class {
|
|
|
289
292
|
evaluators,
|
|
290
293
|
task,
|
|
291
294
|
name,
|
|
295
|
+
runName: providedRunName,
|
|
292
296
|
description,
|
|
293
297
|
metadata,
|
|
294
298
|
maxConcurrency: batchSize = Infinity,
|
|
295
299
|
runEvaluators
|
|
296
300
|
} = config;
|
|
301
|
+
const runName = this.createExperimentRunName({
|
|
302
|
+
name,
|
|
303
|
+
runName: providedRunName
|
|
304
|
+
});
|
|
297
305
|
if (!this.isOtelRegistered()) {
|
|
298
306
|
this.logger.warn(
|
|
299
307
|
"OpenTelemetry has not been set up. Traces will not be sent to Langfuse.See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
|
|
@@ -308,11 +316,26 @@ var ExperimentManager = class {
|
|
|
308
316
|
evaluators,
|
|
309
317
|
task,
|
|
310
318
|
experimentName: name,
|
|
319
|
+
experimentRunName: runName,
|
|
311
320
|
experimentDescription: description,
|
|
312
321
|
experimentMetadata: metadata
|
|
313
322
|
});
|
|
314
323
|
});
|
|
315
|
-
const
|
|
324
|
+
const settledResults = await Promise.allSettled(promises);
|
|
325
|
+
const results = settledResults.reduce(
|
|
326
|
+
(acc, settledResult) => {
|
|
327
|
+
if (settledResult.status === "fulfilled") {
|
|
328
|
+
acc.push(settledResult.value);
|
|
329
|
+
} else {
|
|
330
|
+
const errorMessage = settledResult.reason instanceof Error ? settledResult.reason.message : String(settledResult.reason);
|
|
331
|
+
this.logger.error(
|
|
332
|
+
`Task failed with error: ${errorMessage}. Skipping item.`
|
|
333
|
+
);
|
|
334
|
+
}
|
|
335
|
+
return acc;
|
|
336
|
+
},
|
|
337
|
+
[]
|
|
338
|
+
);
|
|
316
339
|
itemResults.push(...results);
|
|
317
340
|
}
|
|
318
341
|
const datasetRunId = itemResults.length > 0 ? itemResults[0].datasetRunId : void 0;
|
|
@@ -351,11 +374,12 @@ var ExperimentManager = class {
|
|
|
351
374
|
}
|
|
352
375
|
await this.langfuseClient.score.flush();
|
|
353
376
|
return {
|
|
377
|
+
runName,
|
|
354
378
|
itemResults,
|
|
355
379
|
datasetRunId,
|
|
356
380
|
datasetRunUrl,
|
|
357
381
|
runEvaluations,
|
|
358
|
-
|
|
382
|
+
format: async (options) => {
|
|
359
383
|
var _a;
|
|
360
384
|
return await this.prettyPrintResults({
|
|
361
385
|
datasetRunUrl,
|
|
@@ -363,6 +387,7 @@ var ExperimentManager = class {
|
|
|
363
387
|
originalData: data,
|
|
364
388
|
runEvaluations,
|
|
365
389
|
name: config.name,
|
|
390
|
+
runName,
|
|
366
391
|
description: config.description,
|
|
367
392
|
includeItemResults: (_a = options == null ? void 0 : options.includeItemResults) != null ? _a : false
|
|
368
393
|
});
|
|
@@ -381,6 +406,7 @@ var ExperimentManager = class {
|
|
|
381
406
|
*
|
|
382
407
|
* @param params - Parameters for item execution
|
|
383
408
|
* @param params.experimentName - Name of the parent experiment
|
|
409
|
+
* @param params.experimentRunName - Run name for the parent experiment
|
|
384
410
|
* @param params.experimentDescription - Description of the parent experiment
|
|
385
411
|
* @param params.experimentMetadata - Metadata for the parent experiment
|
|
386
412
|
* @param params.item - The data item to process
|
|
@@ -395,7 +421,7 @@ var ExperimentManager = class {
|
|
|
395
421
|
*/
|
|
396
422
|
async runItem(params) {
|
|
397
423
|
const { item, evaluators = [], task, experimentMetadata = {} } = params;
|
|
398
|
-
const { output, traceId } = await (0, import_tracing.startActiveObservation)(
|
|
424
|
+
const { output, traceId, observationId } = await (0, import_tracing.startActiveObservation)(
|
|
399
425
|
"experiment-item-run",
|
|
400
426
|
async (span) => {
|
|
401
427
|
var _a;
|
|
@@ -404,26 +430,28 @@ var ExperimentManager = class {
|
|
|
404
430
|
input: item.input,
|
|
405
431
|
output: output2,
|
|
406
432
|
metadata: {
|
|
407
|
-
|
|
433
|
+
experiment_name: params.experimentName,
|
|
434
|
+
experiment_run_name: params.experimentRunName,
|
|
408
435
|
...experimentMetadata,
|
|
409
436
|
...(_a = item.metadata) != null ? _a : {},
|
|
410
437
|
..."id" in item && "datasetId" in item ? {
|
|
411
|
-
|
|
412
|
-
|
|
438
|
+
dataset_id: item["datasetId"],
|
|
439
|
+
dataset_item_id: item["id"]
|
|
413
440
|
} : {}
|
|
414
441
|
}
|
|
415
442
|
});
|
|
416
|
-
return { output: output2, traceId: span.traceId };
|
|
443
|
+
return { output: output2, traceId: span.traceId, observationId: span.id };
|
|
417
444
|
}
|
|
418
445
|
);
|
|
419
446
|
let datasetRunId = void 0;
|
|
420
447
|
if ("id" in item) {
|
|
421
448
|
await this.langfuseClient.api.datasetRunItems.create({
|
|
422
|
-
runName: params.experimentName,
|
|
449
|
+
runName: params.experimentRunName,
|
|
423
450
|
runDescription: params.experimentDescription,
|
|
424
451
|
metadata: params.experimentMetadata,
|
|
425
452
|
datasetItemId: item.id,
|
|
426
|
-
traceId
|
|
453
|
+
traceId,
|
|
454
|
+
observationId
|
|
427
455
|
}).then((result) => {
|
|
428
456
|
datasetRunId = result.datasetRunId;
|
|
429
457
|
}).catch(
|
|
@@ -545,6 +573,7 @@ ${JSON.stringify(params2)}
|
|
|
545
573
|
originalData,
|
|
546
574
|
runEvaluations,
|
|
547
575
|
name,
|
|
576
|
+
runName,
|
|
548
577
|
description,
|
|
549
578
|
includeItemResults = false
|
|
550
579
|
} = params;
|
|
@@ -602,7 +631,7 @@ ${index + 1}. Item ${index + 1}:
|
|
|
602
631
|
} else {
|
|
603
632
|
output += `Individual Results: Hidden (${itemResults.length} items)
|
|
604
633
|
`;
|
|
605
|
-
output += "\u{1F4A1} Call
|
|
634
|
+
output += "\u{1F4A1} Call format({ includeItemResults: true }) to view them\n";
|
|
606
635
|
}
|
|
607
636
|
const totalItems = itemResults.length;
|
|
608
637
|
const evaluationNames = new Set(
|
|
@@ -611,7 +640,9 @@ ${index + 1}. Item ${index + 1}:
|
|
|
611
640
|
output += `
|
|
612
641
|
${"\u2500".repeat(50)}
|
|
613
642
|
`;
|
|
614
|
-
output += `\u{
|
|
643
|
+
output += `\u{1F9EA} Experiment: ${name}`;
|
|
644
|
+
output += `
|
|
645
|
+
\u{1F4CB} Run name: ${runName}`;
|
|
615
646
|
if (description) {
|
|
616
647
|
output += ` - ${description}`;
|
|
617
648
|
}
|
|
@@ -686,6 +717,26 @@ Run Evaluations:`;
|
|
|
686
717
|
}
|
|
687
718
|
return tracerProvider.constructor.name !== "NoopTracerProvider";
|
|
688
719
|
}
|
|
720
|
+
/**
|
|
721
|
+
* Creates an experiment run name based on provided parameters.
|
|
722
|
+
*
|
|
723
|
+
* If runName is provided, returns it directly. Otherwise, generates
|
|
724
|
+
* a name by combining the experiment name with an ISO timestamp.
|
|
725
|
+
*
|
|
726
|
+
* @param params - Parameters for run name creation
|
|
727
|
+
* @param params.name - The experiment name
|
|
728
|
+
* @param params.runName - Optional provided run name
|
|
729
|
+
* @returns The final run name to use
|
|
730
|
+
*
|
|
731
|
+
* @internal
|
|
732
|
+
*/
|
|
733
|
+
createExperimentRunName(params) {
|
|
734
|
+
if (params.runName) {
|
|
735
|
+
return params.runName;
|
|
736
|
+
}
|
|
737
|
+
const isoTimestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
738
|
+
return `${params.name} - ${isoTimestamp}`;
|
|
739
|
+
}
|
|
689
740
|
};
|
|
690
741
|
|
|
691
742
|
// src/media/index.ts
|
|
@@ -1880,7 +1931,7 @@ var LangfuseClient = class {
|
|
|
1880
1931
|
};
|
|
1881
1932
|
|
|
1882
1933
|
// src/experiment/adapters.ts
|
|
1883
|
-
function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
|
|
1934
|
+
function createEvaluatorFromAutoevals(autoevalEvaluator, params) {
|
|
1884
1935
|
const langfuseEvaluator = async (langfuseEvaluatorParams) => {
|
|
1885
1936
|
var _a;
|
|
1886
1937
|
const score = await autoevalEvaluator({
|
|
@@ -1908,6 +1959,6 @@ function autoevalsToLangfuseEvaluator(autoevalEvaluator, params) {
|
|
|
1908
1959
|
PromptManager,
|
|
1909
1960
|
ScoreManager,
|
|
1910
1961
|
TextPromptClient,
|
|
1911
|
-
|
|
1962
|
+
createEvaluatorFromAutoevals
|
|
1912
1963
|
});
|
|
1913
1964
|
//# sourceMappingURL=index.cjs.map
|