@langfuse/client 4.1.0-alpha.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +51 -14
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +42 -8
- package/dist/index.d.ts +42 -8
- package/dist/index.mjs +51 -14
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
package/dist/index.cjs
CHANGED
|
@@ -99,6 +99,7 @@ var DatasetManager = class {
|
|
|
99
99
|
*
|
|
100
100
|
* const result = await dataset.runExperiment({
|
|
101
101
|
* name: "GPT-4 Benchmark",
|
|
102
|
+
* runName: "GPT-4 Benchmark v1.2", // optional exact run name
|
|
102
103
|
* description: "Evaluating GPT-4 on our benchmark tasks",
|
|
103
104
|
* task: async ({ input }) => {
|
|
104
105
|
* const response = await openai.chat.completions.create({
|
|
@@ -115,7 +116,7 @@ var DatasetManager = class {
|
|
|
115
116
|
* ]
|
|
116
117
|
* });
|
|
117
118
|
*
|
|
118
|
-
* console.log(await result.
|
|
119
|
+
* console.log(await result.format());
|
|
119
120
|
* ```
|
|
120
121
|
*
|
|
121
122
|
* @example Handling large datasets
|
|
@@ -224,6 +225,7 @@ var ExperimentManager = class {
|
|
|
224
225
|
*
|
|
225
226
|
* @param config - The experiment configuration
|
|
226
227
|
* @param config.name - Human-readable name for the experiment
|
|
228
|
+
* @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
|
|
227
229
|
* @param config.description - Optional description of the experiment's purpose
|
|
228
230
|
* @param config.metadata - Optional metadata to attach to the experiment run
|
|
229
231
|
* @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
|
|
@@ -233,10 +235,11 @@ var ExperimentManager = class {
|
|
|
233
235
|
* @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
|
|
234
236
|
*
|
|
235
237
|
* @returns Promise that resolves to experiment results including:
|
|
238
|
+
* - runName: The experiment run name (either provided or generated)
|
|
236
239
|
* - itemResults: Results for each processed data item
|
|
237
240
|
* - runEvaluations: Results from run-level evaluators
|
|
238
241
|
* - datasetRunId: ID of the dataset run (if using Langfuse datasets)
|
|
239
|
-
* -
|
|
242
|
+
* - format: Function to format results for display
|
|
240
243
|
*
|
|
241
244
|
* @throws {Error} When task execution fails and cannot be handled gracefully
|
|
242
245
|
* @throws {Error} When required evaluators fail critically
|
|
@@ -289,11 +292,16 @@ var ExperimentManager = class {
|
|
|
289
292
|
evaluators,
|
|
290
293
|
task,
|
|
291
294
|
name,
|
|
295
|
+
runName: providedRunName,
|
|
292
296
|
description,
|
|
293
297
|
metadata,
|
|
294
298
|
maxConcurrency: batchSize = Infinity,
|
|
295
299
|
runEvaluators
|
|
296
300
|
} = config;
|
|
301
|
+
const runName = this.createExperimentRunName({
|
|
302
|
+
name,
|
|
303
|
+
runName: providedRunName
|
|
304
|
+
});
|
|
297
305
|
if (!this.isOtelRegistered()) {
|
|
298
306
|
this.logger.warn(
|
|
299
307
|
"OpenTelemetry has not been set up. Traces will not be sent to Langfuse.See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
|
|
@@ -308,6 +316,7 @@ var ExperimentManager = class {
|
|
|
308
316
|
evaluators,
|
|
309
317
|
task,
|
|
310
318
|
experimentName: name,
|
|
319
|
+
experimentRunName: runName,
|
|
311
320
|
experimentDescription: description,
|
|
312
321
|
experimentMetadata: metadata
|
|
313
322
|
});
|
|
@@ -365,11 +374,12 @@ var ExperimentManager = class {
|
|
|
365
374
|
}
|
|
366
375
|
await this.langfuseClient.score.flush();
|
|
367
376
|
return {
|
|
377
|
+
runName,
|
|
368
378
|
itemResults,
|
|
369
379
|
datasetRunId,
|
|
370
380
|
datasetRunUrl,
|
|
371
381
|
runEvaluations,
|
|
372
|
-
|
|
382
|
+
format: async (options) => {
|
|
373
383
|
var _a;
|
|
374
384
|
return await this.prettyPrintResults({
|
|
375
385
|
datasetRunUrl,
|
|
@@ -377,6 +387,7 @@ var ExperimentManager = class {
|
|
|
377
387
|
originalData: data,
|
|
378
388
|
runEvaluations,
|
|
379
389
|
name: config.name,
|
|
390
|
+
runName,
|
|
380
391
|
description: config.description,
|
|
381
392
|
includeItemResults: (_a = options == null ? void 0 : options.includeItemResults) != null ? _a : false
|
|
382
393
|
});
|
|
@@ -395,6 +406,7 @@ var ExperimentManager = class {
|
|
|
395
406
|
*
|
|
396
407
|
* @param params - Parameters for item execution
|
|
397
408
|
* @param params.experimentName - Name of the parent experiment
|
|
409
|
+
* @param params.experimentRunName - Run name for the parent experiment
|
|
398
410
|
* @param params.experimentDescription - Description of the parent experiment
|
|
399
411
|
* @param params.experimentMetadata - Metadata for the parent experiment
|
|
400
412
|
* @param params.item - The data item to process
|
|
@@ -409,7 +421,7 @@ var ExperimentManager = class {
|
|
|
409
421
|
*/
|
|
410
422
|
async runItem(params) {
|
|
411
423
|
const { item, evaluators = [], task, experimentMetadata = {} } = params;
|
|
412
|
-
const { output, traceId } = await (0, import_tracing.startActiveObservation)(
|
|
424
|
+
const { output, traceId, observationId } = await (0, import_tracing.startActiveObservation)(
|
|
413
425
|
"experiment-item-run",
|
|
414
426
|
async (span) => {
|
|
415
427
|
var _a;
|
|
@@ -418,7 +430,8 @@ var ExperimentManager = class {
|
|
|
418
430
|
input: item.input,
|
|
419
431
|
output: output2,
|
|
420
432
|
metadata: {
|
|
421
|
-
|
|
433
|
+
experiment_name: params.experimentName,
|
|
434
|
+
experiment_run_name: params.experimentRunName,
|
|
422
435
|
...experimentMetadata,
|
|
423
436
|
...(_a = item.metadata) != null ? _a : {},
|
|
424
437
|
..."id" in item && "datasetId" in item ? {
|
|
@@ -427,17 +440,18 @@ var ExperimentManager = class {
|
|
|
427
440
|
} : {}
|
|
428
441
|
}
|
|
429
442
|
});
|
|
430
|
-
return { output: output2, traceId: span.traceId };
|
|
443
|
+
return { output: output2, traceId: span.traceId, observationId: span.id };
|
|
431
444
|
}
|
|
432
445
|
);
|
|
433
446
|
let datasetRunId = void 0;
|
|
434
447
|
if ("id" in item) {
|
|
435
448
|
await this.langfuseClient.api.datasetRunItems.create({
|
|
436
|
-
runName: params.experimentName,
|
|
449
|
+
runName: params.experimentRunName,
|
|
437
450
|
runDescription: params.experimentDescription,
|
|
438
451
|
metadata: params.experimentMetadata,
|
|
439
452
|
datasetItemId: item.id,
|
|
440
|
-
traceId
|
|
453
|
+
traceId,
|
|
454
|
+
observationId
|
|
441
455
|
}).then((result) => {
|
|
442
456
|
datasetRunId = result.datasetRunId;
|
|
443
457
|
}).catch(
|
|
@@ -559,6 +573,7 @@ ${JSON.stringify(params2)}
|
|
|
559
573
|
originalData,
|
|
560
574
|
runEvaluations,
|
|
561
575
|
name,
|
|
576
|
+
runName,
|
|
562
577
|
description,
|
|
563
578
|
includeItemResults = false
|
|
564
579
|
} = params;
|
|
@@ -616,7 +631,7 @@ ${index + 1}. Item ${index + 1}:
|
|
|
616
631
|
} else {
|
|
617
632
|
output += `Individual Results: Hidden (${itemResults.length} items)
|
|
618
633
|
`;
|
|
619
|
-
output += "\u{1F4A1} Call
|
|
634
|
+
output += "\u{1F4A1} Call format({ includeItemResults: true }) to view them\n";
|
|
620
635
|
}
|
|
621
636
|
const totalItems = itemResults.length;
|
|
622
637
|
const evaluationNames = new Set(
|
|
@@ -625,7 +640,9 @@ ${index + 1}. Item ${index + 1}:
|
|
|
625
640
|
output += `
|
|
626
641
|
${"\u2500".repeat(50)}
|
|
627
642
|
`;
|
|
628
|
-
output += `\u{
|
|
643
|
+
output += `\u{1F9EA} Experiment: ${name}`;
|
|
644
|
+
output += `
|
|
645
|
+
\u{1F4CB} Run name: ${runName}`;
|
|
629
646
|
if (description) {
|
|
630
647
|
output += ` - ${description}`;
|
|
631
648
|
}
|
|
@@ -700,6 +717,26 @@ Run Evaluations:`;
|
|
|
700
717
|
}
|
|
701
718
|
return tracerProvider.constructor.name !== "NoopTracerProvider";
|
|
702
719
|
}
|
|
720
|
+
/**
|
|
721
|
+
* Creates an experiment run name based on provided parameters.
|
|
722
|
+
*
|
|
723
|
+
* If runName is provided, returns it directly. Otherwise, generates
|
|
724
|
+
* a name by combining the experiment name with an ISO timestamp.
|
|
725
|
+
*
|
|
726
|
+
* @param params - Parameters for run name creation
|
|
727
|
+
* @param params.name - The experiment name
|
|
728
|
+
* @param params.runName - Optional provided run name
|
|
729
|
+
* @returns The final run name to use
|
|
730
|
+
*
|
|
731
|
+
* @internal
|
|
732
|
+
*/
|
|
733
|
+
createExperimentRunName(params) {
|
|
734
|
+
if (params.runName) {
|
|
735
|
+
return params.runName;
|
|
736
|
+
}
|
|
737
|
+
const isoTimestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
738
|
+
return `${params.name} - ${isoTimestamp}`;
|
|
739
|
+
}
|
|
703
740
|
};
|
|
704
741
|
|
|
705
742
|
// src/media/index.ts
|
|
@@ -1215,10 +1252,10 @@ var ChatPromptClient = class _ChatPromptClient extends BasePromptClient {
|
|
|
1215
1252
|
JSON.stringify(placeholderValue)
|
|
1216
1253
|
);
|
|
1217
1254
|
} else {
|
|
1218
|
-
messagesWithPlaceholdersReplaced.push(
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1255
|
+
messagesWithPlaceholdersReplaced.push([
|
|
1256
|
+
"placeholder",
|
|
1257
|
+
`{${item.name}}`
|
|
1258
|
+
]);
|
|
1222
1259
|
}
|
|
1223
1260
|
} else if ("role" in item && "content" in item && item.type === "chatmessage" /* ChatMessage */) {
|
|
1224
1261
|
messagesWithPlaceholdersReplaced.push({
|