@langfuse/client 4.1.0-alpha.2 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +51 -14
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +42 -8
- package/dist/index.d.ts +42 -8
- package/dist/index.mjs +51 -14
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
package/dist/index.d.cts
CHANGED
|
@@ -120,6 +120,14 @@ type ExperimentParams<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
120
120
|
* Choose a descriptive name that identifies the experiment's purpose.
|
|
121
121
|
*/
|
|
122
122
|
name: string;
|
|
123
|
+
/**
|
|
124
|
+
* Optional exact name for the experiment run.
|
|
125
|
+
*
|
|
126
|
+
* If provided, this will be used as the exact dataset run name if the data
|
|
127
|
+
* contains Langfuse dataset items. If not provided, this will default to
|
|
128
|
+
* the experiment name appended with an ISO timestamp.
|
|
129
|
+
*/
|
|
130
|
+
runName?: string;
|
|
123
131
|
/**
|
|
124
132
|
* Optional description explaining the experiment's purpose.
|
|
125
133
|
*
|
|
@@ -227,10 +235,10 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
|
|
|
227
235
|
* console.log(`Average score: ${avgScore?.value}`);
|
|
228
236
|
*
|
|
229
237
|
* // Print formatted results
|
|
230
|
-
* console.log(await result.
|
|
238
|
+
* console.log(await result.format());
|
|
231
239
|
*
|
|
232
240
|
* // Print summary with individual item results
|
|
233
|
-
* console.log(await result.
|
|
241
|
+
* console.log(await result.format({ includeItemResults: true }));
|
|
234
242
|
*
|
|
235
243
|
* // Link to dataset run (if available)
|
|
236
244
|
* if (result.datasetRunUrl) {
|
|
@@ -241,6 +249,13 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
|
|
|
241
249
|
* @public
|
|
242
250
|
*/
|
|
243
251
|
type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record<string, any> = Record<string, any>> = {
|
|
252
|
+
/**
|
|
253
|
+
* The experiment run name.
|
|
254
|
+
*
|
|
255
|
+
* This is equal to the dataset run name if the experiment was run on a Langfuse dataset.
|
|
256
|
+
* Either the provided runName parameter or a generated name (experiment name + ISO timestamp).
|
|
257
|
+
*/
|
|
258
|
+
runName: string;
|
|
244
259
|
/**
|
|
245
260
|
* ID of the dataset run in Langfuse (only for experiments on Langfuse datasets).
|
|
246
261
|
*
|
|
@@ -273,7 +288,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
273
288
|
*/
|
|
274
289
|
runEvaluations: Evaluation[];
|
|
275
290
|
/**
|
|
276
|
-
* Function to format
|
|
291
|
+
* Function to format experiment results in a human-readable format.
|
|
277
292
|
*
|
|
278
293
|
* Generates a comprehensive, nicely formatted summary including individual results,
|
|
279
294
|
* aggregate statistics, evaluation scores, and links to traces and dataset runs.
|
|
@@ -282,7 +297,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
282
297
|
* @param options.includeItemResults - Whether to include individual item details (default: false)
|
|
283
298
|
* @returns Promise resolving to formatted string representation
|
|
284
299
|
*/
|
|
285
|
-
|
|
300
|
+
format: (options?: {
|
|
286
301
|
includeItemResults?: boolean;
|
|
287
302
|
}) => Promise<string>;
|
|
288
303
|
};
|
|
@@ -301,6 +316,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
301
316
|
* const dataset = await langfuse.dataset.get("my-dataset");
|
|
302
317
|
* const result = await dataset.runExperiment({
|
|
303
318
|
* name: "Model Evaluation",
|
|
319
|
+
* runName: "Model Evaluation Run 1", // optional
|
|
304
320
|
* task: myTask,
|
|
305
321
|
* evaluators: [myEvaluator]
|
|
306
322
|
* });
|
|
@@ -469,6 +485,7 @@ declare class DatasetManager {
|
|
|
469
485
|
*
|
|
470
486
|
* const result = await dataset.runExperiment({
|
|
471
487
|
* name: "GPT-4 Benchmark",
|
|
488
|
+
* runName: "GPT-4 Benchmark v1.2", // optional exact run name
|
|
472
489
|
* description: "Evaluating GPT-4 on our benchmark tasks",
|
|
473
490
|
* task: async ({ input }) => {
|
|
474
491
|
* const response = await openai.chat.completions.create({
|
|
@@ -485,7 +502,7 @@ declare class DatasetManager {
|
|
|
485
502
|
* ]
|
|
486
503
|
* });
|
|
487
504
|
*
|
|
488
|
-
* console.log(await result.
|
|
505
|
+
* console.log(await result.format());
|
|
489
506
|
* ```
|
|
490
507
|
*
|
|
491
508
|
* @example Handling large datasets
|
|
@@ -549,7 +566,7 @@ declare class DatasetManager {
|
|
|
549
566
|
* ]
|
|
550
567
|
* });
|
|
551
568
|
*
|
|
552
|
-
* console.log(await result.
|
|
569
|
+
* console.log(await result.format());
|
|
553
570
|
* ```
|
|
554
571
|
*
|
|
555
572
|
* @example Using with Langfuse datasets
|
|
@@ -597,6 +614,7 @@ declare class ExperimentManager {
|
|
|
597
614
|
*
|
|
598
615
|
* @param config - The experiment configuration
|
|
599
616
|
* @param config.name - Human-readable name for the experiment
|
|
617
|
+
* @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
|
|
600
618
|
* @param config.description - Optional description of the experiment's purpose
|
|
601
619
|
* @param config.metadata - Optional metadata to attach to the experiment run
|
|
602
620
|
* @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
|
|
@@ -606,10 +624,11 @@ declare class ExperimentManager {
|
|
|
606
624
|
* @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
|
|
607
625
|
*
|
|
608
626
|
* @returns Promise that resolves to experiment results including:
|
|
627
|
+
* - runName: The experiment run name (either provided or generated)
|
|
609
628
|
* - itemResults: Results for each processed data item
|
|
610
629
|
* - runEvaluations: Results from run-level evaluators
|
|
611
630
|
* - datasetRunId: ID of the dataset run (if using Langfuse datasets)
|
|
612
|
-
* -
|
|
631
|
+
* - format: Function to format results for display
|
|
613
632
|
*
|
|
614
633
|
* @throws {Error} When task execution fails and cannot be handled gracefully
|
|
615
634
|
* @throws {Error} When required evaluators fail critically
|
|
@@ -669,6 +688,7 @@ declare class ExperimentManager {
|
|
|
669
688
|
*
|
|
670
689
|
* @param params - Parameters for item execution
|
|
671
690
|
* @param params.experimentName - Name of the parent experiment
|
|
691
|
+
* @param params.experimentRunName - Run name for the parent experiment
|
|
672
692
|
* @param params.experimentDescription - Description of the parent experiment
|
|
673
693
|
* @param params.experimentMetadata - Metadata for the parent experiment
|
|
674
694
|
* @param params.item - The data item to process
|
|
@@ -758,6 +778,20 @@ declare class ExperimentManager {
|
|
|
758
778
|
*/
|
|
759
779
|
private formatValue;
|
|
760
780
|
private isOtelRegistered;
|
|
781
|
+
/**
|
|
782
|
+
* Creates an experiment run name based on provided parameters.
|
|
783
|
+
*
|
|
784
|
+
* If runName is provided, returns it directly. Otherwise, generates
|
|
785
|
+
* a name by combining the experiment name with an ISO timestamp.
|
|
786
|
+
*
|
|
787
|
+
* @param params - Parameters for run name creation
|
|
788
|
+
* @param params.name - The experiment name
|
|
789
|
+
* @param params.runName - Optional provided run name
|
|
790
|
+
* @returns The final run name to use
|
|
791
|
+
*
|
|
792
|
+
* @internal
|
|
793
|
+
*/
|
|
794
|
+
private createExperimentRunName;
|
|
761
795
|
}
|
|
762
796
|
|
|
763
797
|
/**
|
|
@@ -1501,7 +1535,7 @@ declare class LangfuseClient {
|
|
|
1501
1535
|
* ]
|
|
1502
1536
|
* });
|
|
1503
1537
|
*
|
|
1504
|
-
* console.log(await result.
|
|
1538
|
+
* console.log(await result.format());
|
|
1505
1539
|
* ```
|
|
1506
1540
|
*
|
|
1507
1541
|
* @example Using with datasets
|
package/dist/index.d.ts
CHANGED
|
@@ -120,6 +120,14 @@ type ExperimentParams<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
120
120
|
* Choose a descriptive name that identifies the experiment's purpose.
|
|
121
121
|
*/
|
|
122
122
|
name: string;
|
|
123
|
+
/**
|
|
124
|
+
* Optional exact name for the experiment run.
|
|
125
|
+
*
|
|
126
|
+
* If provided, this will be used as the exact dataset run name if the data
|
|
127
|
+
* contains Langfuse dataset items. If not provided, this will default to
|
|
128
|
+
* the experiment name appended with an ISO timestamp.
|
|
129
|
+
*/
|
|
130
|
+
runName?: string;
|
|
123
131
|
/**
|
|
124
132
|
* Optional description explaining the experiment's purpose.
|
|
125
133
|
*
|
|
@@ -227,10 +235,10 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
|
|
|
227
235
|
* console.log(`Average score: ${avgScore?.value}`);
|
|
228
236
|
*
|
|
229
237
|
* // Print formatted results
|
|
230
|
-
* console.log(await result.
|
|
238
|
+
* console.log(await result.format());
|
|
231
239
|
*
|
|
232
240
|
* // Print summary with individual item results
|
|
233
|
-
* console.log(await result.
|
|
241
|
+
* console.log(await result.format({ includeItemResults: true }));
|
|
234
242
|
*
|
|
235
243
|
* // Link to dataset run (if available)
|
|
236
244
|
* if (result.datasetRunUrl) {
|
|
@@ -241,6 +249,13 @@ type ExperimentItemResult<Input = any, ExpectedOutput = any, Metadata extends Re
|
|
|
241
249
|
* @public
|
|
242
250
|
*/
|
|
243
251
|
type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record<string, any> = Record<string, any>> = {
|
|
252
|
+
/**
|
|
253
|
+
* The experiment run name.
|
|
254
|
+
*
|
|
255
|
+
* This is equal to the dataset run name if the experiment was run on a Langfuse dataset.
|
|
256
|
+
* Either the provided runName parameter or a generated name (experiment name + ISO timestamp).
|
|
257
|
+
*/
|
|
258
|
+
runName: string;
|
|
244
259
|
/**
|
|
245
260
|
* ID of the dataset run in Langfuse (only for experiments on Langfuse datasets).
|
|
246
261
|
*
|
|
@@ -273,7 +288,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
273
288
|
*/
|
|
274
289
|
runEvaluations: Evaluation[];
|
|
275
290
|
/**
|
|
276
|
-
* Function to format
|
|
291
|
+
* Function to format experiment results in a human-readable format.
|
|
277
292
|
*
|
|
278
293
|
* Generates a comprehensive, nicely formatted summary including individual results,
|
|
279
294
|
* aggregate statistics, evaluation scores, and links to traces and dataset runs.
|
|
@@ -282,7 +297,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
282
297
|
* @param options.includeItemResults - Whether to include individual item details (default: false)
|
|
283
298
|
* @returns Promise resolving to formatted string representation
|
|
284
299
|
*/
|
|
285
|
-
|
|
300
|
+
format: (options?: {
|
|
286
301
|
includeItemResults?: boolean;
|
|
287
302
|
}) => Promise<string>;
|
|
288
303
|
};
|
|
@@ -301,6 +316,7 @@ type ExperimentResult<Input = any, ExpectedOutput = any, Metadata extends Record
|
|
|
301
316
|
* const dataset = await langfuse.dataset.get("my-dataset");
|
|
302
317
|
* const result = await dataset.runExperiment({
|
|
303
318
|
* name: "Model Evaluation",
|
|
319
|
+
* runName: "Model Evaluation Run 1", // optional
|
|
304
320
|
* task: myTask,
|
|
305
321
|
* evaluators: [myEvaluator]
|
|
306
322
|
* });
|
|
@@ -469,6 +485,7 @@ declare class DatasetManager {
|
|
|
469
485
|
*
|
|
470
486
|
* const result = await dataset.runExperiment({
|
|
471
487
|
* name: "GPT-4 Benchmark",
|
|
488
|
+
* runName: "GPT-4 Benchmark v1.2", // optional exact run name
|
|
472
489
|
* description: "Evaluating GPT-4 on our benchmark tasks",
|
|
473
490
|
* task: async ({ input }) => {
|
|
474
491
|
* const response = await openai.chat.completions.create({
|
|
@@ -485,7 +502,7 @@ declare class DatasetManager {
|
|
|
485
502
|
* ]
|
|
486
503
|
* });
|
|
487
504
|
*
|
|
488
|
-
* console.log(await result.
|
|
505
|
+
* console.log(await result.format());
|
|
489
506
|
* ```
|
|
490
507
|
*
|
|
491
508
|
* @example Handling large datasets
|
|
@@ -549,7 +566,7 @@ declare class DatasetManager {
|
|
|
549
566
|
* ]
|
|
550
567
|
* });
|
|
551
568
|
*
|
|
552
|
-
* console.log(await result.
|
|
569
|
+
* console.log(await result.format());
|
|
553
570
|
* ```
|
|
554
571
|
*
|
|
555
572
|
* @example Using with Langfuse datasets
|
|
@@ -597,6 +614,7 @@ declare class ExperimentManager {
|
|
|
597
614
|
*
|
|
598
615
|
* @param config - The experiment configuration
|
|
599
616
|
* @param config.name - Human-readable name for the experiment
|
|
617
|
+
* @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
|
|
600
618
|
* @param config.description - Optional description of the experiment's purpose
|
|
601
619
|
* @param config.metadata - Optional metadata to attach to the experiment run
|
|
602
620
|
* @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
|
|
@@ -606,10 +624,11 @@ declare class ExperimentManager {
|
|
|
606
624
|
* @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
|
|
607
625
|
*
|
|
608
626
|
* @returns Promise that resolves to experiment results including:
|
|
627
|
+
* - runName: The experiment run name (either provided or generated)
|
|
609
628
|
* - itemResults: Results for each processed data item
|
|
610
629
|
* - runEvaluations: Results from run-level evaluators
|
|
611
630
|
* - datasetRunId: ID of the dataset run (if using Langfuse datasets)
|
|
612
|
-
* -
|
|
631
|
+
* - format: Function to format results for display
|
|
613
632
|
*
|
|
614
633
|
* @throws {Error} When task execution fails and cannot be handled gracefully
|
|
615
634
|
* @throws {Error} When required evaluators fail critically
|
|
@@ -669,6 +688,7 @@ declare class ExperimentManager {
|
|
|
669
688
|
*
|
|
670
689
|
* @param params - Parameters for item execution
|
|
671
690
|
* @param params.experimentName - Name of the parent experiment
|
|
691
|
+
* @param params.experimentRunName - Run name for the parent experiment
|
|
672
692
|
* @param params.experimentDescription - Description of the parent experiment
|
|
673
693
|
* @param params.experimentMetadata - Metadata for the parent experiment
|
|
674
694
|
* @param params.item - The data item to process
|
|
@@ -758,6 +778,20 @@ declare class ExperimentManager {
|
|
|
758
778
|
*/
|
|
759
779
|
private formatValue;
|
|
760
780
|
private isOtelRegistered;
|
|
781
|
+
/**
|
|
782
|
+
* Creates an experiment run name based on provided parameters.
|
|
783
|
+
*
|
|
784
|
+
* If runName is provided, returns it directly. Otherwise, generates
|
|
785
|
+
* a name by combining the experiment name with an ISO timestamp.
|
|
786
|
+
*
|
|
787
|
+
* @param params - Parameters for run name creation
|
|
788
|
+
* @param params.name - The experiment name
|
|
789
|
+
* @param params.runName - Optional provided run name
|
|
790
|
+
* @returns The final run name to use
|
|
791
|
+
*
|
|
792
|
+
* @internal
|
|
793
|
+
*/
|
|
794
|
+
private createExperimentRunName;
|
|
761
795
|
}
|
|
762
796
|
|
|
763
797
|
/**
|
|
@@ -1501,7 +1535,7 @@ declare class LangfuseClient {
|
|
|
1501
1535
|
* ]
|
|
1502
1536
|
* });
|
|
1503
1537
|
*
|
|
1504
|
-
* console.log(await result.
|
|
1538
|
+
* console.log(await result.format());
|
|
1505
1539
|
* ```
|
|
1506
1540
|
*
|
|
1507
1541
|
* @example Using with datasets
|
package/dist/index.mjs
CHANGED
|
@@ -59,6 +59,7 @@ var DatasetManager = class {
|
|
|
59
59
|
*
|
|
60
60
|
* const result = await dataset.runExperiment({
|
|
61
61
|
* name: "GPT-4 Benchmark",
|
|
62
|
+
* runName: "GPT-4 Benchmark v1.2", // optional exact run name
|
|
62
63
|
* description: "Evaluating GPT-4 on our benchmark tasks",
|
|
63
64
|
* task: async ({ input }) => {
|
|
64
65
|
* const response = await openai.chat.completions.create({
|
|
@@ -75,7 +76,7 @@ var DatasetManager = class {
|
|
|
75
76
|
* ]
|
|
76
77
|
* });
|
|
77
78
|
*
|
|
78
|
-
* console.log(await result.
|
|
79
|
+
* console.log(await result.format());
|
|
79
80
|
* ```
|
|
80
81
|
*
|
|
81
82
|
* @example Handling large datasets
|
|
@@ -184,6 +185,7 @@ var ExperimentManager = class {
|
|
|
184
185
|
*
|
|
185
186
|
* @param config - The experiment configuration
|
|
186
187
|
* @param config.name - Human-readable name for the experiment
|
|
188
|
+
* @param config.runName - Optional exact name for the experiment run (defaults to name + timestamp)
|
|
187
189
|
* @param config.description - Optional description of the experiment's purpose
|
|
188
190
|
* @param config.metadata - Optional metadata to attach to the experiment run
|
|
189
191
|
* @param config.data - Array of data items to process (ExperimentItem[] or DatasetItem[])
|
|
@@ -193,10 +195,11 @@ var ExperimentManager = class {
|
|
|
193
195
|
* @param config.maxConcurrency - Maximum number of concurrent task executions (default: Infinity)
|
|
194
196
|
*
|
|
195
197
|
* @returns Promise that resolves to experiment results including:
|
|
198
|
+
* - runName: The experiment run name (either provided or generated)
|
|
196
199
|
* - itemResults: Results for each processed data item
|
|
197
200
|
* - runEvaluations: Results from run-level evaluators
|
|
198
201
|
* - datasetRunId: ID of the dataset run (if using Langfuse datasets)
|
|
199
|
-
* -
|
|
202
|
+
* - format: Function to format results for display
|
|
200
203
|
*
|
|
201
204
|
* @throws {Error} When task execution fails and cannot be handled gracefully
|
|
202
205
|
* @throws {Error} When required evaluators fail critically
|
|
@@ -249,11 +252,16 @@ var ExperimentManager = class {
|
|
|
249
252
|
evaluators,
|
|
250
253
|
task,
|
|
251
254
|
name,
|
|
255
|
+
runName: providedRunName,
|
|
252
256
|
description,
|
|
253
257
|
metadata,
|
|
254
258
|
maxConcurrency: batchSize = Infinity,
|
|
255
259
|
runEvaluators
|
|
256
260
|
} = config;
|
|
261
|
+
const runName = this.createExperimentRunName({
|
|
262
|
+
name,
|
|
263
|
+
runName: providedRunName
|
|
264
|
+
});
|
|
257
265
|
if (!this.isOtelRegistered()) {
|
|
258
266
|
this.logger.warn(
|
|
259
267
|
"OpenTelemetry has not been set up. Traces will not be sent to Langfuse.See our docs on how to set up OpenTelemetry: https://langfuse.com/docs/observability/sdk/typescript/setup#tracing-setup"
|
|
@@ -268,6 +276,7 @@ var ExperimentManager = class {
|
|
|
268
276
|
evaluators,
|
|
269
277
|
task,
|
|
270
278
|
experimentName: name,
|
|
279
|
+
experimentRunName: runName,
|
|
271
280
|
experimentDescription: description,
|
|
272
281
|
experimentMetadata: metadata
|
|
273
282
|
});
|
|
@@ -325,11 +334,12 @@ var ExperimentManager = class {
|
|
|
325
334
|
}
|
|
326
335
|
await this.langfuseClient.score.flush();
|
|
327
336
|
return {
|
|
337
|
+
runName,
|
|
328
338
|
itemResults,
|
|
329
339
|
datasetRunId,
|
|
330
340
|
datasetRunUrl,
|
|
331
341
|
runEvaluations,
|
|
332
|
-
|
|
342
|
+
format: async (options) => {
|
|
333
343
|
var _a;
|
|
334
344
|
return await this.prettyPrintResults({
|
|
335
345
|
datasetRunUrl,
|
|
@@ -337,6 +347,7 @@ var ExperimentManager = class {
|
|
|
337
347
|
originalData: data,
|
|
338
348
|
runEvaluations,
|
|
339
349
|
name: config.name,
|
|
350
|
+
runName,
|
|
340
351
|
description: config.description,
|
|
341
352
|
includeItemResults: (_a = options == null ? void 0 : options.includeItemResults) != null ? _a : false
|
|
342
353
|
});
|
|
@@ -355,6 +366,7 @@ var ExperimentManager = class {
|
|
|
355
366
|
*
|
|
356
367
|
* @param params - Parameters for item execution
|
|
357
368
|
* @param params.experimentName - Name of the parent experiment
|
|
369
|
+
* @param params.experimentRunName - Run name for the parent experiment
|
|
358
370
|
* @param params.experimentDescription - Description of the parent experiment
|
|
359
371
|
* @param params.experimentMetadata - Metadata for the parent experiment
|
|
360
372
|
* @param params.item - The data item to process
|
|
@@ -369,7 +381,7 @@ var ExperimentManager = class {
|
|
|
369
381
|
*/
|
|
370
382
|
async runItem(params) {
|
|
371
383
|
const { item, evaluators = [], task, experimentMetadata = {} } = params;
|
|
372
|
-
const { output, traceId } = await startActiveObservation(
|
|
384
|
+
const { output, traceId, observationId } = await startActiveObservation(
|
|
373
385
|
"experiment-item-run",
|
|
374
386
|
async (span) => {
|
|
375
387
|
var _a;
|
|
@@ -378,7 +390,8 @@ var ExperimentManager = class {
|
|
|
378
390
|
input: item.input,
|
|
379
391
|
output: output2,
|
|
380
392
|
metadata: {
|
|
381
|
-
|
|
393
|
+
experiment_name: params.experimentName,
|
|
394
|
+
experiment_run_name: params.experimentRunName,
|
|
382
395
|
...experimentMetadata,
|
|
383
396
|
...(_a = item.metadata) != null ? _a : {},
|
|
384
397
|
..."id" in item && "datasetId" in item ? {
|
|
@@ -387,17 +400,18 @@ var ExperimentManager = class {
|
|
|
387
400
|
} : {}
|
|
388
401
|
}
|
|
389
402
|
});
|
|
390
|
-
return { output: output2, traceId: span.traceId };
|
|
403
|
+
return { output: output2, traceId: span.traceId, observationId: span.id };
|
|
391
404
|
}
|
|
392
405
|
);
|
|
393
406
|
let datasetRunId = void 0;
|
|
394
407
|
if ("id" in item) {
|
|
395
408
|
await this.langfuseClient.api.datasetRunItems.create({
|
|
396
|
-
runName: params.
|
|
409
|
+
runName: params.experimentRunName,
|
|
397
410
|
runDescription: params.experimentDescription,
|
|
398
411
|
metadata: params.experimentMetadata,
|
|
399
412
|
datasetItemId: item.id,
|
|
400
|
-
traceId
|
|
413
|
+
traceId,
|
|
414
|
+
observationId
|
|
401
415
|
}).then((result) => {
|
|
402
416
|
datasetRunId = result.datasetRunId;
|
|
403
417
|
}).catch(
|
|
@@ -519,6 +533,7 @@ ${JSON.stringify(params2)}
|
|
|
519
533
|
originalData,
|
|
520
534
|
runEvaluations,
|
|
521
535
|
name,
|
|
536
|
+
runName,
|
|
522
537
|
description,
|
|
523
538
|
includeItemResults = false
|
|
524
539
|
} = params;
|
|
@@ -576,7 +591,7 @@ ${index + 1}. Item ${index + 1}:
|
|
|
576
591
|
} else {
|
|
577
592
|
output += `Individual Results: Hidden (${itemResults.length} items)
|
|
578
593
|
`;
|
|
579
|
-
output += "\u{1F4A1} Call
|
|
594
|
+
output += "\u{1F4A1} Call format({ includeItemResults: true }) to view them\n";
|
|
580
595
|
}
|
|
581
596
|
const totalItems = itemResults.length;
|
|
582
597
|
const evaluationNames = new Set(
|
|
@@ -585,7 +600,9 @@ ${index + 1}. Item ${index + 1}:
|
|
|
585
600
|
output += `
|
|
586
601
|
${"\u2500".repeat(50)}
|
|
587
602
|
`;
|
|
588
|
-
output += `\u{
|
|
603
|
+
output += `\u{1F9EA} Experiment: ${name}`;
|
|
604
|
+
output += `
|
|
605
|
+
\u{1F4CB} Run name: ${runName}`;
|
|
589
606
|
if (description) {
|
|
590
607
|
output += ` - ${description}`;
|
|
591
608
|
}
|
|
@@ -660,6 +677,26 @@ Run Evaluations:`;
|
|
|
660
677
|
}
|
|
661
678
|
return tracerProvider.constructor.name !== "NoopTracerProvider";
|
|
662
679
|
}
|
|
680
|
+
/**
|
|
681
|
+
* Creates an experiment run name based on provided parameters.
|
|
682
|
+
*
|
|
683
|
+
* If runName is provided, returns it directly. Otherwise, generates
|
|
684
|
+
* a name by combining the experiment name with an ISO timestamp.
|
|
685
|
+
*
|
|
686
|
+
* @param params - Parameters for run name creation
|
|
687
|
+
* @param params.name - The experiment name
|
|
688
|
+
* @param params.runName - Optional provided run name
|
|
689
|
+
* @returns The final run name to use
|
|
690
|
+
*
|
|
691
|
+
* @internal
|
|
692
|
+
*/
|
|
693
|
+
createExperimentRunName(params) {
|
|
694
|
+
if (params.runName) {
|
|
695
|
+
return params.runName;
|
|
696
|
+
}
|
|
697
|
+
const isoTimestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
698
|
+
return `${params.name} - ${isoTimestamp}`;
|
|
699
|
+
}
|
|
663
700
|
};
|
|
664
701
|
|
|
665
702
|
// src/media/index.ts
|
|
@@ -1180,10 +1217,10 @@ var ChatPromptClient = class _ChatPromptClient extends BasePromptClient {
|
|
|
1180
1217
|
JSON.stringify(placeholderValue)
|
|
1181
1218
|
);
|
|
1182
1219
|
} else {
|
|
1183
|
-
messagesWithPlaceholdersReplaced.push(
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1220
|
+
messagesWithPlaceholdersReplaced.push([
|
|
1221
|
+
"placeholder",
|
|
1222
|
+
`{${item.name}}`
|
|
1223
|
+
]);
|
|
1187
1224
|
}
|
|
1188
1225
|
} else if ("role" in item && "content" in item && item.type === "chatmessage" /* ChatMessage */) {
|
|
1189
1226
|
messagesWithPlaceholdersReplaced.push({
|