langsmith 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/client.cjs +69 -29
- package/dist/client.d.ts +7 -3
- package/dist/client.js +46 -6
- package/dist/evaluation/_random_name.cjs +730 -0
- package/dist/evaluation/_random_name.d.ts +5 -0
- package/dist/evaluation/_random_name.js +726 -0
- package/dist/evaluation/_runner.cjs +709 -0
- package/dist/evaluation/_runner.d.ts +158 -0
- package/dist/evaluation/_runner.js +705 -0
- package/dist/evaluation/evaluator.cjs +86 -0
- package/dist/evaluation/evaluator.d.ts +31 -27
- package/dist/evaluation/evaluator.js +83 -1
- package/dist/evaluation/index.cjs +3 -1
- package/dist/evaluation/index.d.ts +1 -0
- package/dist/evaluation/index.js +1 -0
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/run_trees.d.ts +1 -0
- package/dist/schemas.d.ts +22 -1
- package/dist/traceable.cjs +64 -7
- package/dist/traceable.d.ts +2 -0
- package/dist/traceable.js +62 -6
- package/dist/utils/_git.cjs +72 -0
- package/dist/utils/_git.d.ts +14 -0
- package/dist/utils/_git.js +67 -0
- package/dist/utils/_uuid.cjs +33 -0
- package/dist/utils/_uuid.d.ts +1 -0
- package/dist/utils/_uuid.js +6 -0
- package/dist/utils/atee.cjs +24 -0
- package/dist/utils/atee.d.ts +1 -0
- package/dist/utils/atee.js +20 -0
- package/package.json +1 -1
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.evaluate = void 0;
|
|
4
|
+
const index_js_1 = require("../index.cjs");
|
|
5
|
+
const traceable_js_1 = require("../traceable.cjs");
|
|
6
|
+
const _git_js_1 = require("../utils/_git.cjs");
|
|
7
|
+
const _uuid_js_1 = require("../utils/_uuid.cjs");
|
|
8
|
+
const async_caller_js_1 = require("../utils/async_caller.cjs");
|
|
9
|
+
const atee_js_1 = require("../utils/atee.cjs");
|
|
10
|
+
const env_js_1 = require("../utils/env.cjs");
|
|
11
|
+
const _random_name_js_1 = require("./_random_name.cjs");
|
|
12
|
+
const evaluator_js_1 = require("./evaluator.cjs");
|
|
13
|
+
const uuid_1 = require("uuid");
|
|
14
|
+
/**
 * Public entry point for running an evaluation.
 *
 * Delegates directly to `_evaluate` and returns whatever it returns.
 *
 * @param target The target system or function to evaluate.
 * @param options Evaluation options, forwarded untouched to `_evaluate`.
 * @returns The value produced by `_evaluate(target, options)`.
 */
function evaluate(target, options) {
    const outcome = _evaluate(target, options);
    return outcome;
}
|
|
21
|
+
exports.evaluate = evaluate;
|
|
22
|
+
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
 *
 * `start`, `withPredictions`, `withEvaluators` and `withSummaryEvaluators`
 * never modify the receiver's pipeline; each returns a NEW manager that wraps
 * the previous stage's (lazy) async generators.
 */
class _ExperimentManager {
    /**
     * Name of the experiment.
     * @throws {Error} When no name has been resolved yet.
     */
    get experimentName() {
        if (this._experimentName) {
            return this._experimentName;
        }
        else {
            throw new Error("Experiment name not provided, and experiment not yet started.");
        }
    }
    /**
     * Return the examples for this experiment, resolving `_data` on first use
     * and caching the resulting array.
     *
     * @returns {Promise<Array>} The cached example list.
     * @throws {Error} When neither examples nor data were provided.
     */
    async getExamples() {
        if (!this._examples) {
            if (!this._data) {
                throw new Error("Data not provided in this experiment.");
            }
            const unresolvedData = _resolveData(this._data, { client: this.client });
            // Collect into a local array and publish it only once complete, so a
            // caller arriving while the fetch is in flight never observes a
            // partially filled (or empty) example list.
            const exs = [];
            for await (const example of unresolvedData) {
                exs.push(example);
            }
            this.setExamples(exs);
        }
        return this._examples;
    }
    /** Replace the cached example list. */
    setExamples(examples) {
        this._examples = examples;
    }
    /**
     * Promise for the dataset id: the experiment's `reference_dataset_id` when
     * set, otherwise the `dataset_id` of the first example.
     * Rejects when there are no examples.
     */
    get datasetId() {
        return this.getExamples().then((examples) => {
            if (examples.length === 0) {
                throw new Error("No examples found in the dataset.");
            }
            if (this._experiment && this._experiment.reference_dataset_id) {
                return this._experiment.reference_dataset_id;
            }
            return examples[0].dataset_id;
        });
    }
    /**
     * Async iterable of per-example evaluation results. When no evaluators
     * have been attached, yields one empty `{ results: [] }` per example so
     * that rows in `getResults` still line up.
     */
    get evaluationResults() {
        if (this._evaluationResults === undefined) {
            return async function* () {
                for (const _ of await this.getExamples()) {
                    yield { results: [] };
                }
            }.call(this);
        }
        else {
            return this._evaluationResults;
        }
    }
    /**
     * The (one-shot) async iterable of runs.
     * @throws {Error} When runs were already materialised into `_runsArray`,
     *   or when no runs have been provided at all.
     */
    get runs() {
        if (this._runsArray && this._runsArray.length > 0) {
            throw new Error("Runs already provided as an array.");
        }
        if (this._runs === undefined) {
            throw new Error("Runs not provided in this experiment. Please predict first.");
        }
        else {
            return this._runs;
        }
    }
    /**
     * @param args
     * @param [args.client] Client to use; a new `Client` is created when omitted.
     * @param [args.experiment] Either a string prefix (a random 8-character
     *   suffix is appended) or an existing experiment object with a `name`.
     *   When omitted a random name is generated.
     * @param [args.metadata] Experiment metadata; `revision_id` is filled from
     *   the environment when not present.
     * @param [args.examples] Already-resolved examples (ignored when empty).
     * @param [args.data] Unresolved data source, resolved lazily by `getExamples`.
     * @param [args.runs] Async iterable of runs.
     * @param [args._runsArray] Already-materialised runs (ignored when empty).
     * @param [args.evaluationResults] Async iterable of evaluation results.
     * @param [args.summaryResults] Async iterable of summary-result generators.
     */
    constructor(args) {
        // Declare every instance field up front (same order and descriptors as
        // before) so the object shape is stable regardless of which args are set.
        const fieldNames = [
            "_data",
            "_runs",
            "_evaluationResults",
            "_summaryResults",
            "_examples",
            "_runsArray",
            "client",
            "_experiment",
            "_experimentName",
            "_metadata",
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.client = args.client ?? new index_js_1.Client();
        if (!args.experiment) {
            this._experimentName = (0, _random_name_js_1.randomName)();
        }
        else if (typeof args.experiment === "string") {
            this._experimentName = `${args.experiment}-${(0, uuid_1.v4)().slice(0, 8)}`;
        }
        else {
            if (!args.experiment.name) {
                throw new Error("Experiment must have a name");
            }
            this._experimentName = args.experiment.name;
            this._experiment = args.experiment;
        }
        let metadata = args.metadata || {};
        if (!("revision_id" in metadata)) {
            // Caller-supplied keys are spread last so they always win.
            metadata = {
                revision_id: (0, env_js_1.getLangChainEnvVarsMetadata)().revision_id,
                ...metadata,
            };
        }
        this._metadata = metadata;
        if (args.examples && args.examples.length) {
            this.setExamples(args.examples);
        }
        this._data = args.data;
        if (args._runsArray && args._runsArray.length) {
            this._runsArray = args._runsArray;
        }
        this._runs = args.runs;
        this._evaluationResults = args.evaluationResults;
        this._summaryResults = args.summaryResults;
    }
    /**
     * @returns The experiment object.
     * @throws {Error} When the experiment has not been started.
     */
    _getExperiment() {
        if (!this._experiment) {
            throw new Error("Experiment not yet started.");
        }
        return this._experiment;
    }
    /**
     * Build the project metadata: this manager's metadata, plus `git` info when
     * available, layered over any metadata already stored on the experiment
     * (`experiment.extra.metadata`). Keys from this manager take precedence.
     */
    async _getExperimentMetadata() {
        let projectMetadata = this._metadata ?? {};
        const gitInfo = await (0, _git_js_1.getGitInfo)();
        if (gitInfo) {
            projectMetadata = {
                ...projectMetadata,
                git: gitInfo,
            };
        }
        if (this._experiment) {
            const experimentMetadata = this._experiment.extra && "metadata" in this._experiment.extra
                ? this._experiment.extra.metadata
                : {};
            projectMetadata = {
                ...experimentMetadata,
                ...projectMetadata,
            };
        }
        return projectMetadata;
    }
    /**
     * Return the existing experiment, or create a project for it.
     *
     * @param firstExample Example whose `dataset_id` becomes the project's
     *   reference dataset.
     * @throws {Error} A "please use a different name" error when creation fails
     *   because the project already exists; any other creation failure is
     *   rethrown unchanged.
     */
    async _getProject(firstExample) {
        let project;
        if (!this._experiment) {
            if (!firstExample) {
                // Previously surfaced as a misleading "already exists" error.
                throw new Error("No examples found in the dataset.");
            }
            try {
                const projectMetadata = await this._getExperimentMetadata();
                project = await this.client.createProject({
                    projectName: this.experimentName,
                    referenceDatasetId: firstExample.dataset_id,
                    metadata: projectMetadata,
                });
            }
            catch (e) {
                // Only a name conflict deserves the friendly hint; network, auth
                // and other failures must reach the caller as they are.
                if (!String(e).includes("already exists")) {
                    throw e;
                }
                throw new Error(`Experiment ${this._experimentName} already exists. Please use a different name.`, { cause: e });
            }
        }
        else {
            project = this._experiment;
        }
        return project;
    }
    /** Log that the experiment is starting. */
    _printExperimentStart() {
        // @TODO log with experiment URL
        console.log(`Starting evaluation of experiment: ${this.experimentName}`);
    }
    /**
     * Resolve the examples, make sure the project exists, and return a new
     * manager bound to that project.
     */
    async start() {
        const examples = await this.getExamples();
        const firstExample = examples[0];
        const project = await this._getProject(firstExample);
        this._printExperimentStart();
        return new _ExperimentManager({
            examples,
            experiment: project,
            metadata: this._metadata,
            client: this.client,
            evaluationResults: this._evaluationResults,
            summaryResults: this._summaryResults,
        });
    }
    /**
     * Return a new manager whose `runs` are lazily produced by applying
     * `target` to each example.
     *
     * @param target Function to run against each example's inputs.
     * @param [options]
     * @param [options.maxConcurrency] 0 or omitted means sequential.
     */
    async withPredictions(target, options) {
        const experimentResults = this._predict(target, options);
        return new _ExperimentManager({
            examples: await this.getExamples(),
            experiment: this._experiment,
            metadata: this._metadata,
            client: this.client,
            runs: (async function* () {
                for await (const pred of experimentResults) {
                    yield pred.run;
                }
            })(),
        });
    }
    /**
     * Return a new manager whose runs and evaluation results are lazily
     * produced by scoring this manager's results with `evaluators`.
     *
     * @param evaluators Evaluators (objects with `evaluateRun`, or functions).
     * @param [options]
     * @param [options.maxConcurrency] 0 or omitted means sequential.
     */
    async withEvaluators(evaluators, options) {
        const resolvedEvaluators = _resolveEvaluators(evaluators);
        const experimentResults = this._score(resolvedEvaluators, options);
        // The scored stream feeds two consumers (runs and evaluation results),
        // so it is split into two iterables.
        const [r1, r2] = (0, atee_js_1.atee)(experimentResults);
        return new _ExperimentManager({
            examples: await this.getExamples(),
            experiment: this._experiment,
            metadata: this._metadata,
            client: this.client,
            runs: (async function* () {
                for await (const result of r1) {
                    yield result.run;
                }
            })(),
            evaluationResults: (async function* () {
                for await (const result of r2) {
                    yield result.evaluationResults;
                }
            })(),
            summaryResults: this._summaryResults,
        });
    }
    /**
     * Return a new manager that additionally carries the (lazy) summary
     * evaluation generator.
     *
     * @param summaryEvaluators Functions called with `(runs, examples)`.
     */
    async withSummaryEvaluators(summaryEvaluators) {
        const aggregateFeedbackGen = this._applySummaryEvaluators(summaryEvaluators);
        return new _ExperimentManager({
            examples: await this.getExamples(),
            experiment: this._experiment,
            metadata: this._metadata,
            client: this.client,
            runs: this.runs,
            _runsArray: this._runsArray,
            evaluationResults: this._evaluationResults,
            summaryResults: aggregateFeedbackGen,
        });
    }
    /**
     * Drain the run and evaluation-result streams, then yield one
     * `{ run, example, evaluationResults }` row per run, matched by index.
     * Runs are cached in `_runsArray` for later use by `getSummaryScores`.
     */
    async *getResults() {
        const examples = await this.getExamples();
        const evaluationResults = [];
        if (!this._runsArray) {
            this._runsArray = [];
            for await (const run of this.runs) {
                this._runsArray.push(run);
            }
        }
        for await (const evaluationResult of this.evaluationResults) {
            evaluationResults.push(evaluationResult);
        }
        for (let i = 0; i < this._runsArray.length; i++) {
            yield {
                run: this._runsArray[i],
                example: examples[i],
                evaluationResults: evaluationResults[i],
            };
        }
    }
    /**
     * Run the summary evaluators (if any) against the cached runs.
     * @returns {Promise<{results: Array}>} Flattened summary results.
     */
    async getSummaryScores() {
        if (!this._summaryResults) {
            return { results: [] };
        }
        const results = [];
        for await (const evaluationResultsGenerator of this._summaryResults) {
            if (typeof evaluationResultsGenerator === "function") {
                // This is because runs array is not available until after this generator
                // is set, so we need to pass it like so.
                for await (const evaluationResults of evaluationResultsGenerator(this._runsArray ?? [])) {
                    results.push(...evaluationResults.results);
                }
            }
        }
        return { results };
    }
    // Private methods
    /**
     * Run the target function on the examples.
     * @param {TargetNoInvoke} target The target function to evaluate.
     * @param options
     * @param [options.maxConcurrency] 0 or omitted runs examples one at a
     *   time; otherwise calls go through an `AsyncCaller` built with this value.
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    async *_predict(target, options) {
        const maxConcurrency = options?.maxConcurrency ?? 0;
        const examples = await this.getExamples();
        if (maxConcurrency === 0) {
            for (const example of examples) {
                yield await _forward(target, example, this.experimentName, this._metadata, this.client);
            }
        }
        else {
            const caller = new async_caller_js_1.AsyncCaller({
                maxConcurrency,
            });
            const futures = [];
            // Schedule every call first, then yield them in example order.
            for await (const example of examples) {
                futures.push(caller.call(_forward, target, example, this.experimentName, this._metadata, this.client));
            }
            for await (const future of futures) {
                yield future;
            }
        }
        // Close out the project.
        await this._end();
    }
    /**
     * Apply every evaluator to a single result row and log the feedback.
     * A failing evaluator is logged and skipped so the remaining evaluators
     * still run. Does not read `this`.
     *
     * @param evaluators Objects exposing `evaluateRun(run, example, options)`.
     * @param currentResults Row of `{ run, example, evaluationResults }`;
     *   `evaluationResults.results` is appended to in place.
     * @param fields `{ experimentName, client }`.
     * @returns The same row (`{ run, example, evaluationResults }`).
     */
    async _runEvaluators(evaluators, currentResults, fields) {
        const { run, example, evaluationResults } = currentResults;
        for (const evaluator of evaluators) {
            try {
                const options = {
                    reference_example_id: example.id,
                    project_name: fields.experimentName,
                    metadata: {
                        example_version: example.modified_at
                            ? new Date(example.modified_at).toISOString()
                            : new Date(example.created_at).toISOString(),
                    },
                    client: fields.client,
                };
                const evaluatorResponse = await evaluator.evaluateRun(run, example, options);
                evaluationResults.results.push(...(await fields.client.logEvaluationFeedback(evaluatorResponse, run)));
            }
            catch (e) {
                console.error(`Error running evaluator ${evaluator.evaluateRun.name} on run ${run.id}: ${e}`);
            }
        }
        return {
            run,
            example,
            evaluationResults,
        };
    }
    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager.
     * (e.g. from a previous prediction step)
     * @param {Array<RunEvaluator>} evaluators
     * @param options
     * @param {number} [options.maxConcurrency] 0 or omitted means sequential.
     */
    async *_score(evaluators, options) {
        const { maxConcurrency = 0 } = options || {};
        if (maxConcurrency === 0) {
            for await (const currentResults of this.getResults()) {
                yield this._runEvaluators(evaluators, currentResults, {
                    experimentName: this.experimentName,
                    client: this.client,
                });
            }
        }
        else {
            const caller = new async_caller_js_1.AsyncCaller({
                maxConcurrency,
            });
            const futures = [];
            for await (const currentResults of this.getResults()) {
                // `_runEvaluators` does not use `this`, so it can be passed unbound.
                futures.push(caller.call(this._runEvaluators, evaluators, currentResults, {
                    experimentName: this.experimentName,
                    client: this.client,
                }));
            }
            for (const result of futures) {
                yield result;
            }
        }
    }
    /**
     * Yield a single async-generator function which, given the materialised
     * runs, executes every summary evaluator, records each resulting feedback
     * item on the project, and yields `{ results }` with all of them.
     *
     * @param summaryEvaluators Functions called with `(runs, examples)`.
     */
    async *_applySummaryEvaluators(summaryEvaluators) {
        const projectId = this._getExperiment().id;
        const examples = await this.getExamples();
        const options = Array.from({ length: summaryEvaluators.length }).map(() => ({
            project_name: "evaluators",
            experiment: this.experimentName,
            projectId: projectId,
        }));
        const wrappedEvaluators = await wrapSummaryEvaluators(summaryEvaluators, options);
        yield async function* (runsArray) {
            const aggregateFeedback = [];
            for (const evaluator of wrappedEvaluators) {
                try {
                    const summaryEvalResult = await evaluator(runsArray, examples);
                    const flattenedResults = this.client._selectEvalResults(summaryEvalResult);
                    aggregateFeedback.push(...flattenedResults);
                    for (const result of flattenedResults) {
                        // `targetRunId` is dropped: summary feedback is attached to
                        // the project, not to an individual run.
                        const { targetRunId, key, ...feedback } = result;
                        const evaluatorInfo = feedback.evaluatorInfo;
                        delete feedback.evaluatorInfo;
                        // Use the result's own key; a fixed "key" literal would
                        // file every summary metric under the same name.
                        await this.client.createFeedback(null, key, {
                            ...feedback,
                            projectId: projectId,
                            sourceInfo: evaluatorInfo,
                        });
                    }
                }
                catch (e) {
                    // JSON.stringify on an Error yields "{}", hiding the message.
                    const detail = e instanceof Error ? String(e) : JSON.stringify(e, null, 2);
                    console.error(`Error running summary evaluator ${evaluator.name}: ${detail}`);
                }
            }
            yield {
                results: aggregateFeedback,
            };
        }.bind(this);
    }
    /**
     * @returns {Promise<string | undefined>} ISO timestamp of the most recent
     *   `modified_at` among the examples, or `undefined` when no example
     *   carries a usable `modified_at`.
     */
    async _getDatasetVersion() {
        const examples = await this.getExamples();
        let latest;
        for (const { modified_at: modifiedAt } of examples) {
            // Examples without `modified_at` are skipped; converting them would
            // produce an invalid date and make `toISOString()` throw.
            if (!modifiedAt) {
                continue;
            }
            const time = new Date(modifiedAt).getTime();
            if (Number.isNaN(time)) {
                continue;
            }
            if (latest === undefined || time > latest) {
                latest = time;
            }
        }
        return latest === undefined ? undefined : new Date(latest).toISOString();
    }
    /**
     * Close out the project: stamp its end time and final metadata
     * (including `dataset_version` and, when missing, `revision_id`).
     * @throws {Error} When the experiment has not been started.
     */
    async _end() {
        const experiment = this._experiment;
        if (!experiment) {
            throw new Error("Experiment not yet started.");
        }
        const projectMetadata = await this._getExperimentMetadata();
        projectMetadata["dataset_version"] = await this._getDatasetVersion();
        // Update revision_id if not already set
        if (!projectMetadata["revision_id"]) {
            projectMetadata["revision_id"] = await (0, _git_js_1.getDefaultRevisionId)();
        }
        await this.client.updateProject(experiment.id, {
            endTime: new Date().toISOString(),
            metadata: projectMetadata,
        });
    }
}
|
|
488
|
+
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
 *
 * `processedCount` counts the rows collected by `processData`. Iteration uses
 * a separate, non-enumerable cursor so that collecting rows does not consume
 * them: `for await (const row of results)` yields every collected row.
 */
class ExperimentResults {
    /**
     * @param experimentManager Manager whose `experimentName` is exposed by
     *   this object.
     */
    constructor(experimentManager) {
        Object.defineProperty(this, "manager", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "results", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
        Object.defineProperty(this, "processedCount", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        Object.defineProperty(this, "summaryResults", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Read position for the iterator protocol. Kept non-enumerable so the
        // object's visible shape is the same as before.
        Object.defineProperty(this, "_cursor", {
            enumerable: false,
            configurable: true,
            writable: true,
            value: 0
        });
        this.manager = experimentManager;
    }
    /** Name of the underlying experiment. */
    get experimentName() {
        return this.manager.experimentName;
    }
    /** The object is its own async iterator. */
    [Symbol.asyncIterator]() {
        return this;
    }
    /**
     * Return the next collected row, or `{ done: true }` once every row in
     * `results` has been handed out.
     */
    async next() {
        if (this._cursor < this.results.length) {
            const value = this.results[this._cursor];
            this._cursor += 1;
            return { value, done: false };
        }
        return { value: undefined, done: true };
    }
    /**
     * Drain `manager.getResults()` into `results`, then fetch the summary
     * scores into `summaryResults`.
     *
     * @param manager Object providing `getResults()` and `getSummaryScores()`.
     */
    async processData(manager) {
        for await (const item of manager.getResults()) {
            this.results.push(item);
            this.processedCount++;
        }
        this.summaryResults = await manager.getSummaryScores();
    }
    /** Number of rows collected so far. */
    get length() {
        return this.results.length;
    }
}
|
|
549
|
+
/**
 * Normalise a target into a plain callable.
 *
 * @param fn Either a function, or an object exposing an `invoke` method.
 * @returns `fn.invoke` bound to `fn` when an `invoke` property exists,
 *   otherwise `fn` itself.
 */
function convertInvokeToTopLevel(fn) {
    return "invoke" in fn ? fn.invoke.bind(fn) : fn;
}
|
|
555
|
+
/**
 * Build the experiment pipeline and run it to completion.
 *
 * Steps: resolve the experiment, start a manager, then attach predictions
 * (when `target` is callable), evaluators and summary evaluators as
 * requested, and finally drain everything into an `ExperimentResults`.
 *
 * @param target A callable (function or object with `invoke`), or otherwise
 *   a collection of existing runs.
 * @param fields Options: `client`, `data`, `metadata`, `experiment`,
 *   `experimentPrefix`, `maxConcurrency`, `evaluators`, `summaryEvaluators`.
 * @returns {Promise<ExperimentResults>} Fully processed results.
 */
async function _evaluate(target, fields) {
    const client = fields.client ?? new index_js_1.Client();
    const targetIsCallable = _isCallable(target);
    // A non-callable target is treated as pre-existing runs.
    const existingRuns = targetIsCallable ? null : target;
    const [experiment_, newRuns] = await _resolveExperiment(fields.experiment ?? null, existingRuns, client);
    const dataIsExampleList = Array.isArray(fields.data);
    const initialManager = new _ExperimentManager({
        data: dataIsExampleList ? undefined : fields.data,
        examples: dataIsExampleList ? fields.data : undefined,
        client,
        metadata: fields.metadata,
        experiment: experiment_ ?? fields.experimentPrefix,
        runs: newRuns ?? undefined,
    });
    let manager = await initialManager.start();
    if (targetIsCallable) {
        const callableTarget = convertInvokeToTopLevel(target);
        manager = await manager.withPredictions(callableTarget, {
            maxConcurrency: fields.maxConcurrency,
        });
    }
    if (fields.evaluators) {
        manager = await manager.withEvaluators(fields.evaluators, {
            maxConcurrency: fields.maxConcurrency,
        });
    }
    if (fields.summaryEvaluators) {
        manager = await manager.withSummaryEvaluators(fields.summaryEvaluators);
    }
    // Start consuming the results.
    const experimentResults = new ExperimentResults(manager);
    await experimentResults.processData(manager);
    return experimentResults;
}
|
|
585
|
+
/**
 * Run the target once against a single example, traced, and capture the run.
 *
 * The run is obtained through the tracing `on_end` callback. An error thrown
 * by the target is logged and not rethrown.
 *
 * @param fn Target function, called with `example.inputs`.
 * @param example Example supplying `id`, `inputs` and `modified_at` / `created_at`.
 * @param experimentName Used as the trace's `project_name`.
 * @param metadata Extra metadata merged into the trace metadata.
 * @param client Client passed through to the tracing wrapper.
 * @returns {Promise<{run, example}>} The captured run and the example.
 * @throws {Error} When no run was captured after the call.
 */
async function _forward(fn, example, experimentName, metadata, client) {
    let capturedRun = null;
    // Version the example by its last modification, falling back to creation.
    const exampleVersion = new Date(example.modified_at || example.created_at).toISOString();
    const tracedTarget = (0, traceable_js_1.traceable)(fn, {
        reference_example_id: example.id,
        on_end: (r) => {
            capturedRun = r;
        },
        project_name: experimentName,
        metadata: {
            ...metadata,
            example_version: exampleVersion,
        },
        client,
        tracingEnabled: true,
    });
    try {
        await tracedTarget(example.inputs);
    }
    catch (e) {
        console.error(`Error running target function: ${e}`);
    }
    if (capturedRun) {
        return { run: capturedRun, example };
    }
    throw new Error(`Run not created by target function.
This is most likely due to tracing not being enabled.\n
Try setting "LANGCHAIN_TRACING_V2=true" in your environment.`);
}
|
|
622
|
+
/**
 * Turn a data reference into something iterable over examples.
 *
 * @param data A string (dataset id when it passes `assertUuid`, dataset name
 *   otherwise), or any other value, which is returned unchanged.
 * @param options
 * @param options.client Client whose `listExamples` is used for strings.
 * @returns The result of `client.listExamples(...)` for strings, else `data`.
 */
function _resolveData(data, options) {
    if (typeof data !== "string") {
        return data;
    }
    let looksLikeUuid = true;
    try {
        (0, _uuid_js_1.assertUuid)(data);
    }
    catch (_) {
        looksLikeUuid = false;
    }
    const query = looksLikeUuid ? { datasetId: data } : { datasetName: data };
    return options.client.listExamples(query);
}
|
|
645
|
+
/**
 * Wrap each summary evaluator so that every invocation is traced.
 *
 * The trace config for evaluator `i` is `optionsArray[i]` plus a `name`
 * (the evaluator's function name, or "BatchEvaluator" when it has none).
 * The traced call receives short descriptive strings as its inputs instead
 * of the full run/example arrays; the real arrays reach the evaluator
 * through the closure.
 *
 * @param evaluators Functions called with `(runs, examples)`.
 * @param [optionsArray] Per-evaluator trace options, matched by index.
 * @returns {Promise<Array<Function>>} Wrapped evaluators with the same
 *   `(runs, examples)` call shape, each returning a Promise.
 */
async function wrapSummaryEvaluators(evaluators, optionsArray) {
    const wrapOne = (evaluator, options) => {
        const evalName = evaluator.name || "BatchEvaluator";
        return (runs, examples) => {
            const traced = (0, traceable_js_1.traceable)((_runs_, _examples_) => {
                return Promise.resolve(evaluator(runs, examples));
                // Spread this evaluator's own options; spreading the whole array
                // would only produce numeric keys ("0", "1", ...).
            }, { ...options, name: evalName });
            return Promise.resolve(traced(`Runs[] (Length=${runs.length})`, `Examples[] (Length=${examples.length})`));
        };
    };
    return Array.from(evaluators, (evaluator, i) => wrapOne(evaluator, optionsArray?.[i]));
}
|
|
662
|
+
/**
 * Normalise a list of evaluators into objects exposing `evaluateRun`.
 *
 * - Entries that already have `evaluateRun` are kept as they are.
 * - Entries named "LangChainStringEvaluator" are rejected.
 * - Anything else is wrapped with `runEvaluator`.
 *
 * @param evaluators Iterable of evaluator objects or functions.
 * @returns {Array} Resolved evaluators, in input order.
 * @throws {Error} "Not yet implemented" for a LangChainStringEvaluator.
 */
function _resolveEvaluators(evaluators) {
    return Array.from(evaluators, (candidate) => {
        if ("evaluateRun" in candidate) {
            return candidate;
        }
        // todo fix this by porting LangChainStringEvaluator to langsmith sdk
        if (candidate.name === "LangChainStringEvaluator") {
            throw new Error("Not yet implemented");
        }
        return (0, evaluator_js_1.runEvaluator)(candidate);
    });
}
|
|
678
|
+
/**
 * Work out which experiment (project) an evaluation belongs to.
 *
 * @param experiment An existing experiment object, or `null`.
 * @param runs Existing runs, or `null` when predictions are still to be made.
 * @param client Client used to look up the project for existing runs.
 * @returns {Promise<Array>} `[experiment, runs]`:
 *   - `[experiment, undefined]` when an experiment was given;
 *   - `[project, runs]` when derived from the first run;
 *   - `[undefined, undefined]` when neither was given.
 * @throws {Error} When a given or retrieved experiment has no name.
 */
async function _resolveExperiment(experiment, runs, client) {
    // TODO: Remove this, handle outside the manager
    if (experiment !== null) {
        if (experiment.name) {
            return [experiment, undefined];
        }
        throw new Error("Experiment name must be defined if provided.");
    }
    if (runs === null) {
        return [undefined, undefined];
    }
    // If we have runs, that means the experiment was already started.
    // Split the stream so one copy can be peeked at without consuming the other.
    const copies = [];
    for await (const copy of (0, atee_js_1.atee)(runs)) {
        copies.push(copy);
    }
    const [peekable, untouched] = copies;
    const peekIterator = peekable[Symbol.asyncIterator]();
    // todo: this is `any`. does it work properly?
    // NOTE(review): an empty `runs` leaves `firstRun` undefined and the property
    // access below throws; also confirm the run field really is `sessionId`.
    const { value: firstRun } = await peekIterator.next();
    const retrievedExperiment = await client.readProject(firstRun.sessionId);
    if (!retrievedExperiment.name) {
        throw new Error("Experiment name not found for provided runs.");
    }
    return [retrievedExperiment, untouched];
}
|
|
706
|
+
/**
 * Tell whether `target` can be invoked as an evaluation target.
 *
 * @param target Any value.
 * @returns {boolean} `true` for functions and for objects whose `invoke`
 *   property is a function; `false` for everything else, including `null`,
 *   `undefined` and primitives.
 */
function _isCallable(target) {
    if (typeof target === "function") {
        return true;
    }
    // The `in` operator throws a TypeError on non-objects, so rule them out first.
    if (typeof target !== "object" || target === null) {
        return false;
    }
    return "invoke" in target && typeof target.invoke === "function";
}
|