langsmith 0.5.0-rc.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.cjs +23 -1
- package/dist/client.d.ts +21 -12
- package/dist/client.js +23 -1
- package/dist/evaluation/_runner.cjs +168 -57
- package/dist/evaluation/_runner.d.ts +39 -5
- package/dist/evaluation/_runner.js +166 -57
- package/dist/index.cjs +3 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/utils/async_caller.cjs +2 -11
- package/dist/utils/async_caller.d.ts +2 -1
- package/dist/utils/async_caller.js +2 -11
- package/dist/utils/fast-safe-stringify/index.d.ts +1 -1
- package/dist/utils/p-queue.cjs +9 -0
- package/dist/utils/p-queue.d.ts +2 -0
- package/dist/utils/p-queue.js +3 -0
- package/dist/utils/prompt_cache/index.cjs +14 -2
- package/dist/utils/prompt_cache/index.d.ts +8 -0
- package/dist/utils/prompt_cache/index.js +12 -1
- package/dist/wrappers/anthropic.cjs +28 -0
- package/dist/wrappers/anthropic.js +28 -0
- package/package.json +10 -6
package/dist/client.cjs
CHANGED
|
@@ -495,7 +495,29 @@ class Client {
|
|
|
495
495
|
// Cache metadata env vars once during construction to avoid repeatedly scanning process.env
|
|
496
496
|
this.cachedLSEnvVarsForMetadata = (0, env_js_1.getLangSmithEnvVarsMetadata)();
|
|
497
497
|
// Initialize prompt cache
|
|
498
|
-
|
|
498
|
+
// Handle backwards compatibility for deprecated `cache` parameter
|
|
499
|
+
if (config.cache !== undefined && config.disablePromptCache) {
|
|
500
|
+
(0, warn_js_1.warnOnce)("Both 'cache' and 'disablePromptCache' were provided. " +
|
|
501
|
+
"The 'cache' parameter is deprecated and will be removed in a future version. " +
|
|
502
|
+
"Using 'cache' parameter value.");
|
|
503
|
+
}
|
|
504
|
+
if (config.cache !== undefined) {
|
|
505
|
+
(0, warn_js_1.warnOnce)("The 'cache' parameter is deprecated and will be removed in a future version. " +
|
|
506
|
+
"Use 'configureGlobalPromptCache()' to configure the global cache, or " +
|
|
507
|
+
"'disablePromptCache: true' to disable caching for this client.");
|
|
508
|
+
// Handle old cache parameter
|
|
509
|
+
if (config.cache === false) {
|
|
510
|
+
this._promptCache = undefined;
|
|
511
|
+
}
|
|
512
|
+
else if (config.cache === true) {
|
|
513
|
+
this._promptCache = index_js_2.promptCacheSingleton;
|
|
514
|
+
}
|
|
515
|
+
else {
|
|
516
|
+
// Custom PromptCache instance provided
|
|
517
|
+
this._promptCache = config.cache;
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
else if (!config.disablePromptCache) {
|
|
499
521
|
// Use the global singleton instance
|
|
500
522
|
this._promptCache = index_js_2.promptCacheSingleton;
|
|
501
523
|
}
|
package/dist/client.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { OTELContext } from "./experimental/otel/types.js";
|
|
|
2
2
|
import { AsyncCallerParams } from "./utils/async_caller.js";
|
|
3
3
|
import { ComparativeExperiment, DataType, Dataset, DatasetDiffInfo, DatasetShareSchema, Example, ExampleCreate, ExampleUpdate, ExampleUpdateWithoutId, Feedback, FeedbackConfig, FeedbackIngestToken, KVMap, LangChainBaseMessage, LangSmithSettings, LikePromptResponse, Prompt, PromptCommit, PromptSortField, Run, RunCreate, RunUpdate, ScoreType, ExampleSearch, TimeDelta, TracerSession, TracerSessionResult, ValueType, AnnotationQueue, RunWithAnnotationQueueInfo, Attachments, UploadExamplesResponse, UpdateExamplesResponse, DatasetVersion, AnnotationQueueWithDetails } from "./schemas.js";
|
|
4
4
|
import { EvaluationResult, EvaluationResults } from "./evaluation/evaluator.js";
|
|
5
|
+
import { PromptCache } from "./utils/prompt_cache/index.js";
|
|
5
6
|
export interface ClientConfig {
|
|
6
7
|
apiUrl?: string;
|
|
7
8
|
apiKey?: string;
|
|
@@ -50,31 +51,39 @@ export interface ClientConfig {
|
|
|
50
51
|
*/
|
|
51
52
|
fetchImplementation?: typeof fetch;
|
|
52
53
|
/**
|
|
54
|
+
* Disable prompt caching for this client.
|
|
55
|
+
* By default, prompt caching is enabled globally.
|
|
56
|
+
*/
|
|
57
|
+
disablePromptCache?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* @deprecated Use `configureGlobalPromptCache()` to configure caching, or
|
|
60
|
+
* `disablePromptCache: true` to disable it. This parameter is deprecated.
|
|
61
|
+
*
|
|
53
62
|
* Configuration for caching. Can be:
|
|
54
|
-
* - `true`: Enable caching with default settings
|
|
55
|
-
* - `Cache` instance: Use custom cache configuration
|
|
56
|
-
* - `
|
|
63
|
+
* - `true`: Enable caching with default settings (uses global singleton)
|
|
64
|
+
* - `Cache`/`PromptCache` instance: Use custom cache configuration
|
|
65
|
+
* - `false`: Disable caching (equivalent to `disablePromptCache: true`)
|
|
57
66
|
*
|
|
58
67
|
* @example
|
|
59
68
|
* ```typescript
|
|
60
|
-
* import { Client, Cache } from "langsmith";
|
|
69
|
+
* import { Client, Cache, configureGlobalPromptCache } from "langsmith";
|
|
61
70
|
*
|
|
62
71
|
* // Enable with defaults
|
|
63
|
-
* const client1 = new Client({
|
|
72
|
+
* const client1 = new Client({});
|
|
64
73
|
*
|
|
65
74
|
* // Or use custom configuration
|
|
66
|
-
*
|
|
75
|
+
* import { configureGlobalPromptCache } from "langsmith";
|
|
76
|
+
* configureGlobalPromptCache({
|
|
67
77
|
* maxSize: 100,
|
|
68
78
|
* ttlSeconds: 3600, // 1 hour, or null for infinite TTL
|
|
69
79
|
* });
|
|
70
|
-
* const client2 = new Client({
|
|
80
|
+
* const client2 = new Client({});
|
|
81
|
+
*
|
|
82
|
+
* // Or disable for a specific client
|
|
83
|
+
* const client3 = new Client({ disablePromptCache: true });
|
|
71
84
|
* ```
|
|
72
85
|
*/
|
|
73
|
-
|
|
74
|
-
* Disable prompt caching for this client.
|
|
75
|
-
* By default, prompt caching is enabled globally.
|
|
76
|
-
*/
|
|
77
|
-
disablePromptCache?: boolean;
|
|
86
|
+
cache?: boolean | PromptCache;
|
|
78
87
|
}
|
|
79
88
|
/**
|
|
80
89
|
* Represents the parameters for listing runs (spans) from the Langsmith server.
|
package/dist/client.js
CHANGED
|
@@ -457,7 +457,29 @@ export class Client {
|
|
|
457
457
|
// Cache metadata env vars once during construction to avoid repeatedly scanning process.env
|
|
458
458
|
this.cachedLSEnvVarsForMetadata = getLangSmithEnvVarsMetadata();
|
|
459
459
|
// Initialize prompt cache
|
|
460
|
-
|
|
460
|
+
// Handle backwards compatibility for deprecated `cache` parameter
|
|
461
|
+
if (config.cache !== undefined && config.disablePromptCache) {
|
|
462
|
+
warnOnce("Both 'cache' and 'disablePromptCache' were provided. " +
|
|
463
|
+
"The 'cache' parameter is deprecated and will be removed in a future version. " +
|
|
464
|
+
"Using 'cache' parameter value.");
|
|
465
|
+
}
|
|
466
|
+
if (config.cache !== undefined) {
|
|
467
|
+
warnOnce("The 'cache' parameter is deprecated and will be removed in a future version. " +
|
|
468
|
+
"Use 'configureGlobalPromptCache()' to configure the global cache, or " +
|
|
469
|
+
"'disablePromptCache: true' to disable caching for this client.");
|
|
470
|
+
// Handle old cache parameter
|
|
471
|
+
if (config.cache === false) {
|
|
472
|
+
this._promptCache = undefined;
|
|
473
|
+
}
|
|
474
|
+
else if (config.cache === true) {
|
|
475
|
+
this._promptCache = promptCacheSingleton;
|
|
476
|
+
}
|
|
477
|
+
else {
|
|
478
|
+
// Custom PromptCache instance provided
|
|
479
|
+
this._promptCache = config.cache;
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
else if (!config.disablePromptCache) {
|
|
461
483
|
// Use the global singleton instance
|
|
462
484
|
this._promptCache = promptCacheSingleton;
|
|
463
485
|
}
|
package/dist/evaluation/_runner.cjs
CHANGED
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports._ExperimentManager = void 0;
|
|
4
4
|
exports.evaluate = evaluate;
|
|
5
|
+
exports._reorderResultRowsByExampleIndex = _reorderResultRowsByExampleIndex;
|
|
6
|
+
exports._mapWithConcurrency = _mapWithConcurrency;
|
|
5
7
|
const index_js_1 = require("../index.cjs");
|
|
6
8
|
const traceable_js_1 = require("../traceable.cjs");
|
|
7
9
|
const _git_js_1 = require("../utils/_git.cjs");
|
|
8
10
|
const _uuid_js_1 = require("../utils/_uuid.cjs");
|
|
9
|
-
const async_caller_js_1 = require("../utils/async_caller.cjs");
|
|
10
11
|
const atee_js_1 = require("../utils/atee.cjs");
|
|
11
12
|
const env_js_1 = require("../utils/env.cjs");
|
|
12
13
|
const error_js_1 = require("../utils/error.cjs");
|
|
@@ -14,10 +15,22 @@ const _random_name_js_1 = require("./_random_name.cjs");
|
|
|
14
15
|
const evaluator_js_1 = require("./evaluator.cjs");
|
|
15
16
|
const uuid_1 = require("uuid");
|
|
16
17
|
const evaluate_comparative_js_1 = require("./evaluate_comparative.cjs");
|
|
18
|
+
const p_queue_js_1 = require("../utils/p-queue.cjs");
|
|
17
19
|
// Implementation signature
|
|
18
20
|
function evaluate(target, options) {
|
|
19
21
|
return _evaluate(target, options);
|
|
20
22
|
}
|
|
23
|
+
function _reorderResultRowsByExampleIndex(rows) {
|
|
24
|
+
const sortedRows = [...rows].sort((a, b) => a.exampleIndex - b.exampleIndex);
|
|
25
|
+
return {
|
|
26
|
+
orderedRows: sortedRows.map(({ run, example, evaluationResults }) => ({
|
|
27
|
+
run,
|
|
28
|
+
example,
|
|
29
|
+
evaluationResults,
|
|
30
|
+
})),
|
|
31
|
+
orderedRuns: sortedRows.map((row) => row.run),
|
|
32
|
+
};
|
|
33
|
+
}
|
|
21
34
|
/**
|
|
22
35
|
* Manage the execution of experiments.
|
|
23
36
|
*
|
|
@@ -124,6 +137,12 @@ class _ExperimentManager {
|
|
|
124
137
|
writable: true,
|
|
125
138
|
value: void 0
|
|
126
139
|
});
|
|
140
|
+
Object.defineProperty(this, "_resultRows", {
|
|
141
|
+
enumerable: true,
|
|
142
|
+
configurable: true,
|
|
143
|
+
writable: true,
|
|
144
|
+
value: void 0
|
|
145
|
+
});
|
|
127
146
|
Object.defineProperty(this, "_examples", {
|
|
128
147
|
enumerable: true,
|
|
129
148
|
configurable: true,
|
|
@@ -210,6 +229,7 @@ class _ExperimentManager {
|
|
|
210
229
|
this._runs = args.runs;
|
|
211
230
|
this._evaluationResults = args.evaluationResults;
|
|
212
231
|
this._summaryResults = args.summaryResults;
|
|
232
|
+
this._resultRows = args.resultRows;
|
|
213
233
|
this._numRepetitions = args.numRepetitions;
|
|
214
234
|
this._includeAttachments = args.includeAttachments;
|
|
215
235
|
}
|
|
@@ -304,13 +324,24 @@ class _ExperimentManager {
|
|
|
304
324
|
}
|
|
305
325
|
async withPredictions(target, options) {
|
|
306
326
|
const experimentResults = this._predict(target, options);
|
|
327
|
+
const [rowsForResults, rowsForRuns] = (0, atee_js_1.atee)(experimentResults);
|
|
307
328
|
return new _ExperimentManager({
|
|
308
329
|
examples: await this.getExamples(),
|
|
309
330
|
experiment: this._experiment,
|
|
310
331
|
metadata: this._metadata,
|
|
311
332
|
client: this.client,
|
|
333
|
+
resultRows: (async function* () {
|
|
334
|
+
for await (const pred of rowsForResults) {
|
|
335
|
+
yield {
|
|
336
|
+
run: pred.run,
|
|
337
|
+
example: pred.example,
|
|
338
|
+
evaluationResults: { results: [] },
|
|
339
|
+
exampleIndex: pred.exampleIndex,
|
|
340
|
+
};
|
|
341
|
+
}
|
|
342
|
+
})(),
|
|
312
343
|
runs: (async function* () {
|
|
313
|
-
for await (const pred of
|
|
344
|
+
for await (const pred of rowsForRuns) {
|
|
314
345
|
yield pred.run;
|
|
315
346
|
}
|
|
316
347
|
})(),
|
|
@@ -320,19 +351,20 @@ class _ExperimentManager {
|
|
|
320
351
|
async withEvaluators(evaluators, options) {
|
|
321
352
|
const resolvedEvaluators = _resolveEvaluators(evaluators);
|
|
322
353
|
const experimentResults = this._score(resolvedEvaluators, options);
|
|
323
|
-
const [
|
|
354
|
+
const [rowsForResults, rowsForRuns, rowsForEvaluations] = (0, atee_js_1.atee)(experimentResults, 3);
|
|
324
355
|
return new _ExperimentManager({
|
|
325
356
|
examples: await this.getExamples(),
|
|
326
357
|
experiment: this._experiment,
|
|
327
358
|
metadata: this._metadata,
|
|
328
359
|
client: this.client,
|
|
360
|
+
resultRows: rowsForResults,
|
|
329
361
|
runs: (async function* () {
|
|
330
|
-
for await (const result of
|
|
362
|
+
for await (const result of rowsForRuns) {
|
|
331
363
|
yield result.run;
|
|
332
364
|
}
|
|
333
365
|
})(),
|
|
334
366
|
evaluationResults: (async function* () {
|
|
335
|
-
for await (const result of
|
|
367
|
+
for await (const result of rowsForEvaluations) {
|
|
336
368
|
yield result.evaluationResults;
|
|
337
369
|
}
|
|
338
370
|
})(),
|
|
@@ -347,30 +379,43 @@ class _ExperimentManager {
|
|
|
347
379
|
experiment: this._experiment,
|
|
348
380
|
metadata: this._metadata,
|
|
349
381
|
client: this.client,
|
|
350
|
-
runs: this.
|
|
382
|
+
runs: this._runs,
|
|
351
383
|
_runsArray: this._runsArray,
|
|
352
384
|
evaluationResults: this._evaluationResults,
|
|
385
|
+
resultRows: this._resultRows,
|
|
353
386
|
summaryResults: aggregateFeedbackGen,
|
|
354
387
|
includeAttachments: this._includeAttachments,
|
|
355
388
|
});
|
|
356
389
|
}
|
|
357
390
|
async *getResults() {
|
|
358
|
-
const examples = await this.getExamples();
|
|
359
|
-
const evaluationResults = [];
|
|
360
391
|
if (!this._runsArray) {
|
|
361
392
|
this._runsArray = [];
|
|
362
|
-
for await (const run of this.runs) {
|
|
363
|
-
this._runsArray.push(run);
|
|
364
|
-
}
|
|
365
393
|
}
|
|
366
|
-
|
|
367
|
-
|
|
394
|
+
if (this._resultRows) {
|
|
395
|
+
for await (const result of this._resultRows) {
|
|
396
|
+
this._runsArray.push(result.run);
|
|
397
|
+
yield result;
|
|
398
|
+
}
|
|
399
|
+
return;
|
|
368
400
|
}
|
|
369
|
-
|
|
401
|
+
const examples = await this.getExamples();
|
|
402
|
+
const runsIterator = this.runs[Symbol.asyncIterator]();
|
|
403
|
+
const evaluationIterator = this.evaluationResults[Symbol.asyncIterator]();
|
|
404
|
+
for (let i = 0; i < examples.length; i++) {
|
|
405
|
+
const runResult = await runsIterator.next();
|
|
406
|
+
if (runResult.done) {
|
|
407
|
+
break;
|
|
408
|
+
}
|
|
409
|
+
const evaluationResult = await evaluationIterator.next();
|
|
410
|
+
const evaluationResults = evaluationResult.done
|
|
411
|
+
? { results: [] }
|
|
412
|
+
: evaluationResult.value;
|
|
413
|
+
this._runsArray.push(runResult.value);
|
|
370
414
|
yield {
|
|
371
|
-
run:
|
|
415
|
+
run: runResult.value,
|
|
372
416
|
example: examples[i],
|
|
373
|
-
evaluationResults
|
|
417
|
+
evaluationResults,
|
|
418
|
+
exampleIndex: i,
|
|
374
419
|
};
|
|
375
420
|
}
|
|
376
421
|
}
|
|
@@ -400,29 +445,52 @@ class _ExperimentManager {
|
|
|
400
445
|
async *_predict(target, options) {
|
|
401
446
|
const maxConcurrency = options?.maxConcurrency ?? 0;
|
|
402
447
|
const examples = await this.getExamples();
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
448
|
+
let hadPredictionError = false;
|
|
449
|
+
let shouldThrowEndError = false;
|
|
450
|
+
let endErrorToThrow;
|
|
451
|
+
try {
|
|
452
|
+
// maxConcurrency: 0 means sequential execution (matching Python behavior)
|
|
453
|
+
const queue = options?.queue ??
|
|
454
|
+
new p_queue_js_1.PQueue({
|
|
455
|
+
concurrency: maxConcurrency === 0 ? 1 : maxConcurrency,
|
|
456
|
+
});
|
|
457
|
+
const examplesWithIndex = examples.map((example, i) => ({
|
|
458
|
+
example,
|
|
459
|
+
exampleIndex: i,
|
|
460
|
+
}));
|
|
461
|
+
for await (const result of _mapWithConcurrency(examplesWithIndex, queue, (item) => _forward(target, item.example, this.experimentName, this._metadata, this.client, this._includeAttachments).then((forwardResult) => ({
|
|
462
|
+
...forwardResult,
|
|
463
|
+
exampleIndex: item.exampleIndex,
|
|
464
|
+
})))) {
|
|
465
|
+
yield result;
|
|
406
466
|
}
|
|
407
467
|
}
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
468
|
+
catch (error) {
|
|
469
|
+
hadPredictionError = true;
|
|
470
|
+
throw error;
|
|
471
|
+
}
|
|
472
|
+
finally {
|
|
473
|
+
try {
|
|
474
|
+
// Always attempt to close out the project metadata, even on prediction errors.
|
|
475
|
+
await this._end();
|
|
416
476
|
}
|
|
417
|
-
|
|
418
|
-
|
|
477
|
+
catch (endError) {
|
|
478
|
+
if (hadPredictionError) {
|
|
479
|
+
console.error(`Error finalizing experiment: ${endError}`);
|
|
480
|
+
(0, error_js_1.printErrorStackTrace)(endError);
|
|
481
|
+
}
|
|
482
|
+
else {
|
|
483
|
+
shouldThrowEndError = true;
|
|
484
|
+
endErrorToThrow = endError;
|
|
485
|
+
}
|
|
419
486
|
}
|
|
420
487
|
}
|
|
421
|
-
|
|
422
|
-
|
|
488
|
+
if (shouldThrowEndError) {
|
|
489
|
+
throw endErrorToThrow;
|
|
490
|
+
}
|
|
423
491
|
}
|
|
424
492
|
async _runEvaluators(evaluators, currentResults, fields) {
|
|
425
|
-
const { run, example, evaluationResults } = currentResults;
|
|
493
|
+
const { run, example, evaluationResults, exampleIndex } = currentResults;
|
|
426
494
|
for (const evaluator of evaluators) {
|
|
427
495
|
try {
|
|
428
496
|
const options = {
|
|
@@ -448,6 +516,7 @@ class _ExperimentManager {
|
|
|
448
516
|
run,
|
|
449
517
|
example,
|
|
450
518
|
evaluationResults,
|
|
519
|
+
exampleIndex,
|
|
451
520
|
};
|
|
452
521
|
}
|
|
453
522
|
/**
|
|
@@ -458,28 +527,16 @@ class _ExperimentManager {
|
|
|
458
527
|
* @param {number} maxConcurrency
|
|
459
528
|
*/
|
|
460
529
|
async *_score(evaluators, options) {
|
|
461
|
-
const { maxConcurrency = 0 } = options || {};
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
});
|
|
467
|
-
}
|
|
468
|
-
}
|
|
469
|
-
else {
|
|
470
|
-
const caller = new async_caller_js_1.AsyncCaller({
|
|
471
|
-
maxConcurrency,
|
|
472
|
-
debug: this.client.debug,
|
|
530
|
+
const { maxConcurrency = 0, queue: providedQueue } = options || {};
|
|
531
|
+
// maxConcurrency: 0 means sequential execution (matching Python behavior)
|
|
532
|
+
const queue = providedQueue ??
|
|
533
|
+
new p_queue_js_1.PQueue({
|
|
534
|
+
concurrency: maxConcurrency === 0 ? 1 : maxConcurrency,
|
|
473
535
|
});
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
}));
|
|
479
|
-
}
|
|
480
|
-
for (const result of futures) {
|
|
481
|
-
yield result;
|
|
482
|
-
}
|
|
536
|
+
for await (const result of _mapWithConcurrency(this.getResults(), queue, (currentResults) => this._runEvaluators(evaluators, currentResults, {
|
|
537
|
+
client: this.client,
|
|
538
|
+
}))) {
|
|
539
|
+
yield result;
|
|
483
540
|
}
|
|
484
541
|
}
|
|
485
542
|
async *_applySummaryEvaluators(summaryEvaluators) {
|
|
@@ -632,10 +689,14 @@ class ExperimentResults {
|
|
|
632
689
|
}
|
|
633
690
|
}
|
|
634
691
|
async processData(manager) {
|
|
692
|
+
const unorderedResults = [];
|
|
635
693
|
for await (const item of manager.getResults()) {
|
|
636
|
-
|
|
637
|
-
this.processedCount++;
|
|
694
|
+
unorderedResults.push(item);
|
|
638
695
|
}
|
|
696
|
+
const { orderedRows, orderedRuns } = _reorderResultRowsByExampleIndex(unorderedResults);
|
|
697
|
+
manager._runsArray = orderedRuns;
|
|
698
|
+
this.results = orderedRows;
|
|
699
|
+
this.processedCount = this.results.length;
|
|
639
700
|
this.summaryResults = await manager.getSummaryScores();
|
|
640
701
|
}
|
|
641
702
|
get length() {
|
|
@@ -676,14 +737,40 @@ async function _evaluate(target, fields) {
|
|
|
676
737
|
numRepetitions: fields.numRepetitions ?? 1,
|
|
677
738
|
includeAttachments: standardFields.includeAttachments,
|
|
678
739
|
}).start();
|
|
740
|
+
const targetConcurrency = standardFields.targetConcurrency ?? standardFields.maxConcurrency ?? 0;
|
|
741
|
+
const evaluationConcurrency = standardFields.evaluationConcurrency ?? standardFields.maxConcurrency ?? 0;
|
|
742
|
+
// Determine if we should use separate queues or a shared queue
|
|
743
|
+
const useSeparateQueues = standardFields.targetConcurrency !== undefined &&
|
|
744
|
+
standardFields.evaluationConcurrency !== undefined;
|
|
745
|
+
let sharedQueue;
|
|
746
|
+
let targetQueue;
|
|
747
|
+
let evaluationQueue;
|
|
748
|
+
if (useSeparateQueues) {
|
|
749
|
+
// Create separate queues for target and evaluation
|
|
750
|
+
if (targetConcurrency > 0) {
|
|
751
|
+
targetQueue = new p_queue_js_1.PQueue({ concurrency: targetConcurrency });
|
|
752
|
+
}
|
|
753
|
+
if (evaluationConcurrency > 0) {
|
|
754
|
+
evaluationQueue = new p_queue_js_1.PQueue({ concurrency: evaluationConcurrency });
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
else {
|
|
758
|
+
// Use a shared queue
|
|
759
|
+
const sharedConcurrency = standardFields.maxConcurrency ?? 0;
|
|
760
|
+
if (sharedConcurrency > 0) {
|
|
761
|
+
sharedQueue = new p_queue_js_1.PQueue({ concurrency: sharedConcurrency });
|
|
762
|
+
}
|
|
763
|
+
}
|
|
679
764
|
if (_isCallable(target)) {
|
|
680
765
|
manager = await manager.withPredictions(target, {
|
|
681
|
-
maxConcurrency:
|
|
766
|
+
maxConcurrency: targetConcurrency,
|
|
767
|
+
queue: useSeparateQueues ? targetQueue : sharedQueue,
|
|
682
768
|
});
|
|
683
769
|
}
|
|
684
770
|
if (standardFields.evaluators) {
|
|
685
771
|
manager = await manager.withEvaluators(standardFields.evaluators, {
|
|
686
|
-
maxConcurrency:
|
|
772
|
+
maxConcurrency: evaluationConcurrency,
|
|
773
|
+
queue: useSeparateQueues ? evaluationQueue : sharedQueue,
|
|
687
774
|
});
|
|
688
775
|
}
|
|
689
776
|
if (standardFields.summaryEvaluators) {
|
|
@@ -868,6 +955,30 @@ async function _resolveExperiment(experiment, runs, client) {
|
|
|
868
955
|
}
|
|
869
956
|
return [undefined, undefined];
|
|
870
957
|
}
|
|
958
|
+
/**
|
|
959
|
+
* Map over an iterable with bounded concurrency using p-queue.
|
|
960
|
+
* Results are yielded as soon as they resolve (input order is not preserved).
|
|
961
|
+
* The queue handles concurrency limits internally.
|
|
962
|
+
*/
|
|
963
|
+
async function* _mapWithConcurrency(iterable, queue, mapper) {
|
|
964
|
+
const pending = new Set();
|
|
965
|
+
// Add all tasks to p-queue immediately (p-queue handles concurrency)
|
|
966
|
+
for await (const input of iterable) {
|
|
967
|
+
const task = queue
|
|
968
|
+
.add(() => mapper(input))
|
|
969
|
+
.then((value) => ({
|
|
970
|
+
value,
|
|
971
|
+
self: task,
|
|
972
|
+
}));
|
|
973
|
+
pending.add(task);
|
|
974
|
+
}
|
|
975
|
+
// Yield results as they complete
|
|
976
|
+
while (pending.size > 0) {
|
|
977
|
+
const { value, self } = await Promise.race(pending);
|
|
978
|
+
pending.delete(self);
|
|
979
|
+
yield value;
|
|
980
|
+
}
|
|
981
|
+
}
|
|
871
982
|
function _isCallable(target) {
|
|
872
983
|
return Boolean(typeof target === "function" ||
|
|
873
984
|
("invoke" in target && typeof target.invoke === "function"));
|
package/dist/evaluation/_runner.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { Client } from "../index.js";
|
|
|
2
2
|
import { AttachmentInfo, Example, KVMap, Run, TracerSession } from "../schemas.js";
|
|
3
3
|
import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
|
|
4
4
|
import { ComparisonEvaluationResults, ComparativeEvaluator } from "./evaluate_comparative.js";
|
|
5
|
+
import { PQueueType } from "../utils/p-queue.js";
|
|
5
6
|
export type TargetConfigT = KVMap & {
|
|
6
7
|
attachments?: Record<string, AttachmentInfo>;
|
|
7
8
|
callbacks?: any;
|
|
@@ -55,6 +56,7 @@ export type EvaluatorT = DeprecatedRunEvaluator | DeprecatedFunctionEvaluator |
|
|
|
55
56
|
interface _ForwardResults {
|
|
56
57
|
run: Run;
|
|
57
58
|
example: Example;
|
|
59
|
+
exampleIndex: number;
|
|
58
60
|
}
|
|
59
61
|
interface _ExperimentManagerArgs {
|
|
60
62
|
data?: DataT;
|
|
@@ -67,6 +69,7 @@ interface _ExperimentManagerArgs {
|
|
|
67
69
|
examples?: Example[];
|
|
68
70
|
numRepetitions?: number;
|
|
69
71
|
_runsArray?: Run[];
|
|
72
|
+
resultRows?: AsyncGenerator<_ExperimentResultRowWithIndex>;
|
|
70
73
|
includeAttachments?: boolean;
|
|
71
74
|
}
|
|
72
75
|
type BaseEvaluateOptions = {
|
|
@@ -85,7 +88,8 @@ type BaseEvaluateOptions = {
|
|
|
85
88
|
*/
|
|
86
89
|
description?: string;
|
|
87
90
|
/**
|
|
88
|
-
* The maximum
|
|
91
|
+
* The maximum concurrency to use for predictions/evaluations when a more
|
|
92
|
+
* specific concurrency option is not provided.
|
|
89
93
|
* @default undefined
|
|
90
94
|
*/
|
|
91
95
|
maxConcurrency?: number;
|
|
@@ -102,6 +106,18 @@ type BaseEvaluateOptions = {
|
|
|
102
106
|
numRepetitions?: number;
|
|
103
107
|
};
|
|
104
108
|
export interface EvaluateOptions extends BaseEvaluateOptions {
|
|
109
|
+
/**
|
|
110
|
+
* The maximum number of concurrent predictions to run.
|
|
111
|
+
* If not provided, defaults to `maxConcurrency` when set.
|
|
112
|
+
* @default undefined
|
|
113
|
+
*/
|
|
114
|
+
targetConcurrency?: number;
|
|
115
|
+
/**
|
|
116
|
+
* The maximum number of concurrent evaluators to run.
|
|
117
|
+
* If not provided, defaults to `maxConcurrency` when set.
|
|
118
|
+
* @default undefined
|
|
119
|
+
*/
|
|
120
|
+
evaluationConcurrency?: number;
|
|
105
121
|
/**
|
|
106
122
|
* A list of evaluators to run on each example.
|
|
107
123
|
* @default undefined
|
|
@@ -146,6 +162,13 @@ export interface ExperimentResultRow {
|
|
|
146
162
|
example: Example;
|
|
147
163
|
evaluationResults: EvaluationResults;
|
|
148
164
|
}
|
|
165
|
+
interface _ExperimentResultRowWithIndex extends ExperimentResultRow {
|
|
166
|
+
exampleIndex: number;
|
|
167
|
+
}
|
|
168
|
+
export declare function _reorderResultRowsByExampleIndex(rows: _ExperimentResultRowWithIndex[]): {
|
|
169
|
+
orderedRows: ExperimentResultRow[];
|
|
170
|
+
orderedRuns: Run[];
|
|
171
|
+
};
|
|
149
172
|
/**
|
|
150
173
|
* Manage the execution of experiments.
|
|
151
174
|
*
|
|
@@ -157,6 +180,7 @@ export declare class _ExperimentManager {
|
|
|
157
180
|
_runs?: AsyncGenerator<Run>;
|
|
158
181
|
_evaluationResults?: AsyncGenerator<EvaluationResults>;
|
|
159
182
|
_summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
|
|
183
|
+
_resultRows?: AsyncGenerator<_ExperimentResultRowWithIndex>;
|
|
160
184
|
_examples?: Example[];
|
|
161
185
|
_numRepetitions?: number;
|
|
162
186
|
_runsArray?: Run[];
|
|
@@ -181,12 +205,14 @@ export declare class _ExperimentManager {
|
|
|
181
205
|
start(): Promise<_ExperimentManager>;
|
|
182
206
|
withPredictions(target: StandardTargetT, options?: {
|
|
183
207
|
maxConcurrency?: number;
|
|
208
|
+
queue?: PQueueType;
|
|
184
209
|
}): Promise<_ExperimentManager>;
|
|
185
210
|
withEvaluators(evaluators: Array<EvaluatorT | RunEvaluator>, options?: {
|
|
186
211
|
maxConcurrency?: number;
|
|
212
|
+
queue?: PQueueType;
|
|
187
213
|
}): Promise<_ExperimentManager>;
|
|
188
214
|
withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;
|
|
189
|
-
getResults(): AsyncGenerator<
|
|
215
|
+
getResults(): AsyncGenerator<_ExperimentResultRowWithIndex>;
|
|
190
216
|
getSummaryScores(): Promise<EvaluationResults>;
|
|
191
217
|
/**
|
|
192
218
|
* Run the target function or runnable on the examples.
|
|
@@ -196,10 +222,11 @@ export declare class _ExperimentManager {
|
|
|
196
222
|
*/
|
|
197
223
|
_predict(target: StandardTargetT, options?: {
|
|
198
224
|
maxConcurrency?: number;
|
|
225
|
+
queue?: PQueueType;
|
|
199
226
|
}): AsyncGenerator<_ForwardResults>;
|
|
200
|
-
_runEvaluators(evaluators: Array<RunEvaluator>, currentResults:
|
|
227
|
+
_runEvaluators(evaluators: Array<RunEvaluator>, currentResults: _ExperimentResultRowWithIndex, fields: {
|
|
201
228
|
client: Client;
|
|
202
|
-
}): Promise<
|
|
229
|
+
}): Promise<_ExperimentResultRowWithIndex>;
|
|
203
230
|
/**
|
|
204
231
|
* Run the evaluators on the prediction stream.
|
|
205
232
|
* Expects runs to be available in the manager.
|
|
@@ -209,7 +236,8 @@ export declare class _ExperimentManager {
|
|
|
209
236
|
*/
|
|
210
237
|
_score(evaluators: Array<RunEvaluator>, options?: {
|
|
211
238
|
maxConcurrency?: number;
|
|
212
|
-
|
|
239
|
+
queue?: PQueueType;
|
|
240
|
+
}): AsyncGenerator<_ExperimentResultRowWithIndex>;
|
|
213
241
|
_applySummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
|
|
214
242
|
_getDatasetVersion(): Promise<string | undefined>;
|
|
215
243
|
_getDatasetSplits(): Promise<string[] | undefined>;
|
|
@@ -233,4 +261,10 @@ declare class ExperimentResults implements AsyncIterableIterator<ExperimentResul
|
|
|
233
261
|
processData(manager: _ExperimentManager): Promise<void>;
|
|
234
262
|
get length(): number;
|
|
235
263
|
}
|
|
264
|
+
/**
|
|
265
|
+
* Map over an iterable with bounded concurrency using p-queue.
|
|
266
|
+
* Results are yielded as soon as they resolve (input order is not preserved).
|
|
267
|
+
* The queue handles concurrency limits internally.
|
|
268
|
+
*/
|
|
269
|
+
export declare function _mapWithConcurrency<TInput, TOutput>(iterable: Iterable<TInput> | AsyncIterable<TInput>, queue: PQueueType, mapper: (value: TInput) => Promise<TOutput>): AsyncGenerator<TOutput>;
|
|
236
270
|
export {};
|