langsmith 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,158 @@
1
+ import { Client } from "../index.js";
2
+ import { Example, KVMap, Run, TracerSession } from "../schemas.js";
3
+ import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
4
/**
 * A system under evaluation.
 *
 * Either a plain function (synchronous or asynchronous) that maps an input
 * `KVMap` and an optional config `KVMap` to an output `KVMap`, or an object
 * that exposes such a function as its `invoke` method.
 */
type TargetT =
  | ((input: KVMap, config?: KVMap) => Promise<KVMap>)
  | ((input: KVMap, config?: KVMap) => KVMap)
  | { invoke: (input: KVMap, config?: KVMap) => KVMap }
  | { invoke: (input: KVMap, config?: KVMap) => Promise<KVMap> };
9
/**
 * The function-only target forms: a synchronous or asynchronous callable from
 * an input `KVMap` (and optional config `KVMap`) to an output `KVMap`, without
 * the `invoke` wrapper-object variants.
 */
type TargetNoInvoke =
  | ((input: KVMap, config?: KVMap) => Promise<KVMap>)
  | ((input: KVMap, config?: KVMap) => KVMap);
10
/**
 * Source of examples: an in-memory array of examples, an async iterable of
 * examples, or a string (documented on `EvaluateOptions.data` as a dataset name).
 */
type DataT = Example[] | AsyncIterable<Example> | string;
11
/**
 * An evaluator that receives all runs and all examples at once and returns
 * (directly or via a promise) either a single `EvaluationResult` or an
 * `EvaluationResults` value.
 */
type SummaryEvaluatorT =
  | ((runs: Run[], examples: Example[]) => Promise<EvaluationResult | EvaluationResults>)
  | ((runs: Run[], examples: Example[]) => EvaluationResult | EvaluationResults);
12
/**
 * A per-run evaluator: either a `RunEvaluator` object, or a function of a run
 * and an optional example that returns an `EvaluationResult` synchronously or
 * via a promise.
 */
type EvaluatorT =
  | RunEvaluator
  | ((run: Run, example?: Example) => EvaluationResult)
  | ((run: Run, example?: Example) => Promise<EvaluationResult>);
13
/**
 * A run together with an example; this is the element type yielded by
 * `_ExperimentManager._predict`.
 */
interface _ForwardResults {
  /** The run half of the pair. */
  run: Run;
  /** The example half of the pair. */
  example: Example;
}
17
/**
 * Argument bag accepted by the `_ExperimentManager` constructor.
 * Every field is optional.
 */
interface _ExperimentManagerArgs {
  /** Example source: a string, an async iterable of examples, or an array of examples. */
  data?: DataT;
  /** Either a `TracerSession` object or a string. */
  experiment?: TracerSession | string;
  /** Free-form key/value metadata. */
  metadata?: KVMap;
  /** Client instance to use. */
  client?: Client;
  /** Async stream of runs. */
  runs?: AsyncGenerator<Run>;
  /** Async stream of evaluation results. */
  evaluationResults?: AsyncGenerator<EvaluationResults>;
  /**
   * Async stream of functions; each takes the array of runs and returns an
   * async generator of evaluation results.
   */
  summaryResults?: AsyncGenerator<
    (runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>,
    any,
    unknown
  >;
  /** Examples supplied as an array. */
  examples?: Example[];
  /** Runs supplied as an array. */
  _runsArray?: Run[];
}
28
/** Options accepted by `evaluate()`. Only `data` is required. */
export interface EvaluateOptions {
  /**
   * The dataset to evaluate on: a dataset name, an array of examples,
   * or an async iterable of examples.
   */
  data: DataT;

  /**
   * Evaluators to run on each example.
   * @default undefined
   */
  evaluators?: EvaluatorT[];

  /**
   * Summary evaluators to run over the entire dataset.
   * @default undefined
   */
  summaryEvaluators?: SummaryEvaluatorT[];

  /**
   * Metadata to attach to the experiment.
   * @default undefined
   */
  metadata?: KVMap;

  /**
   * Prefix to use for the experiment name.
   * @default undefined
   */
  experimentPrefix?: string;

  /**
   * Maximum number of evaluations to run concurrently.
   * @default undefined
   */
  maxConcurrency?: number;

  /**
   * The LangSmith client to use.
   * @default undefined
   */
  client?: Client;
}
65
/**
 * Evaluate a target on a dataset.
 *
 * @param target The target system or function to evaluate.
 * @param options Evaluation settings (dataset, evaluators, metadata, ...); see `EvaluateOptions`.
 * @returns A promise that resolves to the `ExperimentResults` of the evaluation.
 */
export declare function evaluate(
  target: TargetT,
  options: EvaluateOptions
): Promise<ExperimentResults>;
70
/**
 * One row of experiment output: a run, an example, and the evaluation
 * results associated with them.
 */
interface ExperimentResultRow {
  /** The run for this row. */
  run: Run;
  /** The example for this row. */
  example: Example;
  /** Evaluation results attached to this row. */
  evaluationResults: EvaluationResults;
}
75
/**
 * Manages the execution of experiments.
 *
 * Predictions and evaluations are run lazily and in parallel so that results
 * can be streamed and problems surface early while debugging.
 */
declare class _ExperimentManager {
  // ---- State (mirrors the optional fields of `_ExperimentManagerArgs`) ----
  _data?: DataT;
  _runs?: AsyncGenerator<Run>;
  _evaluationResults?: AsyncGenerator<EvaluationResults>;
  _summaryResults?: AsyncGenerator<
    (runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>,
    any,
    unknown
  >;
  _examples?: Example[];
  _runsArray?: Run[];
  client: Client;
  _experiment?: TracerSession;
  _experimentName: string;
  _metadata: KVMap;

  // ---- Accessors ----
  get experimentName(): string;
  getExamples(): Promise<Example[]>;
  setExamples(examples: Example[]): void;
  get datasetId(): Promise<string>;
  get evaluationResults(): AsyncGenerator<EvaluationResults>;
  get runs(): AsyncGenerator<Run>;

  constructor(args: _ExperimentManagerArgs);

  // ---- Experiment / project helpers ----
  _getExperiment(): TracerSession;
  _getExperimentMetadata(): Promise<KVMap>;
  _getProject(firstExample: Example): Promise<TracerSession>;
  _printExperimentStart(): void;

  // ---- Pipeline steps; each resolves to an `_ExperimentManager` ----
  start(): Promise<_ExperimentManager>;
  withPredictions(
    target: TargetNoInvoke,
    options?: { maxConcurrency?: number }
  ): Promise<_ExperimentManager>;
  withEvaluators(
    evaluators: Array<EvaluatorT | RunEvaluator>,
    options?: { maxConcurrency?: number }
  ): Promise<_ExperimentManager>;
  withSummaryEvaluators(summaryEvaluators: SummaryEvaluatorT[]): Promise<_ExperimentManager>;

  // ---- Result access ----
  getResults(): AsyncGenerator<ExperimentResultRow>;
  getSummaryScores(): Promise<EvaluationResults>;

  /**
   * Run the target function on the examples.
   *
   * @param target The target function to evaluate.
   * @param options Optional settings; `maxConcurrency` may be supplied here.
   * @returns An async generator of the results.
   */
  _predict(
    target: TargetNoInvoke,
    options?: { maxConcurrency?: number }
  ): AsyncGenerator<_ForwardResults>;

  _runEvaluators(
    evaluators: RunEvaluator[],
    currentResults: ExperimentResultRow,
    fields: { experimentName: string; client: Client }
  ): Promise<ExperimentResultRow>;

  /**
   * Run the evaluators on the prediction stream.
   *
   * Expects runs to already be available in the manager
   * (e.g. from a previous prediction step).
   *
   * @param evaluators The run evaluators to apply.
   * @param options Optional settings; `maxConcurrency` may be supplied here.
   */
  _score(
    evaluators: RunEvaluator[],
    options?: { maxConcurrency?: number }
  ): AsyncGenerator<ExperimentResultRow>;

  _applySummaryEvaluators(
    summaryEvaluators: SummaryEvaluatorT[]
  ): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
  _getDatasetVersion(): Promise<string | undefined>;
  _end(): Promise<void>;
}
140
/**
 * The results of an `evaluate()` call.
 *
 * Exposes an async-iterator interface so experiment result rows can be
 * consumed as they become available, plus accessors for the experiment name
 * and the number of results, and a method to wait for results to be processed.
 */
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
  private manager;

  /** Result rows held by this object. */
  results: Array<ExperimentResultRow>;
  /** Numeric counter of processed items. */
  processedCount: number;
  /** Summary-level evaluation results. */
  summaryResults: EvaluationResults;

  constructor(experimentManager: _ExperimentManager);

  /** Name of the experiment. */
  get experimentName(): string;

  /** Async-iterator protocol: yields `ExperimentResultRow` values. */
  [Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
  next(): Promise<IteratorResult<ExperimentResultRow>>;

  /** Process data from the given manager; resolves with no value. */
  processData(manager: _ExperimentManager): Promise<void>;

  /** Number of results. */
  get length(): number;
}
158
+ export {};