langsmith 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/client.cjs +69 -29
- package/dist/client.d.ts +7 -3
- package/dist/client.js +46 -6
- package/dist/evaluation/_random_name.cjs +730 -0
- package/dist/evaluation/_random_name.d.ts +5 -0
- package/dist/evaluation/_random_name.js +726 -0
- package/dist/evaluation/_runner.cjs +709 -0
- package/dist/evaluation/_runner.d.ts +158 -0
- package/dist/evaluation/_runner.js +705 -0
- package/dist/evaluation/evaluator.cjs +86 -0
- package/dist/evaluation/evaluator.d.ts +31 -27
- package/dist/evaluation/evaluator.js +83 -1
- package/dist/evaluation/index.cjs +3 -1
- package/dist/evaluation/index.d.ts +1 -0
- package/dist/evaluation/index.js +1 -0
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/run_trees.d.ts +1 -0
- package/dist/schemas.d.ts +22 -1
- package/dist/traceable.cjs +64 -7
- package/dist/traceable.d.ts +2 -0
- package/dist/traceable.js +62 -6
- package/dist/utils/_git.cjs +72 -0
- package/dist/utils/_git.d.ts +14 -0
- package/dist/utils/_git.js +67 -0
- package/dist/utils/_uuid.cjs +33 -0
- package/dist/utils/_uuid.d.ts +1 -0
- package/dist/utils/_uuid.js +6 -0
- package/dist/utils/atee.cjs +24 -0
- package/dist/utils/atee.d.ts +1 -0
- package/dist/utils/atee.js +20 -0
- package/package.json +1 -1
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { Client } from "../index.js";
|
|
2
|
+
import { Example, KVMap, Run, TracerSession } from "../schemas.js";
|
|
3
|
+
import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
|
|
4
|
+
type TargetT = ((input: KVMap, config?: KVMap) => Promise<KVMap>) | ((input: KVMap, config?: KVMap) => KVMap) | {
|
|
5
|
+
invoke: (input: KVMap, config?: KVMap) => KVMap;
|
|
6
|
+
} | {
|
|
7
|
+
invoke: (input: KVMap, config?: KVMap) => Promise<KVMap>;
|
|
8
|
+
};
|
|
9
|
+
type TargetNoInvoke = ((input: KVMap, config?: KVMap) => Promise<KVMap>) | ((input: KVMap, config?: KVMap) => KVMap);
|
|
10
|
+
type DataT = string | AsyncIterable<Example> | Example[];
|
|
11
|
+
type SummaryEvaluatorT = ((runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResults>) | ((runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResults);
|
|
12
|
+
type EvaluatorT = RunEvaluator | ((run: Run, example?: Example) => EvaluationResult) | ((run: Run, example?: Example) => Promise<EvaluationResult>);
|
|
13
|
+
interface _ForwardResults {
|
|
14
|
+
run: Run;
|
|
15
|
+
example: Example;
|
|
16
|
+
}
|
|
17
|
+
interface _ExperimentManagerArgs {
|
|
18
|
+
data?: DataT;
|
|
19
|
+
experiment?: TracerSession | string;
|
|
20
|
+
metadata?: KVMap;
|
|
21
|
+
client?: Client;
|
|
22
|
+
runs?: AsyncGenerator<Run>;
|
|
23
|
+
evaluationResults?: AsyncGenerator<EvaluationResults>;
|
|
24
|
+
summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
|
|
25
|
+
examples?: Example[];
|
|
26
|
+
_runsArray?: Run[];
|
|
27
|
+
}
|
|
28
|
+
export interface EvaluateOptions {
|
|
29
|
+
/**
|
|
30
|
+
* The dataset to evaluate on. Can be a dataset name, a list of
|
|
31
|
+
* examples, or an async iterable of examples.
|
|
32
|
+
*/
|
|
33
|
+
data: DataT;
|
|
34
|
+
/**
|
|
35
|
+
* A list of evaluators to run on each example.
|
|
36
|
+
* @default undefined
|
|
37
|
+
*/
|
|
38
|
+
evaluators?: Array<EvaluatorT>;
|
|
39
|
+
/**
|
|
40
|
+
* A list of summary evaluators to run on the entire dataset.
|
|
41
|
+
* @default undefined
|
|
42
|
+
*/
|
|
43
|
+
summaryEvaluators?: Array<SummaryEvaluatorT>;
|
|
44
|
+
/**
|
|
45
|
+
* Metadata to attach to the experiment.
|
|
46
|
+
* @default undefined
|
|
47
|
+
*/
|
|
48
|
+
metadata?: KVMap;
|
|
49
|
+
/**
|
|
50
|
+
* A prefix to provide for your experiment name.
|
|
51
|
+
* @default undefined
|
|
52
|
+
*/
|
|
53
|
+
experimentPrefix?: string;
|
|
54
|
+
/**
|
|
55
|
+
* The maximum number of concurrent evaluations to run.
|
|
56
|
+
* @default undefined
|
|
57
|
+
*/
|
|
58
|
+
maxConcurrency?: number;
|
|
59
|
+
/**
|
|
60
|
+
* The LangSmith client to use.
|
|
61
|
+
* @default undefined
|
|
62
|
+
*/
|
|
63
|
+
client?: Client;
|
|
64
|
+
}
|
|
65
|
+
export declare function evaluate(
|
|
66
|
+
/**
|
|
67
|
+
* The target system or function to evaluate.
|
|
68
|
+
*/
|
|
69
|
+
target: TargetT, options: EvaluateOptions): Promise<ExperimentResults>;
|
|
70
|
+
interface ExperimentResultRow {
|
|
71
|
+
run: Run;
|
|
72
|
+
example: Example;
|
|
73
|
+
evaluationResults: EvaluationResults;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Manage the execution of experiments.
|
|
77
|
+
*
|
|
78
|
+
* Supports lazily running predictions and evaluations in parallel to facilitate
|
|
79
|
+
* result streaming and early debugging.
|
|
80
|
+
*/
|
|
81
|
+
declare class _ExperimentManager {
|
|
82
|
+
_data?: DataT;
|
|
83
|
+
_runs?: AsyncGenerator<Run>;
|
|
84
|
+
_evaluationResults?: AsyncGenerator<EvaluationResults>;
|
|
85
|
+
_summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
|
|
86
|
+
_examples?: Example[];
|
|
87
|
+
_runsArray?: Run[];
|
|
88
|
+
client: Client;
|
|
89
|
+
_experiment?: TracerSession;
|
|
90
|
+
_experimentName: string;
|
|
91
|
+
_metadata: KVMap;
|
|
92
|
+
get experimentName(): string;
|
|
93
|
+
getExamples(): Promise<Array<Example>>;
|
|
94
|
+
setExamples(examples: Example[]): void;
|
|
95
|
+
get datasetId(): Promise<string>;
|
|
96
|
+
get evaluationResults(): AsyncGenerator<EvaluationResults>;
|
|
97
|
+
get runs(): AsyncGenerator<Run>;
|
|
98
|
+
constructor(args: _ExperimentManagerArgs);
|
|
99
|
+
_getExperiment(): TracerSession;
|
|
100
|
+
_getExperimentMetadata(): Promise<KVMap>;
|
|
101
|
+
_getProject(firstExample: Example): Promise<TracerSession>;
|
|
102
|
+
_printExperimentStart(): void;
|
|
103
|
+
start(): Promise<_ExperimentManager>;
|
|
104
|
+
withPredictions(target: TargetNoInvoke, options?: {
|
|
105
|
+
maxConcurrency?: number;
|
|
106
|
+
}): Promise<_ExperimentManager>;
|
|
107
|
+
withEvaluators(evaluators: Array<EvaluatorT | RunEvaluator>, options?: {
|
|
108
|
+
maxConcurrency?: number;
|
|
109
|
+
}): Promise<_ExperimentManager>;
|
|
110
|
+
withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;
|
|
111
|
+
getResults(): AsyncGenerator<ExperimentResultRow>;
|
|
112
|
+
getSummaryScores(): Promise<EvaluationResults>;
|
|
113
|
+
/**
|
|
114
|
+
* Run the target function on the examples.
|
|
115
|
+
* @param {TargetNoInvoke} target The target function to evaluate.
|
|
116
|
+
* @param options Optional settings object; may include `maxConcurrency`.
|
|
117
|
+
* @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
|
|
118
|
+
*/
|
|
119
|
+
_predict(target: TargetNoInvoke, options?: {
|
|
120
|
+
maxConcurrency?: number;
|
|
121
|
+
}): AsyncGenerator<_ForwardResults>;
|
|
122
|
+
_runEvaluators(evaluators: Array<RunEvaluator>, currentResults: ExperimentResultRow, fields: {
|
|
123
|
+
experimentName: string;
|
|
124
|
+
client: Client;
|
|
125
|
+
}): Promise<ExperimentResultRow>;
|
|
126
|
+
/**
|
|
127
|
+
* Run the evaluators on the prediction stream.
|
|
128
|
+
* Expects runs to be available in the manager.
|
|
129
|
+
* (e.g. from a previous prediction step)
|
|
130
|
+
* @param {Array<RunEvaluator>} evaluators
|
|
131
|
+
* @param {{ maxConcurrency?: number }} [options]
|
|
132
|
+
*/
|
|
133
|
+
_score(evaluators: Array<RunEvaluator>, options?: {
|
|
134
|
+
maxConcurrency?: number;
|
|
135
|
+
}): AsyncGenerator<ExperimentResultRow>;
|
|
136
|
+
_applySummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
|
|
137
|
+
_getDatasetVersion(): Promise<string | undefined>;
|
|
138
|
+
_end(): Promise<void>;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Represents the results of an evaluate() call.
|
|
142
|
+
* This class provides an async iterator interface to iterate over the experiment results
|
|
143
|
+
* as they become available. It also provides methods to access the experiment name,
|
|
144
|
+
* the number of results, and to wait for the results to be processed.
|
|
145
|
+
*/
|
|
146
|
+
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
|
|
147
|
+
private manager;
|
|
148
|
+
results: ExperimentResultRow[];
|
|
149
|
+
processedCount: number;
|
|
150
|
+
summaryResults: EvaluationResults;
|
|
151
|
+
constructor(experimentManager: _ExperimentManager);
|
|
152
|
+
get experimentName(): string;
|
|
153
|
+
[Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
|
|
154
|
+
next(): Promise<IteratorResult<ExperimentResultRow>>;
|
|
155
|
+
processData(manager: _ExperimentManager): Promise<void>;
|
|
156
|
+
get length(): number;
|
|
157
|
+
}
|
|
158
|
+
export {};
|