langchain 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/load/import_map.cjs +2 -1
- package/dist/load/import_map.d.ts +1 -0
- package/dist/load/import_map.js +1 -0
- package/dist/smith/config.cjs +2 -0
- package/dist/smith/config.d.ts +166 -0
- package/dist/smith/config.js +1 -0
- package/dist/smith/index.cjs +5 -0
- package/dist/smith/index.d.ts +3 -0
- package/dist/smith/index.js +2 -0
- package/dist/smith/name_generation.cjs +726 -0
- package/dist/smith/name_generation.d.ts +1 -0
- package/dist/smith/name_generation.js +722 -0
- package/dist/smith/progress.cjs +69 -0
- package/dist/smith/progress.d.ts +22 -0
- package/dist/smith/progress.js +65 -0
- package/dist/smith/runner_utils.cjs +353 -0
- package/dist/smith/runner_utils.d.ts +77 -0
- package/dist/smith/runner_utils.js +349 -0
- package/package.json +11 -3
- package/smith.cjs +1 -0
- package/smith.d.ts +1 -0
- package/smith.js +1 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
|
|
2
|
+
import { Runnable, RunnableLambda } from "@langchain/core/runnables";
|
|
3
|
+
import { RunCollectorCallbackHandler } from "@langchain/core/tracers/run_collector";
|
|
4
|
+
import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
|
|
5
|
+
import { Client } from "langsmith";
|
|
6
|
+
import { loadEvaluator } from "../evaluation/loader.js";
|
|
7
|
+
import { randomName } from "./name_generation.js";
|
|
8
|
+
import { ProgressBar } from "./progress.js";
|
|
9
|
+
/**
 * Adapts a plain evaluator function to the RunEvaluator interface by
 * wrapping it in a RunnableLambda.
 */
class DynamicRunEvaluator {
    /**
     * @param evaluator Function that will be invoked with a single
     * `{ run, example }` argument.
     */
    constructor(evaluator) {
        const wrappedEvaluator = new RunnableLambda({ func: evaluator });
        Object.defineProperty(this, "evaluator", {
            value: wrappedEvaluator,
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
    /**
     * Runs the wrapped evaluator against a run and an optional example.
     * @param run The run to evaluate.
     * @param example The optional example to use for evaluation.
     * @returns A promise that resolves to whatever the wrapped evaluator produces.
     */
    async evaluateRun(run, example) {
        const payload = { run, example };
        const evaluation = await this.evaluator.invoke(payload);
        return evaluation;
    }
}
|
|
32
|
+
/**
 * Reports whether a value looks like a string evaluator, i.e. whether it
 * exposes an `evaluateStrings` method.
 * @param evaluator Candidate value; may be null or undefined.
 * @returns `true` when `evaluator.evaluateStrings` is a function, otherwise `false`.
 */
function isLLMStringEvaluator(evaluator) {
    // Optional chaining keeps the result a strict boolean even for
    // null/undefined input, instead of echoing the falsy input back.
    return typeof evaluator?.evaluateStrings === "function";
}
|
|
35
|
+
/**
 * Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
 * and composes it with a formatEvaluatorInputs function so the user can prepare
 * the trace and dataset data for the evaluator.
 */
class PreparedRunEvaluator {
    /**
     * @param evaluator The loaded evaluator; only evaluators exposing
     * `evaluateStrings` can actually be run.
     * @param evaluationName Feedback key reported with every result.
     * @param formatEvaluatorInputs Function mapping the raw run/example data to
     * `{ prediction, input, reference }`.
     */
    constructor(evaluator, evaluationName, formatEvaluatorInputs) {
        // Insertion order below fixes the enumeration order of the instance's
        // own properties.
        const fields = {
            evaluator,
            formatEvaluatorInputs,
            isStringEvaluator: typeof evaluator?.evaluateStrings === "function",
            evaluationName,
        };
        for (const [name, value] of Object.entries(fields)) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
    }
    /**
     * Loads the evaluator described by `config` and wraps it.
     * @param config Either an evaluator type string or an EvalConfig object
     * carrying `evaluatorType` and optionally `feedbackKey` and
     * `formatEvaluatorInputs`.
     * @returns A promise that resolves to a PreparedRunEvaluator.
     * @throws Error if no feedback key can be determined, or if the loaded
     * evaluator is not a string evaluator.
     */
    static async fromEvalConfig(config) {
        const evaluatorType = typeof config === "string" ? config : config.evaluatorType;
        const evalConfig = typeof config === "string" ? {} : config;
        const evaluator = await loadEvaluator(evaluatorType, evalConfig);
        const feedbackKey = evalConfig?.feedbackKey ?? evaluator?.evaluationName;
        if (!feedbackKey) {
            throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
                ` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
        }
        if (!isLLMStringEvaluator(evaluator)) {
            throw new Error(`Evaluator of type ${evaluatorType} not yet supported. ` +
                "Please use a string evaluator, or implement your " +
                "evaluation logic as a customEvaluator.");
        }
        return new PreparedRunEvaluator(evaluator, feedbackKey, evalConfig?.formatEvaluatorInputs);
    }
    /**
     * Evaluates a run with an optional example and returns the evaluation result.
     * @param run The run to evaluate.
     * @param example The optional example to use for evaluation.
     * @returns A promise that resolves to the evaluation result: the evaluator's
     * output merged over `{ key, comment }`.
     * @throws TypeError if no formatEvaluatorInputs function was supplied.
     * @throws Error if the wrapped evaluator is not a string evaluator.
     */
    async evaluateRun(run, example) {
        // A string-only config never carries a formatter; fail with an
        // actionable message rather than "is not a function".
        if (typeof this.formatEvaluatorInputs !== "function") {
            throw new TypeError(`Evaluator "${this.evaluationName}" has no formatEvaluatorInputs function. ` +
                "Please provide formatEvaluatorInputs in the EvalConfig.");
        }
        const { prediction, input, reference } = this.formatEvaluatorInputs({
            rawInput: run.inputs,
            rawPrediction: run.outputs,
            rawReferenceOutput: example?.outputs,
            run,
        });
        if (!this.isStringEvaluator) {
            throw new Error("Evaluator not yet supported. " +
                "Please use a string evaluator, or implement your " +
                "evaluation logic as a customEvaluator.");
        }
        const evalResult = await this.evaluator.evaluateStrings({
            prediction,
            reference,
            input,
        });
        // Spread last so fields returned by the evaluator win over the defaults.
        return {
            key: this.evaluationName,
            comment: evalResult?.reasoning,
            ...evalResult,
        };
    }
}
|
|
117
|
+
/**
 * Holds the resolved list of run evaluators built from a run evaluation config.
 */
class LoadedEvalConfig {
    /**
     * @param evaluators The evaluators to store on the instance.
     */
    constructor(evaluators) {
        const descriptor = {
            value: evaluators,
            writable: true,
            enumerable: true,
            configurable: true,
        };
        Object.defineProperty(this, "evaluators", descriptor);
    }
    /**
     * Builds a LoadedEvalConfig from a run evaluation config.
     * @param config Optional config with `customEvaluators` and/or `evaluators`.
     * @returns A promise that resolves to a LoadedEvalConfig whose evaluators
     * list the custom evaluators first, followed by the off-the-shelf ones.
     */
    static async fromRunEvalConfig(config) {
        // Custom evaluators are applied "as-is"; bare functions get wrapped.
        const wrapIfFunction = (candidate) => typeof candidate === "function"
            ? new DynamicRunEvaluator(candidate)
            : candidate;
        const custom = config?.customEvaluators?.map(wrapIfFunction) ?? [];
        const pending = (config?.evaluators ?? []).map((spec) => PreparedRunEvaluator.fromEvalConfig(spec));
        const offTheShelf = await Promise.all(pending);
        return new LoadedEvalConfig(custom.concat(offTheShelf));
    }
}
|
|
140
|
+
/**
 * Internals expect a constructor () -> Runnable. This function coerces the
 * provided LangChain object, custom function, or factory function into such
 * a constructor.
 *
 * A function argument is probed by calling it once with no arguments: if the
 * call (and, for a thenable result, awaiting it) succeeds, the function is
 * treated as a factory and returned unchanged; if it throws or rejects, it
 * is treated as a user-defined function and wrapped in a RunnableLambda.
 * @param modelOrFactory The model or factory to create a wrapped model from.
 * @returns A function that returns the wrapped model.
 * @throws Error if the modelOrFactory is neither a Runnable nor a function.
 */
const createWrappedModel = async (modelOrFactory) => {
    if (Runnable.isRunnable(modelOrFactory)) {
        return () => modelOrFactory;
    }
    if (typeof modelOrFactory !== "function") {
        throw new Error("Invalid modelOrFactory");
    }
    try {
        // If it works with no arguments, assume it's a factory.
        const probe = modelOrFactory();
        const isThenable = probe && typeof probe.then === "function";
        if (isThenable) {
            await probe;
        }
        return modelOrFactory;
    }
    catch {
        // Otherwise, it's a custom UDF, and we'll wrap it in a lambda.
        const lambda = new RunnableLambda({ func: modelOrFactory });
        return () => lambda;
    }
};
|
|
171
|
+
/**
 * Streams every example of a dataset and prepares, per example, a run config
 * whose callbacks are a LangChainTracer and a RunCollectorCallbackHandler,
 * both constructed with the example's id.
 * @param datasetName Name passed to `client.listExamples`.
 * @param client Client exposing an async-iterable `listExamples`.
 * @param projectName Project name handed to each LangChainTracer.
 * @returns `{ configs, examples, runCollectors }`, three arrays aligned by index.
 */
const loadExamples = async ({ datasetName, client, projectName, }) => {
    const examples = [];
    const configs = [];
    const runCollectors = [];
    for await (const example of client.listExamples({ datasetName })) {
        const exampleId = example.id;
        const collector = new RunCollectorCallbackHandler({ exampleId });
        const tracer = new LangChainTracer({ exampleId, projectName });
        configs.push({ callbacks: [tracer, collector] });
        examples.push(example);
        runCollectors.push(collector);
    }
    return { configs, examples, runCollectors };
};
|
|
195
|
+
/**
 * Runs every loaded evaluator against each run, one run at a time, and
 * gathers the feedback keyed by example id.
 * @param evaluation Object carrying the `evaluators` list.
 * @param runs Runs to evaluate; `runs[i]` is paired with `examples[i]`.
 * @param examples Reference examples, index-aligned with `runs`.
 * @param client Client whose `evaluateRun` is called for each evaluator.
 * @returns Map from example id to `{ execution_time, feedback, run_id }`.
 */
const applyEvaluators = async ({ evaluation, runs, examples, client, }) => {
    // TODO: Parallelize and/or put in callbacks to speed up evals.
    const { evaluators } = evaluation;
    // NOTE(review): the bar total is examples.length while the loop below
    // walks runs; the two are presumably the same length - confirm at callers.
    const progress = new ProgressBar({
        total: examples.length,
        format: "Running Evaluators: {bar} {percentage}% | {value}/{total}\n",
    });
    const resultsByExampleId = {};
    for (const [index, run] of runs.entries()) {
        const example = examples[index];
        // Evaluators for a single run execute concurrently; runs are sequential.
        const pendingFeedback = evaluators.map((evaluator) => client.evaluateRun(run, evaluator, {
            referenceExample: example,
            loadChildRuns: false,
        }));
        const feedback = await Promise.all(pendingFeedback);
        progress.increment();
        const hasTiming = run?.end_time && run.start_time;
        resultsByExampleId[example.id] = {
            execution_time: hasTiming ? run.end_time - run.start_time : undefined,
            feedback,
            run_id: run.id,
        };
    }
    return resultsByExampleId;
};
|
|
221
|
+
/**
 * Converts dataset examples into the inputs handed to the model under test.
 * @param examples Dataset examples; each must carry an `inputs` object.
 * @param chainOrFactory The object under evaluation; only inspected to detect
 * a language model (an object exposing an `_llmType` method).
 * @param dataType Dataset data type; `"chat"` selects chat-message conversion.
 * @returns One input per example: chat messages for chat datasets, the single
 * input value for language models when every example has exactly one input
 * key, otherwise the raw `inputs` object.
 */
const getExamplesInputs = (examples, chainOrFactory, dataType) => {
    if (dataType === "chat") {
        // Chat datasets store messages in serialized form, e.g.
        // { type: "system", data: { content: inputs.input } },
        // but we need to create AIMessage, SystemMessage, etc.
        return examples.map(({ inputs }) => mapStoredMessagesToChatMessages(inputs.input));
    }
    // `typeof null` is "object", so null must be excluded explicitly before
    // reading `_llmType`.
    const isLanguageModel = chainOrFactory !== null &&
        typeof chainOrFactory === "object" &&
        typeof chainOrFactory._llmType === "function";
    // If it's a language model and ALL example inputs have a single value,
    // then we can be friendly and flatten the inputs to a list of values.
    if (isLanguageModel &&
        examples.every(({ inputs }) => Object.keys(inputs).length === 1)) {
        return examples.map(({ inputs }) => Object.values(inputs)[0]);
    }
    return examples.map(({ inputs }) => inputs);
};
|
|
238
|
+
/**
 * Evaluates a given model or chain against a specified LangSmith dataset.
 *
 * This function fetches example records from the specified dataset,
 * runs the model or chain against each example, and returns the evaluation
 * results.
 *
 * @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
 * Runnable instance, a factory function that returns a Runnable, or a user-defined
 * function or factory.
 *
 * @param datasetName - The name of the dataset against which the evaluation will be
 * performed. This dataset should already be defined and contain the relevant data
 * for evaluation.
 *
 * @param options - (Optional) Additional parameters for the evaluation process:
 *   - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
 *     standard and custom evaluators. When omitted, no evaluators are run.
 *   - `projectName` (string): Name of the project for logging and tracking.
 *     Defaults to a randomly generated name.
 *   - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
 *   - `client` (Client): LangSmith client instance. Defaults to `new Client()`.
 *   - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
 *     Defaults to 5.
 *
 * @returns A promise that resolves to an object with the `projectName` used and
 * `results`, a map from example id to execution time, run id, and feedback.
 * `results` is empty when no `evaluationConfig` is given.
 *
 * @example
 * ```typescript
 * // Example usage for evaluating a model on a dataset
 * async function evaluateModel() {
 *   const chain = yourModelOrChain; // ...create your model or chain...
 *   const datasetName = 'example-dataset';
 *   const client = new Client(); // ...pass config as needed...
 *
 *   const evaluationConfig = new RunEvalConfig({
 *     evaluators: [], // ...evaluators...
 *     customEvaluators: [], // ...custom evaluators...
 *   });
 *
 *   const results = await runOnDataset(chain, datasetName, {
 *     evaluationConfig,
 *     client,
 *   });
 *
 *   console.log('Evaluation Results:', results);
 * }
 *
 * evaluateModel();
 * ```
 * In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
 * a dataset named 'example-dataset'. The evaluation process is configured using `RunEvalConfig`, which can
 * include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith.
 * The function returns the evaluation results, which can be logged or further processed as needed.
 */
export const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, } = {}) => {
    const wrappedModel = await createWrappedModel(chainOrFactory);
    const testClient = client ?? new Client();
    const testProjectName = projectName ?? randomName();
    const dataset = await testClient.readDataset({ datasetName });
    const datasetId = dataset.id;
    const testConcurrency = maxConcurrency ?? 5;
    const { configs, examples, runCollectors } = await loadExamples({
        datasetName,
        client: testClient,
        projectName: testProjectName,
        maxConcurrency: testConcurrency,
    });
    await testClient.createProject({
        projectName: testProjectName,
        referenceDatasetId: datasetId,
        projectExtra: { metadata: { ...projectMetadata } },
    });
    const wrappedRunnable = new RunnableLambda({
        func: wrappedModel,
    }).withConfig({ runName: "evaluationRun" });
    const runInputs = getExamplesInputs(examples, chainOrFactory, dataset.data_type);
    const progress = new ProgressBar({
        total: runInputs.length,
        format: "Predicting: {bar} {percentage}% | {value}/{total}",
    });
    // TODO: Collect the runs as well.
    await wrappedRunnable
        .withListeners({
        onEnd: () => progress.increment(),
    })
        // TODO: Insert evaluation inline for immediate feedback.
        .batch(runInputs, configs, {
        // Use the resolved value so the default of 5 applies when the caller
        // passes no maxConcurrency.
        maxConcurrency: testConcurrency,
        // Failures are returned rather than thrown so one bad example does
        // not abort the whole batch.
        returnExceptions: true,
    });
    progress.complete();
    // Each collector was created for exactly one example; take its first traced run.
    const runs = [];
    for (let i = 0; i < examples.length; i += 1) {
        runs.push(runCollectors[i].tracedRuns[0]);
    }
    let evalResults = {};
    if (evaluationConfig) {
        const loadedEvalConfig = await LoadedEvalConfig.fromRunEvalConfig(evaluationConfig);
        evalResults = await applyEvaluators({
            evaluation: loadedEvalConfig,
            runs,
            examples,
            client: testClient,
        });
    }
    const results = {
        projectName: testProjectName,
        results: evalResults ?? {},
    };
    return results;
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -865,6 +865,9 @@
|
|
|
865
865
|
"evaluation.cjs",
|
|
866
866
|
"evaluation.js",
|
|
867
867
|
"evaluation.d.ts",
|
|
868
|
+
"smith.cjs",
|
|
869
|
+
"smith.js",
|
|
870
|
+
"smith.d.ts",
|
|
868
871
|
"runnables.cjs",
|
|
869
872
|
"runnables.js",
|
|
870
873
|
"runnables.d.ts",
|
|
@@ -1001,7 +1004,7 @@
|
|
|
1001
1004
|
"@google-ai/generativelanguage": "^0.2.1",
|
|
1002
1005
|
"@google-cloud/storage": "^6.10.1",
|
|
1003
1006
|
"@notionhq/client": "^2.2.10",
|
|
1004
|
-
"@pinecone-database/pinecone": "
|
|
1007
|
+
"@pinecone-database/pinecone": "*",
|
|
1005
1008
|
"@supabase/supabase-js": "^2.10.0",
|
|
1006
1009
|
"@vercel/kv": "^0.2.3",
|
|
1007
1010
|
"@xata.io/client": "^0.28.0",
|
|
@@ -1211,7 +1214,7 @@
|
|
|
1211
1214
|
"js-yaml": "^4.1.0",
|
|
1212
1215
|
"jsonpointer": "^5.0.1",
|
|
1213
1216
|
"langchainhub": "~0.0.6",
|
|
1214
|
-
"langsmith": "~0.0.
|
|
1217
|
+
"langsmith": "~0.0.59",
|
|
1215
1218
|
"ml-distance": "^4.0.0",
|
|
1216
1219
|
"openapi-types": "^12.1.3",
|
|
1217
1220
|
"p-retry": "4",
|
|
@@ -2663,6 +2666,11 @@
|
|
|
2663
2666
|
"import": "./evaluation.js",
|
|
2664
2667
|
"require": "./evaluation.cjs"
|
|
2665
2668
|
},
|
|
2669
|
+
"./smith": {
|
|
2670
|
+
"types": "./smith.d.ts",
|
|
2671
|
+
"import": "./smith.js",
|
|
2672
|
+
"require": "./smith.cjs"
|
|
2673
|
+
},
|
|
2666
2674
|
"./runnables": {
|
|
2667
2675
|
"types": "./runnables.d.ts",
|
|
2668
2676
|
"import": "./runnables.js",
|
package/smith.cjs
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('./dist/smith/index.cjs');
|
package/smith.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './dist/smith/index.js'
|
package/smith.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './dist/smith/index.js'
|