langchain 0.1.19-rc.0 → 0.1.19-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/smith/config.cjs +55 -0
- package/dist/smith/config.d.ts +34 -4
- package/dist/smith/config.js +50 -1
- package/dist/smith/runner_utils.cjs +79 -60
- package/dist/smith/runner_utils.d.ts +7 -3
- package/dist/smith/runner_utils.js +81 -62
- package/package.json +2 -2
package/dist/smith/config.cjs
CHANGED
@@ -1,2 +1,57 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.LabeledCriteria = exports.Criteria = exports.isCustomEvaluator = exports.isOffTheShelfEvaluator = void 0;
+function isOffTheShelfEvaluator(evaluator) {
+    return typeof evaluator === "string" || "evaluatorType" in evaluator;
+}
+exports.isOffTheShelfEvaluator = isOffTheShelfEvaluator;
+function isCustomEvaluator(evaluator) {
+    return !isOffTheShelfEvaluator(evaluator);
+}
+exports.isCustomEvaluator = isCustomEvaluator;
+const isStringifiableValue = (value) => typeof value === "string" ||
+    typeof value === "number" ||
+    typeof value === "boolean" ||
+    typeof value === "bigint";
+const getSingleStringifiedValue = (value) => {
+    if (isStringifiableValue(value)) {
+        return `${value}`;
+    }
+    if (typeof value === "object" && value != null && !Array.isArray(value)) {
+        const entries = Object.entries(value);
+        if (entries.length === 1 && isStringifiableValue(entries[0][1])) {
+            return `${entries[0][1]}`;
+        }
+    }
+    console.warn("Non-stringifiable value found when coercing", value);
+    return `${value}`;
+};
+function Criteria(criteria, config) {
+    const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+        ((payload) => ({
+            prediction: getSingleStringifiedValue(payload.rawPrediction),
+            input: getSingleStringifiedValue(payload.rawInput),
+        }));
+    return {
+        evaluatorType: "criteria",
+        criteria,
+        feedbackKey: config?.feedbackKey ?? criteria,
+        formatEvaluatorInputs,
+    };
+}
+exports.Criteria = Criteria;
+function LabeledCriteria(criteria, config) {
+    const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+        ((payload) => ({
+            prediction: getSingleStringifiedValue(payload.rawPrediction),
+            input: getSingleStringifiedValue(payload.rawInput),
+            reference: getSingleStringifiedValue(payload.rawReferenceOutput),
+        }));
+    return {
+        evaluatorType: "labeled_criteria",
+        criteria,
+        feedbackKey: config?.feedbackKey ?? criteria,
+        formatEvaluatorInputs,
+    };
+}
+exports.LabeledCriteria = LabeledCriteria;
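The compiled output above adds two evaluator-config factories, `Criteria` and `LabeledCriteria`, together with the `isOffTheShelfEvaluator`/`isCustomEvaluator` guards the runner uses to split a mixed evaluator list. A minimal usage sketch, assuming the helpers are re-exported from the `langchain/smith` entrypoint:

```ts
import { Criteria, LabeledCriteria } from "langchain/smith";

// The criteria name doubles as the default feedbackKey.
const conciseness = Criteria("conciseness");
// -> { evaluatorType: "criteria", criteria: "conciseness",
//      feedbackKey: "conciseness", formatEvaluatorInputs: [Function] }

// LabeledCriteria additionally formats a reference output; the feedback
// key (and the input formatter) can be overridden via the optional config.
const correctness = LabeledCriteria("correctness", {
  feedbackKey: "is_correct",
});
```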
package/dist/smith/config.d.ts
CHANGED
@@ -2,7 +2,7 @@ import { BaseLanguageModel } from "@langchain/core/language_models/base";
 import { RunnableConfig } from "@langchain/core/runnables";
 import { Example, Run } from "langsmith";
 import { EvaluationResult, RunEvaluator } from "langsmith/evaluation";
-import { Criteria } from "../evaluation/index.js";
+import { Criteria as CriteriaType } from "../evaluation/index.js";
 import { LoadEvaluatorOptions } from "../evaluation/loader.js";
 import { EvaluatorType } from "../evaluation/types.js";
 export type EvaluatorInputs = {
@@ -33,6 +33,8 @@ export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams, options?: {
 }) => Promise<EvaluationResult>) | ((props: DynamicRunEvaluatorParams, options?: {
     config?: RunnableConfig;
 }) => EvaluationResult);
+export declare function isOffTheShelfEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is T | EvalConfig;
+export declare function isCustomEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is U;
 /**
  * Configuration class for running evaluations on datasets.
  *
@@ -48,6 +50,8 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
      * Each evaluator is provided with a run trace containing the model
      * outputs, as well as an "example" object representing a record
      * in the dataset.
+     *
+     * @deprecated Use `evaluators` instead.
      */
     customEvaluators?: U[];
     /**
@@ -55,7 +59,7 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
      * You can optionally specify these by name, or by
      * configuring them with an EvalConfig object.
      */
-    evaluators?: (T | EvalConfig)[];
+    evaluators?: (T | EvalConfig | U)[];
     /**
      * Convert the evaluation data into formats that can be used by the evaluator.
      * This should most commonly be a string.
@@ -155,7 +159,7 @@ export type CriteriaEvalChainConfig = EvalConfig & {
      * https://smith.langchain.com/hub/langchain-ai/criteria-evaluator
      * for more information.
      */
-    criteria?:
+    criteria?: CriteriaType | Record<string, string>;
     /**
      * The feedback (or metric) name to use for the logged
      * evaluation results. If none provided, we default to
@@ -202,7 +206,7 @@ export type LabeledCriteria = EvalConfig & {
      * https://smith.langchain.com/hub/langchain-ai/labeled-criteria
      * for more information.
      */
-    criteria?:
+    criteria?: CriteriaType | Record<string, string>;
     /**
      * The feedback (or metric) name to use for the logged
     * evaluation results. If none provided, we default to
@@ -214,3 +218,29 @@ export type LabeledCriteria = EvalConfig & {
      */
     llm?: BaseLanguageModel;
 };
+export declare function Criteria(criteria: CriteriaType, config?: {
+    formatEvaluatorInputs?: EvaluatorInputFormatter;
+    feedbackKey?: string;
+}): {
+    evaluatorType: "criteria";
+    criteria: CriteriaType;
+    feedbackKey: string;
+    formatEvaluatorInputs: EvaluatorInputFormatter | ((payload: {
+        rawInput: any;
+        rawPrediction: any;
+        rawReferenceOutput?: any;
+        run: Run;
+    }) => {
+        prediction: string;
+        input: string;
+    });
+};
+export declare function LabeledCriteria(criteria: CriteriaType, config?: {
+    formatEvaluatorInputs?: EvaluatorInputFormatter;
+    feedbackKey?: string;
+}): {
+    evaluatorType: "labeled_criteria";
+    criteria: CriteriaType;
+    feedbackKey: string;
+    formatEvaluatorInputs: EvaluatorInputFormatter;
+};
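The typings above deprecate `customEvaluators` and widen `evaluators` to `(T | EvalConfig | U)[]`, so custom evaluator functions can now sit alongside off-the-shelf evaluators in a single list. A hedged sketch of a config under the new types (the `langchain/smith` import path and the destructured `run` field, per `DynamicRunEvaluatorParams`, are assumptions):

```ts
import { Criteria, type RunEvalConfig } from "langchain/smith";

const evaluation: RunEvalConfig = {
  evaluators: [
    // Off-the-shelf evaluator referenced by name (a key of EvaluatorType).
    "criteria",
    // Off-the-shelf evaluator built with the new helper (an EvalConfig).
    Criteria("helpfulness"),
    // Custom evaluator function, previously only allowed in customEvaluators.
    async ({ run }) => ({
      key: "has_output",
      score: run.outputs ? 1 : 0,
    }),
  ],
};
```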
package/dist/smith/config.js
CHANGED
@@ -1 +1,50 @@
-export {
+export function isOffTheShelfEvaluator(evaluator) {
+    return typeof evaluator === "string" || "evaluatorType" in evaluator;
+}
+export function isCustomEvaluator(evaluator) {
+    return !isOffTheShelfEvaluator(evaluator);
+}
+const isStringifiableValue = (value) => typeof value === "string" ||
+    typeof value === "number" ||
+    typeof value === "boolean" ||
+    typeof value === "bigint";
+const getSingleStringifiedValue = (value) => {
+    if (isStringifiableValue(value)) {
+        return `${value}`;
+    }
+    if (typeof value === "object" && value != null && !Array.isArray(value)) {
+        const entries = Object.entries(value);
+        if (entries.length === 1 && isStringifiableValue(entries[0][1])) {
+            return `${entries[0][1]}`;
+        }
+    }
+    console.warn("Non-stringifiable value found when coercing", value);
+    return `${value}`;
+};
+export function Criteria(criteria, config) {
+    const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+        ((payload) => ({
+            prediction: getSingleStringifiedValue(payload.rawPrediction),
+            input: getSingleStringifiedValue(payload.rawInput),
+        }));
+    return {
+        evaluatorType: "criteria",
+        criteria,
+        feedbackKey: config?.feedbackKey ?? criteria,
+        formatEvaluatorInputs,
+    };
+}
+export function LabeledCriteria(criteria, config) {
+    const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+        ((payload) => ({
+            prediction: getSingleStringifiedValue(payload.rawPrediction),
+            input: getSingleStringifiedValue(payload.rawInput),
+            reference: getSingleStringifiedValue(payload.rawReferenceOutput),
+        }));
+    return {
+        evaluatorType: "labeled_criteria",
+        criteria,
+        feedbackKey: config?.feedbackKey ?? criteria,
+        formatEvaluatorInputs,
+    };
+}
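The default formatter in both compiled variants coerces raw values with `getSingleStringifiedValue`: primitives stringify directly, a single-entry object collapses to its sole value, and anything else logs the "Non-stringifiable value" warning before falling back to `${value}`. For runs whose inputs or outputs carry several fields, a custom `formatEvaluatorInputs` avoids that warning; a sketch (the `rawInput`/`rawPrediction`/`rawReferenceOutput` payload fields match the code above, while the import path is an assumption):

```ts
import { LabeledCriteria } from "langchain/smith";

// Pick specific fields out of structured run data instead of relying on
// the default single-key coercion.
const qaCorrectness = LabeledCriteria("correctness", {
  formatEvaluatorInputs: ({ rawInput, rawPrediction, rawReferenceOutput }) => ({
    input: rawInput.question,
    prediction: rawPrediction.answer,
    reference: rawReferenceOutput.answer,
  }),
});
```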
|
@@ -7,6 +7,7 @@ const tracer_langchain_1 = require("@langchain/core/tracers/tracer_langchain");
|
|
|
7
7
|
const base_1 = require("@langchain/core/tracers/base");
|
|
8
8
|
const langsmith_1 = require("langsmith");
|
|
9
9
|
const loader_js_1 = require("../evaluation/loader.cjs");
|
|
10
|
+
const config_js_1 = require("./config.cjs");
|
|
10
11
|
const name_generation_js_1 = require("./name_generation.cjs");
|
|
11
12
|
const progress_js_1 = require("./progress.cjs");
|
|
12
13
|
class SingleRunIdExtractor {
|
|
@@ -114,6 +115,67 @@ class DynamicRunEvaluator {
|
|
|
114
115
|
function isLLMStringEvaluator(evaluator) {
|
|
115
116
|
return evaluator && typeof evaluator.evaluateStrings === "function";
|
|
116
117
|
}
|
|
118
|
+
class RunnableTraceable extends runnables_1.Runnable {
|
|
119
|
+
constructor(fields) {
|
|
120
|
+
super(fields);
|
|
121
|
+
Object.defineProperty(this, "lc_serializable", {
|
|
122
|
+
enumerable: true,
|
|
123
|
+
configurable: true,
|
|
124
|
+
writable: true,
|
|
125
|
+
value: false
|
|
126
|
+
});
|
|
127
|
+
Object.defineProperty(this, "lc_namespace", {
|
|
128
|
+
enumerable: true,
|
|
129
|
+
configurable: true,
|
|
130
|
+
writable: true,
|
|
131
|
+
value: ["langchain_core", "runnables"]
|
|
132
|
+
});
|
|
133
|
+
Object.defineProperty(this, "func", {
|
|
134
|
+
enumerable: true,
|
|
135
|
+
configurable: true,
|
|
136
|
+
writable: true,
|
|
137
|
+
value: void 0
|
|
138
|
+
});
|
|
139
|
+
if (!isLangsmithTraceableFunction(fields.func)) {
|
|
140
|
+
throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
|
|
141
|
+
}
|
|
142
|
+
this.func = fields.func;
|
|
143
|
+
}
|
|
144
|
+
async invoke(input, options) {
|
|
145
|
+
const [config] = this._getOptionsList(options ?? {}, 1);
|
|
146
|
+
const callbackManager = await (0, runnables_1.getCallbackManagerForConfig)(config);
|
|
147
|
+
const partialConfig = "langsmith:traceable" in this.func
|
|
148
|
+
? this.func["langsmith:traceable"]
|
|
149
|
+
: { name: "<lambda>" };
|
|
150
|
+
const runTree = new langsmith_1.RunTree({
|
|
151
|
+
...partialConfig,
|
|
152
|
+
parent_run: callbackManager?._parentRunId
|
|
153
|
+
? new langsmith_1.RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
|
|
154
|
+
: undefined,
|
|
155
|
+
});
|
|
156
|
+
if (typeof input === "object" &&
|
|
157
|
+
input != null &&
|
|
158
|
+
Object.keys(input).length === 1) {
|
|
159
|
+
if ("args" in input && Array.isArray(input)) {
|
|
160
|
+
return (await this.func(runTree, ...input));
|
|
161
|
+
}
|
|
162
|
+
if ("input" in input &&
|
|
163
|
+
!(typeof input === "object" &&
|
|
164
|
+
input != null &&
|
|
165
|
+
!Array.isArray(input) &&
|
|
166
|
+
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
167
|
+
!(input instanceof Date))) {
|
|
168
|
+
try {
|
|
169
|
+
return (await this.func(runTree, input.input));
|
|
170
|
+
}
|
|
171
|
+
catch (err) {
|
|
172
|
+
return (await this.func(runTree, input));
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return (await this.func(runTree, input));
|
|
177
|
+
}
|
|
178
|
+
}
|
|
117
179
|
/**
|
|
118
180
|
* Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
|
|
119
181
|
* and composes with a prepareData function so the user can prepare the trace and
|
|
@@ -213,7 +275,7 @@ class LoadedEvalConfig {
|
|
|
213
275
|
}
|
|
214
276
|
static async fromRunEvalConfig(config) {
|
|
215
277
|
// Custom evaluators are applied "as-is"
|
|
216
|
-
const customEvaluators = config?.customEvaluators?.map((evaluator) => {
|
|
278
|
+
const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(config_js_1.isCustomEvaluator))?.map((evaluator) => {
|
|
217
279
|
if (typeof evaluator === "function") {
|
|
218
280
|
return new DynamicRunEvaluator(evaluator);
|
|
219
281
|
}
|
|
@@ -221,7 +283,9 @@ class LoadedEvalConfig {
|
|
|
221
283
|
return evaluator;
|
|
222
284
|
}
|
|
223
285
|
});
|
|
224
|
-
const offTheShelfEvaluators = await Promise.all(config?.evaluators
|
|
286
|
+
const offTheShelfEvaluators = await Promise.all(config?.evaluators
|
|
287
|
+
?.filter(config_js_1.isOffTheShelfEvaluator)
|
|
288
|
+
?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
|
|
225
289
|
return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
|
|
226
290
|
}
|
|
227
291
|
}
|
|
@@ -249,7 +313,11 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
249
313
|
}
|
|
250
314
|
catch (err) {
|
|
251
315
|
// Otherwise, it's a custom UDF, and we'll wrap
|
|
252
|
-
// in a lambda
|
|
316
|
+
// in a lambda or a traceable function
|
|
317
|
+
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
318
|
+
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
319
|
+
return () => wrappedModel;
|
|
320
|
+
}
|
|
253
321
|
const wrappedModel = new runnables_1.RunnableLambda({ func: modelOrFactory });
|
|
254
322
|
return () => wrappedModel;
|
|
255
323
|
}
|
|
@@ -321,62 +389,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
321
389
|
}
|
|
322
390
|
return examples.map(({ inputs }) => inputs);
|
|
323
391
|
};
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
* runs the model or chain against each example, and returns the evaluation
|
|
329
|
-
* results.
|
|
330
|
-
*
|
|
331
|
-
* @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
|
|
332
|
-
* Runnable instance, a factory function that returns a Runnable, or a user-defined
|
|
333
|
-
* function or factory.
|
|
334
|
-
*
|
|
335
|
-
* @param datasetName - The name of the dataset against which the evaluation will be
|
|
336
|
-
* performed. This dataset should already be defined and contain the relevant data
|
|
337
|
-
* for evaluation.
|
|
338
|
-
*
|
|
339
|
-
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
340
|
-
* - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
|
|
341
|
-
* standard and custom evaluators.
|
|
342
|
-
* - `projectName` (string): Name of the project for logging and tracking.
|
|
343
|
-
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
344
|
-
* - `client` (Client): Client instance for LangChain service interaction.
|
|
345
|
-
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
346
|
-
*
|
|
347
|
-
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
348
|
-
* detailed results of the evaluation, such as execution time, run IDs, and feedback
|
|
349
|
-
* for each entry in the dataset.
|
|
350
|
-
*
|
|
351
|
-
* @example
|
|
352
|
-
* ```typescript
|
|
353
|
-
* // Example usage for evaluating a model on a dataset
|
|
354
|
-
* async function evaluateModel() {
|
|
355
|
-
* const chain = /* ...create your model or chain...*\//
|
|
356
|
-
* const datasetName = 'example-dataset';
|
|
357
|
-
* const client = new Client(/* ...config... *\//);
|
|
358
|
-
*
|
|
359
|
-
* const evaluationConfig = {
|
|
360
|
-
* evaluators: [/* ...evaluators... *\//],
|
|
361
|
-
* customEvaluators: [/* ...custom evaluators... *\//],
|
|
362
|
-
* };
|
|
363
|
-
*
|
|
364
|
-
* const results = await runOnDataset(chain, datasetName, {
|
|
365
|
-
* evaluationConfig,
|
|
366
|
-
* client,
|
|
367
|
-
* });
|
|
368
|
-
*
|
|
369
|
-
* console.log('Evaluation Results:', results);
|
|
370
|
-
* }
|
|
371
|
-
*
|
|
372
|
-
* evaluateModel();
|
|
373
|
-
* ```
|
|
374
|
-
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
375
|
-
* a dataset named 'example-dataset'. The evaluation process is configured using `RunEvalConfig`, which can
|
|
376
|
-
* include both standard and custom evaluators. The `Client` instance is used to interact with LangChain services.
|
|
377
|
-
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
378
|
-
*/
|
|
379
|
-
const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }) => {
|
|
392
|
+
async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
393
|
+
const { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, } = Array.isArray(options)
|
|
394
|
+
? { evaluationConfig: { evaluators: options } }
|
|
395
|
+
: options ?? {};
|
|
380
396
|
const wrappedModel = await createWrappedModel(chainOrFactory);
|
|
381
397
|
const testClient = client ?? new langsmith_1.Client();
|
|
382
398
|
const testProjectName = projectName ?? (0, name_generation_js_1.randomName)();
|
|
@@ -432,5 +448,8 @@ const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, pro
|
|
|
432
448
|
results: evalResults ?? {},
|
|
433
449
|
};
|
|
434
450
|
return results;
|
|
435
|
-
}
|
|
451
|
+
}
|
|
436
452
|
exports.runOnDataset = runOnDataset;
|
|
453
|
+
function isLangsmithTraceableFunction(x) {
|
|
454
|
+
return typeof x === "function" && "langsmith:traceable" in x;
|
|
455
|
+
}
|
|
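The new `RunnableTraceable` class adapts a function wrapped with LangSmith's `traceable` into a `Runnable`, and `createWrappedModel` now detects such functions through their `langsmith:traceable` marker. A sketch of what this enables (the dataset name is a placeholder and `langchain/smith` as the import path is an assumption):

```ts
import { traceable } from "langsmith/traceable";
import { runOnDataset } from "langchain/smith";

// Wrapped functions carry the "langsmith:traceable" marker that
// isLangsmithTraceableFunction checks for, so they can be passed directly.
const target = traceable(
  async (input: { question: string }) => ({
    answer: `You asked: ${input.question}`,
  }),
  { name: "my-traceable-target" }
);

await runOnDataset(target, "example-dataset", {
  evaluationConfig: { evaluators: ["criteria"] },
});
```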
package/dist/smith/runner_utils.d.ts
CHANGED
@@ -1,7 +1,9 @@
 import { Runnable } from "@langchain/core/runnables";
 import { Client, Feedback } from "langsmith";
-import type {
-
+import type { TraceableFunction } from "langsmith/traceable";
+import { type RunEvalConfig } from "./config.js";
+export type ChainOrFactory = Runnable | (() => Runnable) | AnyTraceableFunction | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
+type AnyTraceableFunction = TraceableFunction<(...any: any[]) => any>;
 export type RunOnDatasetParams = {
     evaluationConfig?: RunEvalConfig;
     projectMetadata?: Record<string, unknown>;
@@ -74,4 +76,6 @@ export type EvalResults = {
  * include both standard and custom evaluators. The `Client` instance is used to interact with LangChain services.
  * The function returns the evaluation results, which can be logged or further processed as needed.
  */
-export declare
+export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }: RunOnDatasetParams): Promise<EvalResults>;
+export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, evaluators: RunEvalConfig["evaluators"]): Promise<EvalResults>;
+export {};
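The second `runOnDataset` overload is new: the third argument may be a bare evaluators array, which the implementation rewraps as `{ evaluationConfig: { evaluators } }`. A hedged sketch of the shorthand (import path and dataset name assumed):

```ts
import { Criteria, runOnDataset } from "langchain/smith";

// Shorthand overload: evaluators passed directly instead of a full
// RunOnDatasetParams object.
await runOnDataset(
  (input: { question: string }) => ({ answer: input.question }),
  "example-dataset",
  [
    Criteria("conciseness"),
    async ({ run }) => ({ key: "has_output", score: run.outputs ? 1 : 0 }),
  ]
);
```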
package/dist/smith/runner_utils.js
CHANGED
@@ -1,9 +1,10 @@
 import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
-import { Runnable, RunnableLambda, } from "@langchain/core/runnables";
+import { Runnable, RunnableLambda, getCallbackManagerForConfig, } from "@langchain/core/runnables";
 import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
 import { BaseTracer } from "@langchain/core/tracers/base";
-import { Client } from "langsmith";
+import { Client, RunTree, } from "langsmith";
 import { loadEvaluator } from "../evaluation/loader.js";
+import { isOffTheShelfEvaluator, isCustomEvaluator, } from "./config.js";
 import { randomName } from "./name_generation.js";
 import { ProgressBar } from "./progress.js";
 class SingleRunIdExtractor {
@@ -111,6 +112,67 @@ class DynamicRunEvaluator {
 function isLLMStringEvaluator(evaluator) {
     return evaluator && typeof evaluator.evaluateStrings === "function";
 }
+class RunnableTraceable extends Runnable {
+    constructor(fields) {
+        super(fields);
+        Object.defineProperty(this, "lc_serializable", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: false
+        });
+        Object.defineProperty(this, "lc_namespace", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: ["langchain_core", "runnables"]
+        });
+        Object.defineProperty(this, "func", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        if (!isLangsmithTraceableFunction(fields.func)) {
+            throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
+        }
+        this.func = fields.func;
+    }
+    async invoke(input, options) {
+        const [config] = this._getOptionsList(options ?? {}, 1);
+        const callbackManager = await getCallbackManagerForConfig(config);
+        const partialConfig = "langsmith:traceable" in this.func
+            ? this.func["langsmith:traceable"]
+            : { name: "<lambda>" };
+        const runTree = new RunTree({
+            ...partialConfig,
+            parent_run: callbackManager?._parentRunId
+                ? new RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
+                : undefined,
+        });
+        if (typeof input === "object" &&
+            input != null &&
+            Object.keys(input).length === 1) {
+            if ("args" in input && Array.isArray(input)) {
+                return (await this.func(runTree, ...input));
+            }
+            if ("input" in input &&
+                !(typeof input === "object" &&
+                    input != null &&
+                    !Array.isArray(input) &&
+                    // eslint-disable-next-line no-instanceof/no-instanceof
+                    !(input instanceof Date))) {
+                try {
+                    return (await this.func(runTree, input.input));
+                }
+                catch (err) {
+                    return (await this.func(runTree, input));
+                }
+            }
+        }
+        return (await this.func(runTree, input));
+    }
+}
 /**
  * Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
  * and composes with a prepareData function so the user can prepare the trace and
@@ -210,7 +272,7 @@ class LoadedEvalConfig {
     }
     static async fromRunEvalConfig(config) {
         // Custom evaluators are applied "as-is"
-        const customEvaluators = config?.customEvaluators?.map((evaluator) => {
+        const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(isCustomEvaluator))?.map((evaluator) => {
             if (typeof evaluator === "function") {
                 return new DynamicRunEvaluator(evaluator);
             }
@@ -218,7 +280,9 @@ class LoadedEvalConfig {
                 return evaluator;
             }
         });
-        const offTheShelfEvaluators = await Promise.all(config?.evaluators
+        const offTheShelfEvaluators = await Promise.all(config?.evaluators
+            ?.filter(isOffTheShelfEvaluator)
+            ?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
         return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
     }
 }
@@ -246,7 +310,11 @@ const createWrappedModel = async (modelOrFactory) => {
     }
     catch (err) {
         // Otherwise, it's a custom UDF, and we'll wrap
-        // in a lambda
+        // in a lambda or a traceable function
+        if (isLangsmithTraceableFunction(modelOrFactory)) {
+            const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
+            return () => wrappedModel;
+        }
         const wrappedModel = new RunnableLambda({ func: modelOrFactory });
         return () => wrappedModel;
     }
@@ -318,62 +386,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
     }
     return examples.map(({ inputs }) => inputs);
 };
-
-
-
-
- * runs the model or chain against each example, and returns the evaluation
- * results.
- *
- * @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
- * Runnable instance, a factory function that returns a Runnable, or a user-defined
- * function or factory.
- *
- * @param datasetName - The name of the dataset against which the evaluation will be
- * performed. This dataset should already be defined and contain the relevant data
- * for evaluation.
- *
- * @param options - (Optional) Additional parameters for the evaluation process:
- * - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
- * standard and custom evaluators.
- * - `projectName` (string): Name of the project for logging and tracking.
- * - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
- * - `client` (Client): Client instance for LangChain service interaction.
- * - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
- *
- * @returns A promise that resolves to an `EvalResults` object. This object includes
- * detailed results of the evaluation, such as execution time, run IDs, and feedback
- * for each entry in the dataset.
- *
- * @example
- * ```typescript
- * // Example usage for evaluating a model on a dataset
- * async function evaluateModel() {
- *   const chain = /* ...create your model or chain...*\//
- *   const datasetName = 'example-dataset';
- *   const client = new Client(/* ...config... *\//);
- *
- *   const evaluationConfig = {
- *     evaluators: [/* ...evaluators... *\//],
- *     customEvaluators: [/* ...custom evaluators... *\//],
- *   };
- *
- *   const results = await runOnDataset(chain, datasetName, {
- *     evaluationConfig,
- *     client,
- *   });
- *
- *   console.log('Evaluation Results:', results);
- * }
- *
- * evaluateModel();
- * ```
- * In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
- * a dataset named 'example-dataset'. The evaluation process is configured using `RunEvalConfig`, which can
- * include both standard and custom evaluators. The `Client` instance is used to interact with LangChain services.
- * The function returns the evaluation results, which can be logged or further processed as needed.
- */
-export const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }) => {
+export async function runOnDataset(chainOrFactory, datasetName, options) {
+    const { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, } = Array.isArray(options)
+        ? { evaluationConfig: { evaluators: options } }
+        : options ?? {};
     const wrappedModel = await createWrappedModel(chainOrFactory);
     const testClient = client ?? new Client();
     const testProjectName = projectName ?? randomName();
@@ -429,4 +445,7 @@ export const runOnDataset = async (chainOrFactory, datasetName, { evaluationConf
         results: evalResults ?? {},
     };
     return results;
-}
+}
+function isLangsmithTraceableFunction(x) {
+    return typeof x === "function" && "langsmith:traceable" in x;
+}
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "langchain",
-  "version": "0.1.19-rc.0",
+  "version": "0.1.19-rc.1",
   "description": "Typescript bindings for langchain",
   "type": "module",
   "engines": {
@@ -1513,7 +1513,7 @@
     "js-yaml": "^4.1.0",
     "jsonpointer": "^5.0.1",
     "langchainhub": "~0.0.8",
-    "langsmith": "~0.
+    "langsmith": "~0.1.1",
     "ml-distance": "^4.0.0",
     "openapi-types": "^12.1.3",
     "p-retry": "4",