langchain 0.1.18 → 0.1.19-rc.1

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -1,2 +1,57 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
+ exports.LabeledCriteria = exports.Criteria = exports.isCustomEvaluator = exports.isOffTheShelfEvaluator = void 0;
+ function isOffTheShelfEvaluator(evaluator) {
+     return typeof evaluator === "string" || "evaluatorType" in evaluator;
+ }
+ exports.isOffTheShelfEvaluator = isOffTheShelfEvaluator;
+ function isCustomEvaluator(evaluator) {
+     return !isOffTheShelfEvaluator(evaluator);
+ }
+ exports.isCustomEvaluator = isCustomEvaluator;
+ const isStringifiableValue = (value) => typeof value === "string" ||
+     typeof value === "number" ||
+     typeof value === "boolean" ||
+     typeof value === "bigint";
+ const getSingleStringifiedValue = (value) => {
+     if (isStringifiableValue(value)) {
+         return `${value}`;
+     }
+     if (typeof value === "object" && value != null && !Array.isArray(value)) {
+         const entries = Object.entries(value);
+         if (entries.length === 1 && isStringifiableValue(entries[0][1])) {
+             return `${entries[0][1]}`;
+         }
+     }
+     console.warn("Non-stringifiable value found when coercing", value);
+     return `${value}`;
+ };
+ function Criteria(criteria, config) {
+     const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+         ((payload) => ({
+             prediction: getSingleStringifiedValue(payload.rawPrediction),
+             input: getSingleStringifiedValue(payload.rawInput),
+         }));
+     return {
+         evaluatorType: "criteria",
+         criteria,
+         feedbackKey: config?.feedbackKey ?? criteria,
+         formatEvaluatorInputs,
+     };
+ }
+ exports.Criteria = Criteria;
+ function LabeledCriteria(criteria, config) {
+     const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+         ((payload) => ({
+             prediction: getSingleStringifiedValue(payload.rawPrediction),
+             input: getSingleStringifiedValue(payload.rawInput),
+             reference: getSingleStringifiedValue(payload.rawReferenceOutput),
+         }));
+     return {
+         evaluatorType: "labeled_criteria",
+         criteria,
+         feedbackKey: config?.feedbackKey ?? criteria,
+         formatEvaluatorInputs,
+     };
+ }
+ exports.LabeledCriteria = LabeledCriteria;
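
Editor's note: the hunk above (from the compiled CommonJS config module, `config.cjs` judging by the `require("./config.cjs")` later in this diff) adds `Criteria` and `LabeledCriteria` factory helpers that build `EvalConfig` objects. A minimal usage sketch, assuming these helpers are re-exported from the `langchain/smith` entrypoint (not confirmed by this diff):

```typescript
// Sketch only; assumes Criteria/LabeledCriteria are exported from
// "langchain/smith" alongside runOnDataset.
import { Criteria, LabeledCriteria } from "langchain/smith";

// Reference-free criterion; feedbackKey defaults to the criterion name.
const conciseness = Criteria("conciseness");
// => { evaluatorType: "criteria", criteria: "conciseness",
//      feedbackKey: "conciseness", formatEvaluatorInputs: [Function] }

// Labeled variant also stringifies the example's reference output;
// the logged feedback name can be overridden via config.
const correctness = LabeledCriteria("correctness", { feedbackKey: "is_correct" });
```
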
@@ -2,7 +2,7 @@ import { BaseLanguageModel } from "@langchain/core/language_models/base";
  import { RunnableConfig } from "@langchain/core/runnables";
  import { Example, Run } from "langsmith";
  import { EvaluationResult, RunEvaluator } from "langsmith/evaluation";
- import { Criteria } from "../evaluation/index.js";
+ import { Criteria as CriteriaType } from "../evaluation/index.js";
  import { LoadEvaluatorOptions } from "../evaluation/loader.js";
  import { EvaluatorType } from "../evaluation/types.js";
  export type EvaluatorInputs = {
@@ -33,6 +33,8 @@ export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams, options?: {
  }) => Promise<EvaluationResult>) | ((props: DynamicRunEvaluatorParams, options?: {
      config?: RunnableConfig;
  }) => EvaluationResult);
+ export declare function isOffTheShelfEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is T | EvalConfig;
+ export declare function isCustomEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is U;
  /**
   * Configuration class for running evaluations on datasets.
   *
@@ -48,6 +50,8 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
       * Each evaluator is provided with a run trace containing the model
       * outputs, as well as an "example" object representing a record
       * in the dataset.
+      *
+      * @deprecated Use `evaluators` instead.
       */
      customEvaluators?: U[];
      /**
@@ -55,7 +59,7 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
       * You can optionally specify these by name, or by
       * configuring them with an EvalConfig object.
       */
-     evaluators?: (T | EvalConfig)[];
+     evaluators?: (T | EvalConfig | U)[];
      /**
       * Convert the evaluation data into formats that can be used by the evaluator.
       * This should most commonly be a string.
@@ -155,7 +159,7 @@ export type CriteriaEvalChainConfig = EvalConfig & {
       * https://smith.langchain.com/hub/langchain-ai/criteria-evaluator
       * for more information.
       */
-     criteria?: Criteria | Record<string, string>;
+     criteria?: CriteriaType | Record<string, string>;
      /**
       * The feedback (or metric) name to use for the logged
       * evaluation results. If none provided, we default to
@@ -202,7 +206,7 @@ export type LabeledCriteria = EvalConfig & {
       * https://smith.langchain.com/hub/langchain-ai/labeled-criteria
       * for more information.
       */
-     criteria?: Criteria | Record<string, string>;
+     criteria?: CriteriaType | Record<string, string>;
      /**
       * The feedback (or metric) name to use for the logged
       * evaluation results. If none provided, we default to
@@ -214,3 +218,29 @@ export type LabeledCriteria = EvalConfig & {
       */
      llm?: BaseLanguageModel;
  };
+ export declare function Criteria(criteria: CriteriaType, config?: {
+     formatEvaluatorInputs?: EvaluatorInputFormatter;
+     feedbackKey?: string;
+ }): {
+     evaluatorType: "criteria";
+     criteria: CriteriaType;
+     feedbackKey: string;
+     formatEvaluatorInputs: EvaluatorInputFormatter | ((payload: {
+         rawInput: any;
+         rawPrediction: any;
+         rawReferenceOutput?: any;
+         run: Run;
+     }) => {
+         prediction: string;
+         input: string;
+     });
+ };
+ export declare function LabeledCriteria(criteria: CriteriaType, config?: {
+     formatEvaluatorInputs?: EvaluatorInputFormatter;
+     feedbackKey?: string;
+ }): {
+     evaluatorType: "labeled_criteria";
+     criteria: CriteriaType;
+     feedbackKey: string;
+     formatEvaluatorInputs: EvaluatorInputFormatter;
+ };
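
Editor's note: per the type changes above, `evaluators` is widened to `(T | EvalConfig | U)[]`, so custom evaluator functions can now sit alongside off-the-shelf names and configs, and `customEvaluators` is deprecated in their favor. A hedged sketch of a mixed config under these types (evaluator logic and the `langchain/smith` re-export are assumptions):

```typescript
// Sketch of a mixed evaluators array; assumes Criteria is exported
// from "langchain/smith" (not confirmed by this diff).
import { Criteria } from "langchain/smith";
import type { Run, Example } from "langsmith";

const evaluationConfig = {
  evaluators: [
    "qa",                    // off-the-shelf, referenced by name (T)
    Criteria("conciseness"), // off-the-shelf, via the new factory (EvalConfig)
    // Custom evaluator (U), now allowed directly in `evaluators`
    // instead of the deprecated `customEvaluators`:
    async ({ run }: { run: Run; example?: Example }) => ({
      key: "has_output",
      score: run.outputs ? 1 : 0,
    }),
  ],
};
```
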
@@ -1 +1,50 @@
- export {};
+ export function isOffTheShelfEvaluator(evaluator) {
+     return typeof evaluator === "string" || "evaluatorType" in evaluator;
+ }
+ export function isCustomEvaluator(evaluator) {
+     return !isOffTheShelfEvaluator(evaluator);
+ }
+ const isStringifiableValue = (value) => typeof value === "string" ||
+     typeof value === "number" ||
+     typeof value === "boolean" ||
+     typeof value === "bigint";
+ const getSingleStringifiedValue = (value) => {
+     if (isStringifiableValue(value)) {
+         return `${value}`;
+     }
+     if (typeof value === "object" && value != null && !Array.isArray(value)) {
+         const entries = Object.entries(value);
+         if (entries.length === 1 && isStringifiableValue(entries[0][1])) {
+             return `${entries[0][1]}`;
+         }
+     }
+     console.warn("Non-stringifiable value found when coercing", value);
+     return `${value}`;
+ };
+ export function Criteria(criteria, config) {
+     const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+         ((payload) => ({
+             prediction: getSingleStringifiedValue(payload.rawPrediction),
+             input: getSingleStringifiedValue(payload.rawInput),
+         }));
+     return {
+         evaluatorType: "criteria",
+         criteria,
+         feedbackKey: config?.feedbackKey ?? criteria,
+         formatEvaluatorInputs,
+     };
+ }
+ export function LabeledCriteria(criteria, config) {
+     const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
+         ((payload) => ({
+             prediction: getSingleStringifiedValue(payload.rawPrediction),
+             input: getSingleStringifiedValue(payload.rawInput),
+             reference: getSingleStringifiedValue(payload.rawReferenceOutput),
+         }));
+     return {
+         evaluatorType: "labeled_criteria",
+         criteria,
+         feedbackKey: config?.feedbackKey ?? criteria,
+         formatEvaluatorInputs,
+     };
+ }
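
Editor's note: the default input formatter above relies on the unexported `getSingleStringifiedValue` helper. For readability, here is a standalone restatement of its coercion rule (not the library's export, just the same logic): primitives stringify directly, a single-entry object unwraps to its lone value, and everything else warns and falls back to default stringification.

```typescript
// Illustrative restatement of the coercion logic shown in the diff above.
const coerce = (value: unknown): string => {
  const stringifiable = ["string", "number", "boolean", "bigint"];
  if (stringifiable.includes(typeof value)) return `${value}`;
  if (typeof value === "object" && value != null && !Array.isArray(value)) {
    const entries = Object.entries(value);
    if (entries.length === 1 && stringifiable.includes(typeof entries[0][1])) {
      return `${entries[0][1]}`;
    }
  }
  console.warn("Non-stringifiable value found when coercing", value);
  return `${value}`;
};

coerce(42);               // "42"
coerce({ output: "hi" }); // "hi"  (single-entry object unwrapped)
coerce({ a: 1, b: 2 });   // "[object Object]" (warns first)
```
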
@@ -7,6 +7,7 @@ const tracer_langchain_1 = require("@langchain/core/tracers/tracer_langchain");
  const base_1 = require("@langchain/core/tracers/base");
  const langsmith_1 = require("langsmith");
  const loader_js_1 = require("../evaluation/loader.cjs");
+ const config_js_1 = require("./config.cjs");
  const name_generation_js_1 = require("./name_generation.cjs");
  const progress_js_1 = require("./progress.cjs");
  class SingleRunIdExtractor {
@@ -114,6 +115,67 @@ class DynamicRunEvaluator {
  function isLLMStringEvaluator(evaluator) {
      return evaluator && typeof evaluator.evaluateStrings === "function";
  }
+ class RunnableTraceable extends runnables_1.Runnable {
+     constructor(fields) {
+         super(fields);
+         Object.defineProperty(this, "lc_serializable", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: false
+         });
+         Object.defineProperty(this, "lc_namespace", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: ["langchain_core", "runnables"]
+         });
+         Object.defineProperty(this, "func", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         if (!isLangsmithTraceableFunction(fields.func)) {
+             throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
+         }
+         this.func = fields.func;
+     }
+     async invoke(input, options) {
+         const [config] = this._getOptionsList(options ?? {}, 1);
+         const callbackManager = await (0, runnables_1.getCallbackManagerForConfig)(config);
+         const partialConfig = "langsmith:traceable" in this.func
+             ? this.func["langsmith:traceable"]
+             : { name: "<lambda>" };
+         const runTree = new langsmith_1.RunTree({
+             ...partialConfig,
+             parent_run: callbackManager?._parentRunId
+                 ? new langsmith_1.RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
+                 : undefined,
+         });
+         if (typeof input === "object" &&
+             input != null &&
+             Object.keys(input).length === 1) {
+             if ("args" in input && Array.isArray(input)) {
+                 return (await this.func(runTree, ...input));
+             }
+             if ("input" in input &&
+                 !(typeof input === "object" &&
+                     input != null &&
+                     !Array.isArray(input) &&
+                     // eslint-disable-next-line no-instanceof/no-instanceof
+                     !(input instanceof Date))) {
+                 try {
+                     return (await this.func(runTree, input.input));
+                 }
+                 catch (err) {
+                     return (await this.func(runTree, input));
+                 }
+             }
+         }
+         return (await this.func(runTree, input));
+     }
+ }
  /**
   * Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
   * and composes with a prepareData function so the user can prepare the trace and
@@ -213,7 +275,7 @@ class LoadedEvalConfig {
      }
      static async fromRunEvalConfig(config) {
          // Custom evaluators are applied "as-is"
-         const customEvaluators = config?.customEvaluators?.map((evaluator) => {
+         const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(config_js_1.isCustomEvaluator))?.map((evaluator) => {
              if (typeof evaluator === "function") {
                  return new DynamicRunEvaluator(evaluator);
              }
@@ -221,7 +283,9 @@ class LoadedEvalConfig {
                  return evaluator;
              }
          });
-         const offTheShelfEvaluators = await Promise.all(config?.evaluators?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
+         const offTheShelfEvaluators = await Promise.all(config?.evaluators
+             ?.filter(config_js_1.isOffTheShelfEvaluator)
+             ?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
          return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
      }
  }
@@ -249,7 +313,11 @@ const createWrappedModel = async (modelOrFactory) => {
      }
      catch (err) {
          // Otherwise, it's a custom UDF, and we'll wrap
-         // in a lambda
+         // in a lambda or a traceable function
+         if (isLangsmithTraceableFunction(modelOrFactory)) {
+             const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
+             return () => wrappedModel;
+         }
          const wrappedModel = new runnables_1.RunnableLambda({ func: modelOrFactory });
          return () => wrappedModel;
      }
@@ -321,62 +389,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
      }
      return examples.map(({ inputs }) => inputs);
  };
- /**
-  * Evaluates a given model or chain against a specified LangSmith dataset.
-  *
-  * This function fetches example records from the specified dataset,
-  * runs the model or chain against each example, and returns the evaluation
-  * results.
-  *
-  * @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
-  * Runnable instance, a factory function that returns a Runnable, or a user-defined
-  * function or factory.
-  *
-  * @param datasetName - The name of the dataset against which the evaluation will be
-  * performed. This dataset should already be defined and contain the relevant data
-  * for evaluation.
-  *
-  * @param options - (Optional) Additional parameters for the evaluation process:
-  * - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
-  * standard and custom evaluators.
-  * - `projectName` (string): Name of the project for logging and tracking.
-  * - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
-  * - `client` (Client): Client instance for LangChain service interaction.
-  * - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
-  *
-  * @returns A promise that resolves to an `EvalResults` object. This object includes
-  * detailed results of the evaluation, such as execution time, run IDs, and feedback
-  * for each entry in the dataset.
-  *
-  * @example
-  * ```typescript
-  * // Example usage for evaluating a model on a dataset
-  * async function evaluateModel() {
-  *   const chain = /* ...create your model or chain...*\//
-  *   const datasetName = 'example-dataset';
-  *   const client = new Client(/* ...config... *\//);
-  *
-  *   const evaluationConfig = {
-  *     evaluators: [/* ...evaluators... *\//],
-  *     customEvaluators: [/* ...custom evaluators... *\//],
-  *   };
-  *
-  *   const results = await runOnDataset(chain, datasetName, {
-  *     evaluationConfig,
-  *     client,
-  *   });
-  *
-  *   console.log('Evaluation Results:', results);
-  * }
-  *
-  * evaluateModel();
-  * ```
-  * In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
-  * a dataset named 'example-dataset'. The evaluation process is configured using `RunEvalConfig`, which can
-  * include both standard and custom evaluators. The `Client` instance is used to interact with LangChain services.
-  * The function returns the evaluation results, which can be logged or further processed as needed.
-  */
- const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }) => {
+ async function runOnDataset(chainOrFactory, datasetName, options) {
+     const { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, } = Array.isArray(options)
+         ? { evaluationConfig: { evaluators: options } }
+         : options ?? {};
      const wrappedModel = await createWrappedModel(chainOrFactory);
      const testClient = client ?? new langsmith_1.Client();
      const testProjectName = projectName ?? (0, name_generation_js_1.randomName)();
@@ -432,5 +448,8 @@ const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, pro
          results: evalResults ?? {},
      };
      return results;
- };
+ }
  exports.runOnDataset = runOnDataset;
+ function isLangsmithTraceableFunction(x) {
+     return typeof x === "function" && "langsmith:traceable" in x;
+ }
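
Editor's note: the `RunnableTraceable` wrapper and `isLangsmithTraceableFunction` check above mean a function wrapped with langsmith's `traceable` can now be passed straight to `runOnDataset`; the runner detects the `"langsmith:traceable"` marker and wraps it in `RunnableTraceable` rather than a plain `RunnableLambda` (enabled by the `langsmith` bump to `~0.1.1` at the end of this diff). A sketch, with the dataset name and pipeline logic as placeholders:

```typescript
// Sketch only; assumes an ESM module context with top-level await.
import { traceable } from "langsmith/traceable";
import { runOnDataset } from "langchain/smith";

// A user pipeline wrapped in the traceable() higher-order function;
// its config name becomes the traced run's name in LangSmith.
const pipeline = traceable(
  async (input: { question: string }) => ({
    answer: `You asked: ${input.question}`,
  }),
  { name: "my-pipeline" }
);

await runOnDataset(pipeline, "my-dataset", {
  evaluationConfig: { evaluators: ["qa"] },
});
```
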
@@ -1,7 +1,9 @@
  import { Runnable } from "@langchain/core/runnables";
  import { Client, Feedback } from "langsmith";
- import type { RunEvalConfig } from "./config.js";
- export type ChainOrFactory = Runnable | (() => Runnable) | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
+ import type { TraceableFunction } from "langsmith/traceable";
+ import { type RunEvalConfig } from "./config.js";
+ export type ChainOrFactory = Runnable | (() => Runnable) | AnyTraceableFunction | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
+ type AnyTraceableFunction = TraceableFunction<(...any: any[]) => any>;
  export type RunOnDatasetParams = {
      evaluationConfig?: RunEvalConfig;
      projectMetadata?: Record<string, unknown>;
@@ -74,4 +76,6 @@ export type EvalResults = {
   * include both standard and custom evaluators. The `Client` instance is used to interact with LangChain services.
   * The function returns the evaluation results, which can be logged or further processed as needed.
   */
- export declare const runOnDataset: (chainOrFactory: ChainOrFactory, datasetName: string, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }: RunOnDatasetParams) => Promise<EvalResults>;
+ export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }: RunOnDatasetParams): Promise<EvalResults>;
+ export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, evaluators: RunEvalConfig["evaluators"]): Promise<EvalResults>;
+ export {};
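
Editor's note: the second declaration above adds an overload, so the third argument may be the `evaluators` array itself; the implementation normalizes it to `{ evaluationConfig: { evaluators } }`. A sketch of the shorthand (target function and dataset name are placeholders, and the `langchain/smith` import path is assumed):

```typescript
// Sketch only; assumes an ESM module context with top-level await.
import { runOnDataset } from "langchain/smith";

// Equivalent to:
// runOnDataset(target, "my-dataset", { evaluationConfig: { evaluators: ["qa"] } })
const results = await runOnDataset(
  async (input: { question: string }) => ({ answer: "42" }), // any ChainOrFactory
  "my-dataset",
  ["qa"]
);
console.log(results);
```
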
@@ -1,9 +1,10 @@
  import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
- import { Runnable, RunnableLambda, } from "@langchain/core/runnables";
+ import { Runnable, RunnableLambda, getCallbackManagerForConfig, } from "@langchain/core/runnables";
  import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
  import { BaseTracer } from "@langchain/core/tracers/base";
- import { Client } from "langsmith";
+ import { Client, RunTree, } from "langsmith";
  import { loadEvaluator } from "../evaluation/loader.js";
+ import { isOffTheShelfEvaluator, isCustomEvaluator, } from "./config.js";
  import { randomName } from "./name_generation.js";
  import { ProgressBar } from "./progress.js";
  class SingleRunIdExtractor {
@@ -111,6 +112,67 @@ class DynamicRunEvaluator {
  function isLLMStringEvaluator(evaluator) {
      return evaluator && typeof evaluator.evaluateStrings === "function";
  }
+ class RunnableTraceable extends Runnable {
+     constructor(fields) {
+         super(fields);
+         Object.defineProperty(this, "lc_serializable", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: false
+         });
+         Object.defineProperty(this, "lc_namespace", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: ["langchain_core", "runnables"]
+         });
+         Object.defineProperty(this, "func", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: void 0
+         });
+         if (!isLangsmithTraceableFunction(fields.func)) {
+             throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
+         }
+         this.func = fields.func;
+     }
+     async invoke(input, options) {
+         const [config] = this._getOptionsList(options ?? {}, 1);
+         const callbackManager = await getCallbackManagerForConfig(config);
+         const partialConfig = "langsmith:traceable" in this.func
+             ? this.func["langsmith:traceable"]
+             : { name: "<lambda>" };
+         const runTree = new RunTree({
+             ...partialConfig,
+             parent_run: callbackManager?._parentRunId
+                 ? new RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
+                 : undefined,
+         });
+         if (typeof input === "object" &&
+             input != null &&
+             Object.keys(input).length === 1) {
+             if ("args" in input && Array.isArray(input)) {
+                 return (await this.func(runTree, ...input));
+             }
+             if ("input" in input &&
+                 !(typeof input === "object" &&
+                     input != null &&
+                     !Array.isArray(input) &&
+                     // eslint-disable-next-line no-instanceof/no-instanceof
+                     !(input instanceof Date))) {
+                 try {
+                     return (await this.func(runTree, input.input));
+                 }
+                 catch (err) {
+                     return (await this.func(runTree, input));
+                 }
+             }
+         }
+         return (await this.func(runTree, input));
+     }
+ }
  /**
   * Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
   * and composes with a prepareData function so the user can prepare the trace and
@@ -210,7 +272,7 @@ class LoadedEvalConfig {
      }
      static async fromRunEvalConfig(config) {
          // Custom evaluators are applied "as-is"
-         const customEvaluators = config?.customEvaluators?.map((evaluator) => {
+         const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(isCustomEvaluator))?.map((evaluator) => {
              if (typeof evaluator === "function") {
                  return new DynamicRunEvaluator(evaluator);
              }
@@ -218,7 +280,9 @@ class LoadedEvalConfig {
                  return evaluator;
              }
          });
-         const offTheShelfEvaluators = await Promise.all(config?.evaluators?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
+         const offTheShelfEvaluators = await Promise.all(config?.evaluators
+             ?.filter(isOffTheShelfEvaluator)
+             ?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
          return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
      }
  }
@@ -246,7 +310,11 @@ const createWrappedModel = async (modelOrFactory) => {
      }
      catch (err) {
          // Otherwise, it's a custom UDF, and we'll wrap
-         // in a lambda
+         // in a lambda or a traceable function
+         if (isLangsmithTraceableFunction(modelOrFactory)) {
+             const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
+             return () => wrappedModel;
+         }
          const wrappedModel = new RunnableLambda({ func: modelOrFactory });
          return () => wrappedModel;
      }
@@ -318,62 +386,10 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
      }
      return examples.map(({ inputs }) => inputs);
  };
- /**
-  * Evaluates a given model or chain against a specified LangSmith dataset.
-  *
-  * This function fetches example records from the specified dataset,
-  * runs the model or chain against each example, and returns the evaluation
-  * results.
-  *
-  * @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
-  * Runnable instance, a factory function that returns a Runnable, or a user-defined
-  * function or factory.
-  *
-  * @param datasetName - The name of the dataset against which the evaluation will be
-  * performed. This dataset should already be defined and contain the relevant data
-  * for evaluation.
-  *
-  * @param options - (Optional) Additional parameters for the evaluation process:
-  * - `evaluationConfig` (RunEvalConfig): Configuration for the evaluation, including
-  * standard and custom evaluators.
-  * - `projectName` (string): Name of the project for logging and tracking.
-  * - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
-  * - `client` (Client): Client instance for LangChain service interaction.
-  * - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
-  *
-  * @returns A promise that resolves to an `EvalResults` object. This object includes
-  * detailed results of the evaluation, such as execution time, run IDs, and feedback
-  * for each entry in the dataset.
-  *
-  * @example
-  * ```typescript
-  * // Example usage for evaluating a model on a dataset
-  * async function evaluateModel() {
-  *   const chain = /* ...create your model or chain...*\//
-  *   const datasetName = 'example-dataset';
-  *   const client = new Client(/* ...config... *\//);
-  *
-  *   const evaluationConfig = {
-  *     evaluators: [/* ...evaluators... *\//],
-  *     customEvaluators: [/* ...custom evaluators... *\//],
-  *   };
-  *
-  *   const results = await runOnDataset(chain, datasetName, {
-  *     evaluationConfig,
-  *     client,
-  *   });
-  *
-  *   console.log('Evaluation Results:', results);
-  * }
-  *
-  * evaluateModel();
-  * ```
-  * In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
-  * a dataset named 'example-dataset'. The evaluation process is configured using `RunEvalConfig`, which can
-  * include both standard and custom evaluators. The `Client` instance is used to interact with LangChain services.
-  * The function returns the evaluation results, which can be logged or further processed as needed.
-  */
- export const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, }) => {
+ export async function runOnDataset(chainOrFactory, datasetName, options) {
+     const { evaluationConfig, projectName, projectMetadata, client, maxConcurrency, } = Array.isArray(options)
+         ? { evaluationConfig: { evaluators: options } }
+         : options ?? {};
      const wrappedModel = await createWrappedModel(chainOrFactory);
      const testClient = client ?? new Client();
      const testProjectName = projectName ?? randomName();
@@ -429,4 +445,7 @@ export const runOnDataset = async (chainOrFactory, datasetName, { evaluationConf
          results: evalResults ?? {},
      };
      return results;
- };
+ }
+ function isLangsmithTraceableFunction(x) {
+     return typeof x === "function" && "langsmith:traceable" in x;
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "langchain",
-   "version": "0.1.18",
+   "version": "0.1.19-rc.1",
    "description": "Typescript bindings for langchain",
    "type": "module",
    "engines": {
@@ -1513,7 +1513,7 @@
    "js-yaml": "^4.1.0",
    "jsonpointer": "^5.0.1",
    "langchainhub": "~0.0.8",
-   "langsmith": "~0.0.59",
+   "langsmith": "~0.1.1",
    "ml-distance": "^4.0.0",
    "openapi-types": "^12.1.3",
    "p-retry": "4",