langsmith 0.2.8 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.cjs CHANGED
@@ -172,7 +172,7 @@ class AutoBatchQueue {
172
172
  exports.AutoBatchQueue = AutoBatchQueue;
173
173
  // 20 MB
174
174
  exports.DEFAULT_BATCH_SIZE_LIMIT_BYTES = 20_971_520;
175
- const SERVER_INFO_REQUEST_TIMEOUT = 1000;
175
+ const SERVER_INFO_REQUEST_TIMEOUT = 2500;
176
176
  class Client {
177
177
  constructor(config = {}) {
178
178
  Object.defineProperty(this, "apiKey", {
package/dist/client.js CHANGED
@@ -144,7 +144,7 @@ export class AutoBatchQueue {
144
144
  }
145
145
  // 20 MB
146
146
  export const DEFAULT_BATCH_SIZE_LIMIT_BYTES = 20_971_520;
147
- const SERVER_INFO_REQUEST_TIMEOUT = 1000;
147
+ const SERVER_INFO_REQUEST_TIMEOUT = 2500;
148
148
  export class Client {
149
149
  constructor(config = {}) {
150
150
  Object.defineProperty(this, "apiKey", {
@@ -12,11 +12,9 @@ const error_js_1 = require("../utils/error.cjs");
12
12
  const _random_name_js_1 = require("./_random_name.cjs");
13
13
  const evaluator_js_1 = require("./evaluator.cjs");
14
14
  const uuid_1 = require("uuid");
15
- function evaluate(
16
- /**
17
- * The target system or function to evaluate.
18
- */
19
- target, options) {
15
+ const evaluate_comparative_js_1 = require("./evaluate_comparative.cjs");
16
+ // Implementation signature
17
+ function evaluate(target, options) {
20
18
  return _evaluate(target, options);
21
19
  }
22
20
  exports.evaluate = evaluate;
@@ -381,7 +379,7 @@ class _ExperimentManager {
381
379
  // Private methods
382
380
  /**
383
381
  * Run the target function or runnable on the examples.
384
- * @param {TargetT} target The target function or runnable to evaluate.
382
+ * @param {StandardTargetT} target The target function or runnable to evaluate.
385
383
  * @param options
386
384
  * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
387
385
  */
@@ -630,12 +628,32 @@ class ExperimentResults {
630
628
  }
631
629
  }
632
630
  async function _evaluate(target, fields) {
631
+ // Add check for comparative evaluation
632
+ if (Array.isArray(target)) {
633
+ const comparativeOptions = fields;
634
+ if (!comparativeOptions.evaluators) {
635
+ throw new Error("Evaluators are required for comparative evaluation");
636
+ }
637
+ return (0, evaluate_comparative_js_1.evaluateComparative)(target, {
638
+ evaluators: comparativeOptions.evaluators,
639
+ client: comparativeOptions.client,
640
+ metadata: comparativeOptions.metadata,
641
+ experimentPrefix: comparativeOptions.experimentPrefix,
642
+ description: comparativeOptions.description,
643
+ maxConcurrency: comparativeOptions.maxConcurrency,
644
+ loadNested: comparativeOptions.loadNested ?? false,
645
+ randomizeOrder: comparativeOptions.randomizeOrder ?? false,
646
+ });
647
+ }
633
648
  const client = fields.client ?? new index_js_1.Client();
634
649
  const runs = _isCallable(target) ? null : target;
650
+ const standardFields = fields;
635
651
  const [experiment_, newRuns] = await _resolveExperiment(fields.experiment ?? null, runs, client);
636
652
  let manager = await new _ExperimentManager({
637
- data: Array.isArray(fields.data) ? undefined : fields.data,
638
- examples: Array.isArray(fields.data) ? fields.data : undefined,
653
+ data: Array.isArray(standardFields.data) ? undefined : standardFields.data,
654
+ examples: Array.isArray(standardFields.data)
655
+ ? standardFields.data
656
+ : undefined,
639
657
  client,
640
658
  metadata: fields.metadata,
641
659
  experiment: experiment_ ?? fields.experimentPrefix,
@@ -647,13 +665,13 @@ async function _evaluate(target, fields) {
647
665
  maxConcurrency: fields.maxConcurrency,
648
666
  });
649
667
  }
650
- if (fields.evaluators) {
651
- manager = await manager.withEvaluators(fields.evaluators, {
668
+ if (standardFields.evaluators) {
669
+ manager = await manager.withEvaluators(standardFields.evaluators, {
652
670
  maxConcurrency: fields.maxConcurrency,
653
671
  });
654
672
  }
655
- if (fields.summaryEvaluators) {
656
- manager = await manager.withSummaryEvaluators(fields.summaryEvaluators);
673
+ if (standardFields.summaryEvaluators) {
674
+ manager = await manager.withSummaryEvaluators(standardFields.summaryEvaluators);
657
675
  }
658
676
  // Start consuming the results.
659
677
  const results = new ExperimentResults(manager);
@@ -743,6 +761,20 @@ async function wrapSummaryEvaluators(evaluators, optionsArray) {
743
761
  const evalName = evaluator.name || "BatchEvaluator";
744
762
  const wrapperInner = (runs, examples) => {
745
763
  const wrapperSuperInner = (0, traceable_js_1.traceable)((_runs_, _examples_) => {
764
+ // Check if the evaluator expects an object parameter
765
+ if (evaluator.length === 1) {
766
+ const inputs = examples.map((ex) => ex.inputs);
767
+ const outputs = runs.map((run) => run.outputs || {});
768
+ const referenceOutputs = examples.map((ex) => ex.outputs || {});
769
+ return Promise.resolve(evaluator({
770
+ runs,
771
+ examples,
772
+ inputs,
773
+ outputs,
774
+ referenceOutputs,
775
+ }));
776
+ }
777
+ // Otherwise use the traditional (runs, examples) signature
746
778
  return Promise.resolve(evaluator(runs, examples));
747
779
  }, { ...optionsArray, name: evalName });
748
780
  return Promise.resolve(wrapperSuperInner(`Runs[] (Length=${runs.length})`, `Examples[] (Length=${examples.length})`));
@@ -1,14 +1,51 @@
1
1
  import { Client } from "../index.js";
2
2
  import { Example, KVMap, Run, TracerSession } from "../schemas.js";
3
3
  import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
4
- export type TargetT<TInput = any, TOutput = KVMap> = ((input: TInput, config?: KVMap) => Promise<TOutput>) | ((input: TInput, config?: KVMap) => TOutput) | {
4
+ import { ComparisonEvaluationResults, ComparativeEvaluator } from "./evaluate_comparative.js";
5
+ type StandardTargetT<TInput = any, TOutput = KVMap> = ((input: TInput, config?: KVMap) => Promise<TOutput>) | ((input: TInput, config?: KVMap) => TOutput) | {
5
6
  invoke: (input: TInput, config?: KVMap) => TOutput;
6
7
  } | {
7
8
  invoke: (input: TInput, config?: KVMap) => Promise<TOutput>;
8
9
  };
10
+ type ComparativeTargetT = Array<string> | Array<Promise<ExperimentResults> | ExperimentResults>;
11
+ export type TargetT<TInput = any, TOutput = KVMap> = StandardTargetT<TInput, TOutput> | ComparativeTargetT;
9
12
  export type DataT = string | AsyncIterable<Example> | Example[];
10
- export type SummaryEvaluatorT = ((runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResults>) | ((runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResults);
11
- export type EvaluatorT = RunEvaluator | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults) | ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>);
13
+ /** @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ... */
14
+ type DeprecatedSyncSummaryEvaluator = (runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResults;
15
+ /** @deprecated Use object parameter version instead: (args: { runs, examples, inputs, outputs, referenceOutputs }) => ... */
16
+ type DeprecatedAsyncSummaryEvaluator = (runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResults>;
17
+ export type SummaryEvaluatorT = DeprecatedSyncSummaryEvaluator | DeprecatedAsyncSummaryEvaluator | ((args: {
18
+ runs: Array<Run>;
19
+ examples: Array<Example>;
20
+ inputs: Array<Record<string, any>>;
21
+ outputs: Array<Record<string, any>>;
22
+ referenceOutputs?: Array<Record<string, any>>;
23
+ }) => EvaluationResult | EvaluationResults) | ((args: {
24
+ runs: Array<Run>;
25
+ examples: Array<Example>;
26
+ inputs: Array<Record<string, any>>;
27
+ outputs: Array<Record<string, any>>;
28
+ referenceOutputs?: Array<Record<string, any>>;
29
+ }) => Promise<EvaluationResult | EvaluationResults>);
30
+ /** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
31
+ type DeprecatedRunEvaluator = RunEvaluator;
32
+ /** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
33
+ type DeprecatedFunctionEvaluator = (run: Run, example?: Example) => EvaluationResult | EvaluationResults;
34
+ /** @deprecated Use object parameter version instead: (args: { run, example, inputs, outputs, referenceOutputs }) => ... */
35
+ type DeprecatedAsyncFunctionEvaluator = (run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>;
36
+ export type EvaluatorT = DeprecatedRunEvaluator | DeprecatedFunctionEvaluator | DeprecatedAsyncFunctionEvaluator | ((args: {
37
+ run: Run;
38
+ example: Example;
39
+ inputs: Record<string, any>;
40
+ outputs: Record<string, any>;
41
+ referenceOutputs?: Record<string, any>;
42
+ }) => EvaluationResult | EvaluationResults) | ((args: {
43
+ run: Run;
44
+ example: Example;
45
+ inputs: Record<string, any>;
46
+ outputs: Record<string, any>;
47
+ referenceOutputs?: Record<string, any>;
48
+ }) => Promise<EvaluationResult | EvaluationResults>);
12
49
  interface _ForwardResults {
13
50
  run: Run;
14
51
  example: Example;
@@ -25,22 +62,7 @@ interface _ExperimentManagerArgs {
25
62
  numRepetitions?: number;
26
63
  _runsArray?: Run[];
27
64
  }
28
- export interface EvaluateOptions {
29
- /**
30
- * The dataset to evaluate on. Can be a dataset name, a list of
31
- * examples, or a generator of examples.
32
- */
33
- data: DataT;
34
- /**
35
- * A list of evaluators to run on each example.
36
- * @default undefined
37
- */
38
- evaluators?: Array<EvaluatorT>;
39
- /**
40
- * A list of summary evaluators to run on the entire dataset.
41
- * @default undefined
42
- */
43
- summaryEvaluators?: Array<SummaryEvaluatorT>;
65
+ type BaseEvaluateOptions = {
44
66
  /**
45
67
  * Metadata to attach to the experiment.
46
68
  * @default undefined
@@ -71,12 +93,42 @@ export interface EvaluateOptions {
71
93
  * @default 1
72
94
  */
73
95
  numRepetitions?: number;
96
+ };
97
+ export interface EvaluateOptions extends BaseEvaluateOptions {
98
+ /**
99
+ * A list of evaluators to run on each example.
100
+ * @default undefined
101
+ */
102
+ evaluators?: Array<EvaluatorT>;
103
+ /**
104
+ * A list of summary evaluators to run on the entire dataset.
105
+ * @default undefined
106
+ */
107
+ summaryEvaluators?: Array<SummaryEvaluatorT>;
108
+ /**
109
+ * The dataset to evaluate on. Can be a dataset name, a list of
110
+ * examples, or a generator of examples.
111
+ */
112
+ data: DataT;
74
113
  }
75
- export declare function evaluate(
76
- /**
77
- * The target system or function to evaluate.
78
- */
79
- target: TargetT, options: EvaluateOptions): Promise<ExperimentResults>;
114
+ export interface ComparativeEvaluateOptions extends BaseEvaluateOptions {
115
+ /**
116
+ * A list of evaluators to run on each example.
117
+ */
118
+ evaluators: Array<ComparativeEvaluator>;
119
+ /**
120
+ * Whether to load all child runs for the experiment.
121
+ * @default false
122
+ */
123
+ loadNested?: boolean;
124
+ /**
125
+ * Randomize the order of outputs for each evaluation
126
+ * @default false
127
+ */
128
+ randomizeOrder?: boolean;
129
+ }
130
+ export declare function evaluate(target: ComparativeTargetT, options: ComparativeEvaluateOptions): Promise<ComparisonEvaluationResults>;
131
+ export declare function evaluate(target: StandardTargetT, options: EvaluateOptions): Promise<ExperimentResults>;
80
132
  export interface ExperimentResultRow {
81
133
  run: Run;
82
134
  example: Example;
@@ -114,7 +166,7 @@ export declare class _ExperimentManager {
114
166
  _getProject(firstExample: Example): Promise<TracerSession>;
115
167
  protected _printExperimentStart(): Promise<void>;
116
168
  start(): Promise<_ExperimentManager>;
117
- withPredictions(target: TargetT, options?: {
169
+ withPredictions(target: StandardTargetT, options?: {
118
170
  maxConcurrency?: number;
119
171
  }): Promise<_ExperimentManager>;
120
172
  withEvaluators(evaluators: Array<EvaluatorT | RunEvaluator>, options?: {
@@ -125,11 +177,11 @@ export declare class _ExperimentManager {
125
177
  getSummaryScores(): Promise<EvaluationResults>;
126
178
  /**
127
179
  * Run the target function or runnable on the examples.
128
- * @param {TargetT} target The target function or runnable to evaluate.
180
+ * @param {StandardTargetT} target The target function or runnable to evaluate.
129
181
  * @param options
130
182
  * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
131
183
  */
132
- _predict(target: TargetT, options?: {
184
+ _predict(target: StandardTargetT, options?: {
133
185
  maxConcurrency?: number;
134
186
  }): AsyncGenerator<_ForwardResults>;
135
187
  _runEvaluators(evaluators: Array<RunEvaluator>, currentResults: ExperimentResultRow, fields: {
@@ -9,11 +9,9 @@ import { printErrorStackTrace } from "../utils/error.js";
9
9
  import { randomName } from "./_random_name.js";
10
10
  import { runEvaluator, } from "./evaluator.js";
11
11
  import { v4 as uuidv4 } from "uuid";
12
- export function evaluate(
13
- /**
14
- * The target system or function to evaluate.
15
- */
16
- target, options) {
12
+ import { evaluateComparative, } from "./evaluate_comparative.js";
13
+ // Implementation signature
14
+ export function evaluate(target, options) {
17
15
  return _evaluate(target, options);
18
16
  }
19
17
  /**
@@ -377,7 +375,7 @@ export class _ExperimentManager {
377
375
  // Private methods
378
376
  /**
379
377
  * Run the target function or runnable on the examples.
380
- * @param {TargetT} target The target function or runnable to evaluate.
378
+ * @param {StandardTargetT} target The target function or runnable to evaluate.
381
379
  * @param options
382
380
  * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
383
381
  */
@@ -625,12 +623,32 @@ class ExperimentResults {
625
623
  }
626
624
  }
627
625
  async function _evaluate(target, fields) {
626
+ // Add check for comparative evaluation
627
+ if (Array.isArray(target)) {
628
+ const comparativeOptions = fields;
629
+ if (!comparativeOptions.evaluators) {
630
+ throw new Error("Evaluators are required for comparative evaluation");
631
+ }
632
+ return evaluateComparative(target, {
633
+ evaluators: comparativeOptions.evaluators,
634
+ client: comparativeOptions.client,
635
+ metadata: comparativeOptions.metadata,
636
+ experimentPrefix: comparativeOptions.experimentPrefix,
637
+ description: comparativeOptions.description,
638
+ maxConcurrency: comparativeOptions.maxConcurrency,
639
+ loadNested: comparativeOptions.loadNested ?? false,
640
+ randomizeOrder: comparativeOptions.randomizeOrder ?? false,
641
+ });
642
+ }
628
643
  const client = fields.client ?? new Client();
629
644
  const runs = _isCallable(target) ? null : target;
645
+ const standardFields = fields;
630
646
  const [experiment_, newRuns] = await _resolveExperiment(fields.experiment ?? null, runs, client);
631
647
  let manager = await new _ExperimentManager({
632
- data: Array.isArray(fields.data) ? undefined : fields.data,
633
- examples: Array.isArray(fields.data) ? fields.data : undefined,
648
+ data: Array.isArray(standardFields.data) ? undefined : standardFields.data,
649
+ examples: Array.isArray(standardFields.data)
650
+ ? standardFields.data
651
+ : undefined,
634
652
  client,
635
653
  metadata: fields.metadata,
636
654
  experiment: experiment_ ?? fields.experimentPrefix,
@@ -642,13 +660,13 @@ async function _evaluate(target, fields) {
642
660
  maxConcurrency: fields.maxConcurrency,
643
661
  });
644
662
  }
645
- if (fields.evaluators) {
646
- manager = await manager.withEvaluators(fields.evaluators, {
663
+ if (standardFields.evaluators) {
664
+ manager = await manager.withEvaluators(standardFields.evaluators, {
647
665
  maxConcurrency: fields.maxConcurrency,
648
666
  });
649
667
  }
650
- if (fields.summaryEvaluators) {
651
- manager = await manager.withSummaryEvaluators(fields.summaryEvaluators);
668
+ if (standardFields.summaryEvaluators) {
669
+ manager = await manager.withSummaryEvaluators(standardFields.summaryEvaluators);
652
670
  }
653
671
  // Start consuming the results.
654
672
  const results = new ExperimentResults(manager);
@@ -738,6 +756,20 @@ async function wrapSummaryEvaluators(evaluators, optionsArray) {
738
756
  const evalName = evaluator.name || "BatchEvaluator";
739
757
  const wrapperInner = (runs, examples) => {
740
758
  const wrapperSuperInner = traceable((_runs_, _examples_) => {
759
+ // Check if the evaluator expects an object parameter
760
+ if (evaluator.length === 1) {
761
+ const inputs = examples.map((ex) => ex.inputs);
762
+ const outputs = runs.map((run) => run.outputs || {});
763
+ const referenceOutputs = examples.map((ex) => ex.outputs || {});
764
+ return Promise.resolve(evaluator({
765
+ runs,
766
+ examples,
767
+ inputs,
768
+ outputs,
769
+ referenceOutputs,
770
+ }));
771
+ }
772
+ // Otherwise use the traditional (runs, examples) signature
741
773
  return Promise.resolve(evaluator(runs, examples));
742
774
  }, { ...optionsArray, name: evalName });
743
775
  return Promise.resolve(wrapperSuperInner(`Runs[] (Length=${runs.length})`, `Examples[] (Length=${examples.length})`));
@@ -162,7 +162,16 @@ async function evaluateComparative(experiments, options) {
162
162
  const caller = new async_caller_js_1.AsyncCaller({ maxConcurrency: options.maxConcurrency });
163
163
  async function evaluateAndSubmitFeedback(runs, example, evaluator) {
164
164
  const expectedRunIds = new Set(runs.map((r) => r.id));
165
- const result = await evaluator(options.randomizeOrder ? (0, shuffle_js_1.shuffle)(runs) : runs, example);
165
+ // Check if evaluator expects an object parameter
166
+ const result = evaluator.length === 1
167
+ ? await evaluator({
168
+ runs: options.randomizeOrder ? (0, shuffle_js_1.shuffle)(runs) : runs,
169
+ example,
170
+ inputs: example.inputs,
171
+ outputs: runs.map((run) => run.outputs || {}),
172
+ referenceOutputs: example.outputs || {},
173
+ })
174
+ : await evaluator(runs, example);
166
175
  for (const [runId, score] of Object.entries(result.scores)) {
167
176
  // validate if the run id
168
177
  if (!expectedRunIds.has(runId)) {
@@ -178,7 +187,15 @@ async function evaluateComparative(experiments, options) {
178
187
  }
179
188
  const tracedEvaluators = options.evaluators.map((evaluator) => (0, traceable_js_1.traceable)(async (runs, example) => {
180
189
  const evaluatorRun = (0, traceable_js_1.getCurrentRunTree)();
181
- const result = await evaluator(runs, example);
190
+ const result = evaluator.length === 1
191
+ ? await evaluator({
192
+ runs: options.randomizeOrder ? (0, shuffle_js_1.shuffle)(runs) : runs,
193
+ example,
194
+ inputs: example.inputs,
195
+ outputs: runs.map((run) => run.outputs || {}),
196
+ referenceOutputs: example.outputs || {},
197
+ })
198
+ : await evaluator(runs, example);
182
199
  // sanitise the payload before sending to LangSmith
183
200
  evaluatorRun.inputs = { runs: runs, example: example };
184
201
  evaluatorRun.outputs = result;
@@ -2,11 +2,21 @@ import { Client } from "../index.js";
2
2
  import { ComparisonEvaluationResult as ComparisonEvaluationResultRow, Example, Run } from "../schemas.js";
3
3
  import { evaluate } from "./index.js";
4
4
  type ExperimentResults = Awaited<ReturnType<typeof evaluate>>;
5
+ /** @deprecated Use ComparativeEvaluatorNew instead: (args: { runs, example, inputs, outputs, referenceOutputs }) => ... */
6
+ export type _ComparativeEvaluatorLegacy = (runs: Run[], example: Example) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow>;
7
+ export type _ComparativeEvaluator = (args: {
8
+ runs: Run[];
9
+ example: Example;
10
+ inputs: Record<string, any>;
11
+ outputs: Record<string, any>[];
12
+ referenceOutputs?: Record<string, any>;
13
+ }) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow>;
14
+ export type ComparativeEvaluator = _ComparativeEvaluatorLegacy | _ComparativeEvaluator;
5
15
  export interface EvaluateComparativeOptions {
6
16
  /**
7
17
  * A list of evaluators to use for comparative evaluation.
8
18
  */
9
- evaluators: Array<(runs: Run[], example: Example) => ComparisonEvaluationResultRow | Promise<ComparisonEvaluationResultRow>>;
19
+ evaluators: Array<ComparativeEvaluator>;
10
20
  /**
11
21
  * Randomize the order of outputs for each evaluation
12
22
  * @default false
@@ -156,7 +156,16 @@ export async function evaluateComparative(experiments, options) {
156
156
  const caller = new AsyncCaller({ maxConcurrency: options.maxConcurrency });
157
157
  async function evaluateAndSubmitFeedback(runs, example, evaluator) {
158
158
  const expectedRunIds = new Set(runs.map((r) => r.id));
159
- const result = await evaluator(options.randomizeOrder ? shuffle(runs) : runs, example);
159
+ // Check if evaluator expects an object parameter
160
+ const result = evaluator.length === 1
161
+ ? await evaluator({
162
+ runs: options.randomizeOrder ? shuffle(runs) : runs,
163
+ example,
164
+ inputs: example.inputs,
165
+ outputs: runs.map((run) => run.outputs || {}),
166
+ referenceOutputs: example.outputs || {},
167
+ })
168
+ : await evaluator(runs, example);
160
169
  for (const [runId, score] of Object.entries(result.scores)) {
161
170
  // validate if the run id
162
171
  if (!expectedRunIds.has(runId)) {
@@ -172,7 +181,15 @@ export async function evaluateComparative(experiments, options) {
172
181
  }
173
182
  const tracedEvaluators = options.evaluators.map((evaluator) => traceable(async (runs, example) => {
174
183
  const evaluatorRun = getCurrentRunTree();
175
- const result = await evaluator(runs, example);
184
+ const result = evaluator.length === 1
185
+ ? await evaluator({
186
+ runs: options.randomizeOrder ? shuffle(runs) : runs,
187
+ example,
188
+ inputs: example.inputs,
189
+ outputs: runs.map((run) => run.outputs || {}),
190
+ referenceOutputs: example.outputs || {},
191
+ })
192
+ : await evaluator(runs, example);
176
193
  // sanitise the payload before sending to LangSmith
177
194
  evaluatorRun.inputs = { runs: runs, example: example };
178
195
  evaluatorRun.outputs = result;
@@ -16,7 +16,14 @@ class DynamicRunEvaluator {
16
16
  });
17
17
  this.func = ((input) => {
18
18
  const { run, example } = input.langSmithRunAndExample;
19
- return evaluator(run, example);
19
+ return evaluator({
20
+ ...run,
21
+ run,
22
+ example,
23
+ inputs: example?.inputs,
24
+ outputs: run?.outputs,
25
+ referenceOutputs: example?.outputs,
26
+ }, example);
20
27
  });
21
28
  }
22
29
  isEvaluationResults(x) {
@@ -72,7 +72,19 @@ export type EvaluationResults = {
72
72
  export interface RunEvaluator {
73
73
  evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult | EvaluationResults>;
74
74
  }
75
- export type RunEvaluatorLike = ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>) | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults);
75
+ export type RunEvaluatorLike = ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>) | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults) | ((run: Run, example: Example) => Promise<EvaluationResult | EvaluationResults>) | ((run: Run, example: Example) => EvaluationResult | EvaluationResults) | ((args: {
76
+ run: Run;
77
+ example: Example;
78
+ inputs: Record<string, any>;
79
+ outputs: Record<string, any>;
80
+ referenceOutputs?: Record<string, any>;
81
+ }) => EvaluationResult | EvaluationResults) | ((args: {
82
+ run: Run;
83
+ example: Example;
84
+ inputs: Record<string, any>;
85
+ outputs: Record<string, any>;
86
+ referenceOutputs?: Record<string, any>;
87
+ }) => Promise<EvaluationResult | EvaluationResults>);
76
88
  /**
77
89
  * Wraps an evaluator function + implements the RunEvaluator interface.
78
90
  */
@@ -13,7 +13,14 @@ export class DynamicRunEvaluator {
13
13
  });
14
14
  this.func = ((input) => {
15
15
  const { run, example } = input.langSmithRunAndExample;
16
- return evaluator(run, example);
16
+ return evaluator({
17
+ ...run,
18
+ run,
19
+ example,
20
+ inputs: example?.inputs,
21
+ outputs: run?.outputs,
22
+ referenceOutputs: example?.outputs,
23
+ }, example);
17
24
  });
18
25
  }
19
26
  isEvaluationResults(x) {
package/dist/index.cjs CHANGED
@@ -8,4 +8,4 @@ Object.defineProperty(exports, "RunTree", { enumerable: true, get: function () {
8
8
  var fetch_js_1 = require("./singletons/fetch.cjs");
9
9
  Object.defineProperty(exports, "overrideFetchImplementation", { enumerable: true, get: function () { return fetch_js_1.overrideFetchImplementation; } });
10
10
  // Update using yarn bump-version
11
- exports.__version__ = "0.2.8";
11
+ exports.__version__ = "0.2.10";
package/dist/index.d.ts CHANGED
@@ -2,4 +2,4 @@ export { Client, type ClientConfig, type LangSmithTracingClientInterface, } from
2
2
  export type { Dataset, Example, TracerSession, Run, Feedback, RetrieverOutput, } from "./schemas.js";
3
3
  export { RunTree, type RunTreeConfig } from "./run_trees.js";
4
4
  export { overrideFetchImplementation } from "./singletons/fetch.js";
5
- export declare const __version__ = "0.2.8";
5
+ export declare const __version__ = "0.2.10";
package/dist/index.js CHANGED
@@ -2,4 +2,4 @@ export { Client, } from "./client.js";
2
2
  export { RunTree } from "./run_trees.js";
3
3
  export { overrideFetchImplementation } from "./singletons/fetch.js";
4
4
  // Update using yarn bump-version
5
- export const __version__ = "0.2.8";
5
+ export const __version__ = "0.2.10";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "langsmith",
3
- "version": "0.2.8",
3
+ "version": "0.2.10",
4
4
  "description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
5
5
  "packageManager": "yarn@1.22.19",
6
6
  "files": [