langsmith 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +1 -1
  2. package/dist/client.cjs +71 -31
  3. package/dist/client.d.ts +7 -3
  4. package/dist/client.js +48 -8
  5. package/dist/evaluation/_random_name.cjs +730 -0
  6. package/dist/evaluation/_random_name.d.ts +5 -0
  7. package/dist/evaluation/_random_name.js +726 -0
  8. package/dist/evaluation/_runner.cjs +709 -0
  9. package/dist/evaluation/_runner.d.ts +158 -0
  10. package/dist/evaluation/_runner.js +705 -0
  11. package/dist/evaluation/evaluator.cjs +86 -0
  12. package/dist/evaluation/evaluator.d.ts +31 -27
  13. package/dist/evaluation/evaluator.js +83 -1
  14. package/dist/evaluation/index.cjs +3 -1
  15. package/dist/evaluation/index.d.ts +1 -0
  16. package/dist/evaluation/index.js +1 -0
  17. package/dist/index.cjs +1 -1
  18. package/dist/index.d.ts +1 -1
  19. package/dist/index.js +1 -1
  20. package/dist/run_trees.cjs +4 -4
  21. package/dist/run_trees.d.ts +2 -1
  22. package/dist/run_trees.js +4 -4
  23. package/dist/schemas.d.ts +22 -1
  24. package/dist/traceable.cjs +237 -62
  25. package/dist/traceable.d.ts +7 -3
  26. package/dist/traceable.js +235 -61
  27. package/dist/utils/_git.cjs +72 -0
  28. package/dist/utils/_git.d.ts +14 -0
  29. package/dist/utils/_git.js +67 -0
  30. package/dist/utils/_uuid.cjs +33 -0
  31. package/dist/utils/_uuid.d.ts +1 -0
  32. package/dist/utils/_uuid.js +6 -0
  33. package/dist/utils/async_caller.cjs +17 -9
  34. package/dist/utils/async_caller.js +17 -9
  35. package/dist/utils/atee.cjs +24 -0
  36. package/dist/utils/atee.d.ts +1 -0
  37. package/dist/utils/atee.js +20 -0
  38. package/dist/wrappers/openai.cjs +53 -74
  39. package/dist/wrappers/openai.d.ts +10 -11
  40. package/dist/wrappers/openai.js +53 -74
  41. package/package.json +4 -4
@@ -1,2 +1,88 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.runEvaluator = exports.DynamicRunEvaluator = void 0;
4
+ const uuid_1 = require("uuid");
5
+ const traceable_js_1 = require("../traceable.cjs");
6
/**
 * Adapts a plain evaluator function to the RunEvaluator interface.
 *
 * The wrapped evaluator is invoked as `evaluator(run, example)`.
 */
class DynamicRunEvaluator {
    /**
     * @param evaluator Function called with `(run, example)` for each evaluation.
     */
    constructor(evaluator) {
        // The arrow function must stay bound to a local called `wrappedFunc`:
        // `this.func.name` is used as the fallback feedback key and that name is
        // inferred from this binding.
        // NOTE(review): this means the fallback key is always "wrappedFunc", not
        // the name of the user's evaluator - confirm that this is intended.
        const wrappedFunc = (input) => {
            // Read the two values by name instead of relying on property order.
            const { run, example } = input.langSmithRunAndExample;
            return evaluator(run, example);
        };
        Object.defineProperty(this, "func", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: wrappedFunc,
        });
    }
    /**
     * Coerces an evaluator return value that has no `key` into a single result.
     * @param results The object returned by the evaluator.
     * @param sourceRunId Identifier used when the result carries none.
     * @returns The coerced evaluation result.
     * @throws Error if the object has a `results` property (batches are not supported).
     */
    coerceEvaluationResults(results, sourceRunId) {
        if ("results" in results) {
            throw new Error("EvaluationResults not supported yet.");
        }
        return this.coerceEvaluationResult(results, sourceRunId, true);
    }
    /**
     * Ensures a result carries a `sourceRunId` and, optionally, a `key`.
     * @param result The object returned by the evaluator.
     * @param sourceRunId Identifier used when the result has no truthy `sourceRunId`.
     * @param allowNoKey When true, a result without `key` gets `this.func.name` as key.
     * @returns The same object when it already has a `key`; otherwise a new object.
     */
    coerceEvaluationResult(result, sourceRunId, allowNoKey = false) {
        if ("key" in result) {
            // Keyed results are completed in place and returned as-is.
            if (!result.sourceRunId) {
                result.sourceRunId = sourceRunId;
            }
            return result;
        }
        // Build a copy so the evaluator's own object is left untouched.
        const coerced = { sourceRunId, ...result };
        if (allowNoKey) {
            coerced.key = this.func.name;
        }
        // A falsy `sourceRunId` copied from the result must not hide the generated one.
        if (!coerced.sourceRunId) {
            coerced.sourceRunId = sourceRunId;
        }
        return coerced;
    }
    /**
     * Evaluates a run with an optional example and returns the evaluation result.
     * @param run The run to evaluate.
     * @param example The optional example to use for evaluation.
     * @param options Optional configuration forwarded to the traceable wrapper.
     * @returns A promise that resolves to the evaluation result.
     * @throws Error if the evaluator does not return an object.
     */
    async evaluateRun(run, example, options) {
        const sourceRunId = (0, uuid_1.v4)();
        const metadata = {
            targetRunId: run.id,
        };
        if ("session_id" in run) {
            metadata["experiment"] = run.session_id;
        }
        const wrappedTraceableFunc = (0, traceable_js_1.wrapFunctionAndEnsureTraceable)(this.func, options || {}, "evaluator");
        // Pass data via `langSmithRunAndExample` key to avoid conflicts with other
        // inputs. This key is extracted in the wrapped function, with `run` and
        // `example` passed to evaluator function as arguments.
        const langSmithRunAndExample = { run, example };
        const result = await wrappedTraceableFunc({ langSmithRunAndExample }, { metadata });
        // Validate before using the `in` operator, which throws a TypeError when
        // its right-hand operand is not an object (including null).
        if (typeof result !== "object" || result === null) {
            throw new Error("Evaluator function must return an object.");
        }
        // Check the one required property of EvaluationResult since 'instanceof' is not possible
        if ("key" in result) {
            return this.coerceEvaluationResult(result, sourceRunId);
        }
        return this.coerceEvaluationResults(result, sourceRunId);
    }
}
84
+ exports.DynamicRunEvaluator = DynamicRunEvaluator;
85
/**
 * Wraps an evaluator function in a DynamicRunEvaluator.
 * @param func The evaluator function to wrap.
 * @returns A new DynamicRunEvaluator built around `func`.
 */
function runEvaluator(func) {
    const dynamicEvaluator = new DynamicRunEvaluator(func);
    return dynamicEvaluator;
}
88
+ exports.runEvaluator = runEvaluator;
@@ -1,4 +1,5 @@
1
- import { Example, Run, ScoreType, ValueType } from "../schemas.js";
1
+ import { Example, FeedbackConfig, Run, ScoreType, ValueType } from "../schemas.js";
2
+ import { RunTreeConfig } from "../run_trees.js";
2
3
  /**
3
4
  * Represents a categorical class.
4
5
  */
@@ -12,31 +13,6 @@ export type Category = {
12
13
  */
13
14
  label: string;
14
15
  };
15
- /**
16
- * Configuration for feedback.
17
- */
18
- export type FeedbackConfig = {
19
- /**
20
- * The type of feedback.
21
- * - "continuous": Feedback with a continuous numeric.
22
- * - "categorical": Feedback with a categorical value (classes)
23
- * - "freeform": Feedback with a freeform text value (notes).
24
- */
25
- type: "continuous" | "categorical" | "freeform";
26
- /**
27
- * The minimum value for continuous feedback.
28
- */
29
- min?: number;
30
- /**
31
- * The maximum value for continuous feedback.
32
- */
33
- max?: number;
34
- /**
35
- * The categories for categorical feedback.
36
- * Each category can be a string or an object with additional properties.
37
- */
38
- categories?: (Category | Record<string, unknown>)[];
39
- };
40
16
  /**
41
17
  * Represents the result of an evaluation.
42
18
  */
@@ -83,6 +59,34 @@ export type EvaluationResult = {
83
59
  */
84
60
  feedbackConfig?: FeedbackConfig;
85
61
  };
62
+ /**
63
+ * Batch evaluation results, if your evaluator wishes
64
+ * to return multiple scores.
65
+ */
66
+ export type EvaluationResults = {
67
+ /**
68
+ * The evaluation results.
69
+ */
70
+ results: Array<EvaluationResult>;
71
+ };
86
72
  export interface RunEvaluator {
87
- evaluateRun(run: Run, example?: Example): Promise<EvaluationResult>;
73
+ evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult>;
74
+ }
75
+ export type RunEvaluatorLike = ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>) | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults);
76
+ /**
77
+ * Wraps an evaluator function + implements the RunEvaluator interface.
78
+ */
79
+ export declare class DynamicRunEvaluator<Func extends (...args: any[]) => any> implements RunEvaluator {
80
+ func: Func;
81
+ constructor(evaluator: Func);
82
+ private coerceEvaluationResults;
83
+ private coerceEvaluationResult;
84
+ /**
85
+ * Evaluates a run with an optional example and returns the evaluation result.
86
+ * @param run The run to evaluate.
87
+ * @param example The optional example to use for evaluation.
88
+ * @returns A promise that resolves to the evaluation result.
89
+ */
90
+ evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult>;
88
91
  }
92
+ export declare function runEvaluator(func: RunEvaluatorLike): RunEvaluator;
@@ -1 +1,83 @@
1
- export {};
1
+ import { v4 as uuidv4 } from "uuid";
2
+ import { wrapFunctionAndEnsureTraceable } from "../traceable.js";
3
/**
 * Adapts a plain evaluator function to the RunEvaluator interface.
 *
 * The wrapped evaluator is invoked as `evaluator(run, example)`.
 */
export class DynamicRunEvaluator {
    /**
     * @param evaluator Function called with `(run, example)` for each evaluation.
     */
    constructor(evaluator) {
        // The arrow function must stay bound to a local called `wrappedFunc`:
        // `this.func.name` is used as the fallback feedback key and that name is
        // inferred from this binding.
        // NOTE(review): this means the fallback key is always "wrappedFunc", not
        // the name of the user's evaluator - confirm that this is intended.
        const wrappedFunc = (input) => {
            // Read the two values by name instead of relying on property order.
            const { run, example } = input.langSmithRunAndExample;
            return evaluator(run, example);
        };
        Object.defineProperty(this, "func", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: wrappedFunc,
        });
    }
    /**
     * Coerces an evaluator return value that has no `key` into a single result.
     * @param results The object returned by the evaluator.
     * @param sourceRunId Identifier used when the result carries none.
     * @returns The coerced evaluation result.
     * @throws Error if the object has a `results` property (batches are not supported).
     */
    coerceEvaluationResults(results, sourceRunId) {
        if ("results" in results) {
            throw new Error("EvaluationResults not supported yet.");
        }
        return this.coerceEvaluationResult(results, sourceRunId, true);
    }
    /**
     * Ensures a result carries a `sourceRunId` and, optionally, a `key`.
     * @param result The object returned by the evaluator.
     * @param sourceRunId Identifier used when the result has no truthy `sourceRunId`.
     * @param allowNoKey When true, a result without `key` gets `this.func.name` as key.
     * @returns The same object when it already has a `key`; otherwise a new object.
     */
    coerceEvaluationResult(result, sourceRunId, allowNoKey = false) {
        if ("key" in result) {
            // Keyed results are completed in place and returned as-is.
            if (!result.sourceRunId) {
                result.sourceRunId = sourceRunId;
            }
            return result;
        }
        // Build a copy so the evaluator's own object is left untouched.
        const coerced = { sourceRunId, ...result };
        if (allowNoKey) {
            coerced.key = this.func.name;
        }
        // A falsy `sourceRunId` copied from the result must not hide the generated one.
        if (!coerced.sourceRunId) {
            coerced.sourceRunId = sourceRunId;
        }
        return coerced;
    }
    /**
     * Evaluates a run with an optional example and returns the evaluation result.
     * @param run The run to evaluate.
     * @param example The optional example to use for evaluation.
     * @param options Optional configuration forwarded to the traceable wrapper.
     * @returns A promise that resolves to the evaluation result.
     * @throws Error if the evaluator does not return an object.
     */
    async evaluateRun(run, example, options) {
        const sourceRunId = uuidv4();
        const metadata = {
            targetRunId: run.id,
        };
        if ("session_id" in run) {
            metadata["experiment"] = run.session_id;
        }
        const wrappedTraceableFunc = wrapFunctionAndEnsureTraceable(this.func, options || {}, "evaluator");
        // Pass data via `langSmithRunAndExample` key to avoid conflicts with other
        // inputs. This key is extracted in the wrapped function, with `run` and
        // `example` passed to evaluator function as arguments.
        const langSmithRunAndExample = { run, example };
        const result = await wrappedTraceableFunc({ langSmithRunAndExample }, { metadata });
        // Validate before using the `in` operator, which throws a TypeError when
        // its right-hand operand is not an object (including null).
        if (typeof result !== "object" || result === null) {
            throw new Error("Evaluator function must return an object.");
        }
        // Check the one required property of EvaluationResult since 'instanceof' is not possible
        if ("key" in result) {
            return this.coerceEvaluationResult(result, sourceRunId);
        }
        return this.coerceEvaluationResults(result, sourceRunId);
    }
}
81
/**
 * Wraps an evaluator function in a DynamicRunEvaluator.
 * @param func The evaluator function to wrap.
 * @returns A new DynamicRunEvaluator built around `func`.
 */
export function runEvaluator(func) {
    const dynamicEvaluator = new DynamicRunEvaluator(func);
    return dynamicEvaluator;
}
@@ -1,5 +1,7 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.StringEvaluator = void 0;
3
+ exports.evaluate = exports.StringEvaluator = void 0;
4
4
  var string_evaluator_js_1 = require("./string_evaluator.cjs");
5
5
  Object.defineProperty(exports, "StringEvaluator", { enumerable: true, get: function () { return string_evaluator_js_1.StringEvaluator; } });
6
+ var _runner_js_1 = require("./_runner.cjs");
7
+ Object.defineProperty(exports, "evaluate", { enumerable: true, get: function () { return _runner_js_1.evaluate; } });
@@ -1,2 +1,3 @@
1
1
  export { RunEvaluator, EvaluationResult } from "./evaluator.js";
2
2
  export { StringEvaluator, GradingFunctionParams, GradingFunctionResult, } from "./string_evaluator.js";
3
+ export { evaluate, type EvaluateOptions } from "./_runner.js";
@@ -1 +1,2 @@
1
1
  export { StringEvaluator, } from "./string_evaluator.js";
2
+ export { evaluate } from "./_runner.js";
package/dist/index.cjs CHANGED
@@ -6,4 +6,4 @@ Object.defineProperty(exports, "Client", { enumerable: true, get: function () {
6
6
  var run_trees_js_1 = require("./run_trees.cjs");
7
7
  Object.defineProperty(exports, "RunTree", { enumerable: true, get: function () { return run_trees_js_1.RunTree; } });
8
8
  // Update using yarn bump-version
9
- exports.__version__ = "0.1.20";
9
+ exports.__version__ = "0.1.22";
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  export { Client } from "./client.js";
2
2
  export type { Dataset, Example, TracerSession, Run, Feedback, } from "./schemas.js";
3
3
  export { RunTree, type RunTreeConfig } from "./run_trees.js";
4
- export declare const __version__ = "0.1.20";
4
+ export declare const __version__ = "0.1.22";
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
1
  export { Client } from "./client.js";
2
2
  export { RunTree } from "./run_trees.js";
3
3
  // Update using yarn bump-version
4
- export const __version__ = "0.1.20";
4
+ export const __version__ = "0.1.22";
@@ -232,7 +232,7 @@ class RunTree {
232
232
  extra: {},
233
233
  };
234
234
  }
235
- async createChild(config) {
235
+ createChild(config) {
236
236
  const child = new RunTree({
237
237
  ...config,
238
238
  parent_run: this,
@@ -243,9 +243,9 @@ class RunTree {
243
243
  return child;
244
244
  }
245
245
  async end(outputs, error, endTime = Date.now()) {
246
- this.outputs = outputs;
247
- this.error = error;
248
- this.end_time = endTime;
246
+ this.outputs = this.outputs ?? outputs;
247
+ this.error = this.error ?? error;
248
+ this.end_time = this.end_time ?? endTime;
249
249
  }
250
250
  async _convertToCreate(run, excludeChildRuns = true) {
251
251
  const runExtra = run.extra ?? {};
@@ -20,6 +20,7 @@ export interface RunTreeConfig {
20
20
  outputs?: KVMap;
21
21
  reference_example_id?: string;
22
22
  client?: Client;
23
+ on_end?: (runTree: RunTree) => void;
23
24
  }
24
25
  export interface RunnableConfigLike {
25
26
  /**
@@ -65,7 +66,7 @@ export declare class RunTree implements BaseRun {
65
66
  metadata?: KVMap;
66
67
  }): RunTree;
67
68
  private static getDefaultConfig;
68
- createChild(config: RunTreeConfig): Promise<RunTree>;
69
+ createChild(config: RunTreeConfig): RunTree;
69
70
  end(outputs?: KVMap, error?: string, endTime?: number): Promise<void>;
70
71
  private _convertToCreate;
71
72
  postRun(excludeChildRuns?: boolean): Promise<void>;
package/dist/run_trees.js CHANGED
@@ -205,7 +205,7 @@ export class RunTree {
205
205
  extra: {},
206
206
  };
207
207
  }
208
- async createChild(config) {
208
+ createChild(config) {
209
209
  const child = new RunTree({
210
210
  ...config,
211
211
  parent_run: this,
@@ -216,9 +216,9 @@ export class RunTree {
216
216
  return child;
217
217
  }
218
218
  async end(outputs, error, endTime = Date.now()) {
219
- this.outputs = outputs;
220
- this.error = error;
221
- this.end_time = endTime;
219
+ this.outputs = this.outputs ?? outputs;
220
+ this.error = this.error ?? error;
221
+ this.end_time = this.end_time ?? endTime;
222
222
  }
223
223
  async _convertToCreate(run, excludeChildRuns = true) {
224
224
  const runExtra = run.extra ?? {};
package/dist/schemas.d.ts CHANGED
@@ -5,6 +5,9 @@ export interface TracerSession {
5
5
  end_time?: number;
6
6
  description?: string;
7
7
  name?: string;
8
+ /** Extra metadata for the project. */
9
+ extra?: KVMap;
10
+ reference_dataset_id?: string;
8
11
  }
9
12
  export interface TracerSessionResult extends TracerSession {
10
13
  run_count?: number;
@@ -15,7 +18,6 @@ export interface TracerSessionResult extends TracerSession {
15
18
  completion_tokens?: number;
16
19
  last_run_start_time?: number;
17
20
  feedback_stats?: Record<string, unknown>;
18
- reference_dataset_id?: string;
19
21
  run_facets?: KVMap[];
20
22
  }
21
23
  export type KVMap = Record<string, any>;
@@ -78,6 +80,14 @@ export interface BaseRun {
78
80
  */
79
81
  dotted_order?: string;
80
82
  }
83
+ type S3URL = {
84
+ ROOT: {
85
+ /** A pre-signed URL */
86
+ presigned_url: string;
87
+ /** The S3 path to the object in storage */
88
+ s3_url: string;
89
+ };
90
+ };
81
91
  /**
82
92
  * Describes properties of a run when loaded from the database.
83
93
  * Extends the BaseRun interface.
@@ -111,6 +121,10 @@ export interface Run extends BaseRun {
111
121
  parent_run_ids?: string[];
112
122
  /** Whether the run is included in a dataset. */
113
123
  in_dataset?: boolean;
124
+ /** The output S3 URLs */
125
+ outputs_s3_urls?: S3URL;
126
+ /** The input S3 URLs */
127
+ inputs_s3_urls?: S3URL;
114
128
  }
115
129
  export interface RunCreate extends BaseRun {
116
130
  revision_id?: string;
@@ -235,6 +249,9 @@ export interface FeedbackCategory {
235
249
  export interface FeedbackConfig {
236
250
  /**
237
251
  * The type of feedback.
252
+ * - "continuous": Feedback with a continuous numeric value.
253
+ * - "categorical": Feedback with a categorical value (classes)
254
+ * - "freeform": Feedback with a freeform text value (notes).
238
255
  */
239
256
  type: "continuous" | "categorical" | "freeform";
240
257
  /**
@@ -246,6 +263,9 @@ export interface FeedbackConfig {
246
263
  */
247
264
  max?: number | null;
248
265
  /**
266
+ * The categories for categorical feedback.
267
+ * Each category can be a string or an object with additional properties.
268
+ *
249
269
  * If feedback is categorical, this defines the valid categories the server will accept.
250
270
  * Not applicable to continuous or freeform feedback types.
251
271
  */
@@ -256,3 +276,4 @@ export interface DatasetDiffInfo {
256
276
  examples_added: string[];
257
277
  examples_removed: string[];
258
278
  }
279
+ export {};