langsmith 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/client.cjs +71 -31
- package/dist/client.d.ts +7 -3
- package/dist/client.js +48 -8
- package/dist/evaluation/_random_name.cjs +730 -0
- package/dist/evaluation/_random_name.d.ts +5 -0
- package/dist/evaluation/_random_name.js +726 -0
- package/dist/evaluation/_runner.cjs +709 -0
- package/dist/evaluation/_runner.d.ts +158 -0
- package/dist/evaluation/_runner.js +705 -0
- package/dist/evaluation/evaluator.cjs +86 -0
- package/dist/evaluation/evaluator.d.ts +31 -27
- package/dist/evaluation/evaluator.js +83 -1
- package/dist/evaluation/index.cjs +3 -1
- package/dist/evaluation/index.d.ts +1 -0
- package/dist/evaluation/index.js +1 -0
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/run_trees.cjs +4 -4
- package/dist/run_trees.d.ts +2 -1
- package/dist/run_trees.js +4 -4
- package/dist/schemas.d.ts +22 -1
- package/dist/traceable.cjs +237 -62
- package/dist/traceable.d.ts +7 -3
- package/dist/traceable.js +235 -61
- package/dist/utils/_git.cjs +72 -0
- package/dist/utils/_git.d.ts +14 -0
- package/dist/utils/_git.js +67 -0
- package/dist/utils/_uuid.cjs +33 -0
- package/dist/utils/_uuid.d.ts +1 -0
- package/dist/utils/_uuid.js +6 -0
- package/dist/utils/async_caller.cjs +17 -9
- package/dist/utils/async_caller.js +17 -9
- package/dist/utils/atee.cjs +24 -0
- package/dist/utils/atee.d.ts +1 -0
- package/dist/utils/atee.js +20 -0
- package/dist/wrappers/openai.cjs +53 -74
- package/dist/wrappers/openai.d.ts +10 -11
- package/dist/wrappers/openai.js +53 -74
- package/package.json +4 -4
|
@@ -1,2 +1,88 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runEvaluator = exports.DynamicRunEvaluator = void 0;
|
|
4
|
+
const uuid_1 = require("uuid");
|
|
5
|
+
const traceable_js_1 = require("../traceable.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* Wraps an evaluator function + implements the RunEvaluator interface.
|
|
8
|
+
*/
|
|
9
|
+
class DynamicRunEvaluator {
|
|
10
|
+
constructor(evaluator) {
|
|
11
|
+
Object.defineProperty(this, "func", {
|
|
12
|
+
enumerable: true,
|
|
13
|
+
configurable: true,
|
|
14
|
+
writable: true,
|
|
15
|
+
value: void 0
|
|
16
|
+
});
|
|
17
|
+
const wrappedFunc = (input) => {
|
|
18
|
+
const runAndExample = input.langSmithRunAndExample;
|
|
19
|
+
return evaluator(...Object.values(runAndExample));
|
|
20
|
+
};
|
|
21
|
+
this.func = wrappedFunc;
|
|
22
|
+
}
|
|
23
|
+
coerceEvaluationResults(results, sourceRunId) {
|
|
24
|
+
if ("results" in results) {
|
|
25
|
+
throw new Error("EvaluationResults not supported yet.");
|
|
26
|
+
}
|
|
27
|
+
return this.coerceEvaluationResult(results, sourceRunId, true);
|
|
28
|
+
}
|
|
29
|
+
coerceEvaluationResult(result, sourceRunId, allowNoKey = false) {
|
|
30
|
+
if ("key" in result) {
|
|
31
|
+
if (!result.sourceRunId) {
|
|
32
|
+
result.sourceRunId = sourceRunId;
|
|
33
|
+
}
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
if (!("key" in result)) {
|
|
37
|
+
if (allowNoKey) {
|
|
38
|
+
result["key"] = this.func.name;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return {
|
|
42
|
+
sourceRunId,
|
|
43
|
+
...result,
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Evaluates a run with an optional example and returns the evaluation result.
|
|
48
|
+
* @param run The run to evaluate.
|
|
49
|
+
* @param example The optional example to use for evaluation.
|
|
50
|
+
* @returns A promise that extracts to the evaluation result.
|
|
51
|
+
*/
|
|
52
|
+
async evaluateRun(run, example, options) {
|
|
53
|
+
const sourceRunId = (0, uuid_1.v4)();
|
|
54
|
+
const metadata = {
|
|
55
|
+
targetRunId: run.id,
|
|
56
|
+
};
|
|
57
|
+
if ("session_id" in run) {
|
|
58
|
+
metadata["experiment"] = run.session_id;
|
|
59
|
+
}
|
|
60
|
+
const wrappedTraceableFunc = (0, traceable_js_1.wrapFunctionAndEnsureTraceable)(this.func, options || {}, "evaluator");
|
|
61
|
+
// Pass data via `langSmithRunAndExample` key to avoid conflicts with other
|
|
62
|
+
// inputs. This key is extracted in the wrapped function, with `run` and
|
|
63
|
+
// `example` passed to evaluator function as arguments.
|
|
64
|
+
const langSmithRunAndExample = {
|
|
65
|
+
run,
|
|
66
|
+
example,
|
|
67
|
+
};
|
|
68
|
+
const result = (await wrappedTraceableFunc({ langSmithRunAndExample }, {
|
|
69
|
+
metadata,
|
|
70
|
+
}));
|
|
71
|
+
// Check the one required property of EvaluationResult since 'instanceof' is not possible
|
|
72
|
+
if ("key" in result) {
|
|
73
|
+
if (!result.sourceRunId) {
|
|
74
|
+
result.sourceRunId = sourceRunId;
|
|
75
|
+
}
|
|
76
|
+
return result;
|
|
77
|
+
}
|
|
78
|
+
if (typeof result !== "object") {
|
|
79
|
+
throw new Error("Evaluator function must return an object.");
|
|
80
|
+
}
|
|
81
|
+
return this.coerceEvaluationResults(result, sourceRunId);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
exports.DynamicRunEvaluator = DynamicRunEvaluator;
|
|
85
|
+
function runEvaluator(func) {
|
|
86
|
+
return new DynamicRunEvaluator(func);
|
|
87
|
+
}
|
|
88
|
+
exports.runEvaluator = runEvaluator;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { Example, Run, ScoreType, ValueType } from "../schemas.js";
|
|
1
|
+
import { Example, FeedbackConfig, Run, ScoreType, ValueType } from "../schemas.js";
|
|
2
|
+
import { RunTreeConfig } from "../run_trees.js";
|
|
2
3
|
/**
|
|
3
4
|
* Represents a categorical class.
|
|
4
5
|
*/
|
|
@@ -12,31 +13,6 @@ export type Category = {
|
|
|
12
13
|
*/
|
|
13
14
|
label: string;
|
|
14
15
|
};
|
|
15
|
-
/**
|
|
16
|
-
* Configuration for feedback.
|
|
17
|
-
*/
|
|
18
|
-
export type FeedbackConfig = {
|
|
19
|
-
/**
|
|
20
|
-
* The type of feedback.
|
|
21
|
-
* - "continuous": Feedback with a continuous numeric.
|
|
22
|
-
* - "categorical": Feedback with a categorical value (classes)
|
|
23
|
-
* - "freeform": Feedback with a freeform text value (notes).
|
|
24
|
-
*/
|
|
25
|
-
type: "continuous" | "categorical" | "freeform";
|
|
26
|
-
/**
|
|
27
|
-
* The minimum value for continuous feedback.
|
|
28
|
-
*/
|
|
29
|
-
min?: number;
|
|
30
|
-
/**
|
|
31
|
-
* The maximum value for continuous feedback.
|
|
32
|
-
*/
|
|
33
|
-
max?: number;
|
|
34
|
-
/**
|
|
35
|
-
* The categories for categorical feedback.
|
|
36
|
-
* Each category can be a string or an object with additional properties.
|
|
37
|
-
*/
|
|
38
|
-
categories?: (Category | Record<string, unknown>)[];
|
|
39
|
-
};
|
|
40
16
|
/**
|
|
41
17
|
* Represents the result of an evaluation.
|
|
42
18
|
*/
|
|
@@ -83,6 +59,34 @@ export type EvaluationResult = {
|
|
|
83
59
|
*/
|
|
84
60
|
feedbackConfig?: FeedbackConfig;
|
|
85
61
|
};
|
|
62
|
+
/**
|
|
63
|
+
* Batch evaluation results, if your evaluator wishes
|
|
64
|
+
* to return multiple scores.
|
|
65
|
+
*/
|
|
66
|
+
export type EvaluationResults = {
|
|
67
|
+
/**
|
|
68
|
+
* The evaluation results.
|
|
69
|
+
*/
|
|
70
|
+
results: Array<EvaluationResult>;
|
|
71
|
+
};
|
|
86
72
|
export interface RunEvaluator {
|
|
87
|
-
evaluateRun(run: Run, example?: Example): Promise<EvaluationResult>;
|
|
73
|
+
evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult>;
|
|
74
|
+
}
|
|
75
|
+
export type RunEvaluatorLike = ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>) | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults);
|
|
76
|
+
/**
|
|
77
|
+
* Wraps an evaluator function + implements the RunEvaluator interface.
|
|
78
|
+
*/
|
|
79
|
+
export declare class DynamicRunEvaluator<Func extends (...args: any[]) => any> implements RunEvaluator {
|
|
80
|
+
func: Func;
|
|
81
|
+
constructor(evaluator: Func);
|
|
82
|
+
private coerceEvaluationResults;
|
|
83
|
+
private coerceEvaluationResult;
|
|
84
|
+
/**
|
|
85
|
+
* Evaluates a run with an optional example and returns the evaluation result.
|
|
86
|
+
* @param run The run to evaluate.
|
|
87
|
+
* @param example The optional example to use for evaluation.
|
|
88
|
+
* @returns A promise that extracts to the evaluation result.
|
|
89
|
+
*/
|
|
90
|
+
evaluateRun(run: Run, example?: Example, options?: Partial<RunTreeConfig>): Promise<EvaluationResult>;
|
|
88
91
|
}
|
|
92
|
+
export declare function runEvaluator(func: RunEvaluatorLike): RunEvaluator;
|
|
@@ -1 +1,83 @@
|
|
|
1
|
-
|
|
1
|
+
import { v4 as uuidv4 } from "uuid";
|
|
2
|
+
import { wrapFunctionAndEnsureTraceable } from "../traceable.js";
|
|
3
|
+
/**
|
|
4
|
+
* Wraps an evaluator function + implements the RunEvaluator interface.
|
|
5
|
+
*/
|
|
6
|
+
export class DynamicRunEvaluator {
|
|
7
|
+
constructor(evaluator) {
|
|
8
|
+
Object.defineProperty(this, "func", {
|
|
9
|
+
enumerable: true,
|
|
10
|
+
configurable: true,
|
|
11
|
+
writable: true,
|
|
12
|
+
value: void 0
|
|
13
|
+
});
|
|
14
|
+
const wrappedFunc = (input) => {
|
|
15
|
+
const runAndExample = input.langSmithRunAndExample;
|
|
16
|
+
return evaluator(...Object.values(runAndExample));
|
|
17
|
+
};
|
|
18
|
+
this.func = wrappedFunc;
|
|
19
|
+
}
|
|
20
|
+
coerceEvaluationResults(results, sourceRunId) {
|
|
21
|
+
if ("results" in results) {
|
|
22
|
+
throw new Error("EvaluationResults not supported yet.");
|
|
23
|
+
}
|
|
24
|
+
return this.coerceEvaluationResult(results, sourceRunId, true);
|
|
25
|
+
}
|
|
26
|
+
coerceEvaluationResult(result, sourceRunId, allowNoKey = false) {
|
|
27
|
+
if ("key" in result) {
|
|
28
|
+
if (!result.sourceRunId) {
|
|
29
|
+
result.sourceRunId = sourceRunId;
|
|
30
|
+
}
|
|
31
|
+
return result;
|
|
32
|
+
}
|
|
33
|
+
if (!("key" in result)) {
|
|
34
|
+
if (allowNoKey) {
|
|
35
|
+
result["key"] = this.func.name;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return {
|
|
39
|
+
sourceRunId,
|
|
40
|
+
...result,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Evaluates a run with an optional example and returns the evaluation result.
|
|
45
|
+
* @param run The run to evaluate.
|
|
46
|
+
* @param example The optional example to use for evaluation.
|
|
47
|
+
* @returns A promise that extracts to the evaluation result.
|
|
48
|
+
*/
|
|
49
|
+
async evaluateRun(run, example, options) {
|
|
50
|
+
const sourceRunId = uuidv4();
|
|
51
|
+
const metadata = {
|
|
52
|
+
targetRunId: run.id,
|
|
53
|
+
};
|
|
54
|
+
if ("session_id" in run) {
|
|
55
|
+
metadata["experiment"] = run.session_id;
|
|
56
|
+
}
|
|
57
|
+
const wrappedTraceableFunc = wrapFunctionAndEnsureTraceable(this.func, options || {}, "evaluator");
|
|
58
|
+
// Pass data via `langSmithRunAndExample` key to avoid conflicts with other
|
|
59
|
+
// inputs. This key is extracted in the wrapped function, with `run` and
|
|
60
|
+
// `example` passed to evaluator function as arguments.
|
|
61
|
+
const langSmithRunAndExample = {
|
|
62
|
+
run,
|
|
63
|
+
example,
|
|
64
|
+
};
|
|
65
|
+
const result = (await wrappedTraceableFunc({ langSmithRunAndExample }, {
|
|
66
|
+
metadata,
|
|
67
|
+
}));
|
|
68
|
+
// Check the one required property of EvaluationResult since 'instanceof' is not possible
|
|
69
|
+
if ("key" in result) {
|
|
70
|
+
if (!result.sourceRunId) {
|
|
71
|
+
result.sourceRunId = sourceRunId;
|
|
72
|
+
}
|
|
73
|
+
return result;
|
|
74
|
+
}
|
|
75
|
+
if (typeof result !== "object") {
|
|
76
|
+
throw new Error("Evaluator function must return an object.");
|
|
77
|
+
}
|
|
78
|
+
return this.coerceEvaluationResults(result, sourceRunId);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
export function runEvaluator(func) {
|
|
82
|
+
return new DynamicRunEvaluator(func);
|
|
83
|
+
}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.StringEvaluator = void 0;
|
|
3
|
+
exports.evaluate = exports.StringEvaluator = void 0;
|
|
4
4
|
var string_evaluator_js_1 = require("./string_evaluator.cjs");
|
|
5
5
|
Object.defineProperty(exports, "StringEvaluator", { enumerable: true, get: function () { return string_evaluator_js_1.StringEvaluator; } });
|
|
6
|
+
var _runner_js_1 = require("./_runner.cjs");
|
|
7
|
+
Object.defineProperty(exports, "evaluate", { enumerable: true, get: function () { return _runner_js_1.evaluate; } });
|
package/dist/evaluation/index.js
CHANGED
package/dist/index.cjs
CHANGED
|
@@ -6,4 +6,4 @@ Object.defineProperty(exports, "Client", { enumerable: true, get: function () {
|
|
|
6
6
|
var run_trees_js_1 = require("./run_trees.cjs");
|
|
7
7
|
Object.defineProperty(exports, "RunTree", { enumerable: true, get: function () { return run_trees_js_1.RunTree; } });
|
|
8
8
|
// Update using yarn bump-version
|
|
9
|
-
exports.__version__ = "0.1.20";
|
|
9
|
+
exports.__version__ = "0.1.22";
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export { Client } from "./client.js";
|
|
2
2
|
export type { Dataset, Example, TracerSession, Run, Feedback, } from "./schemas.js";
|
|
3
3
|
export { RunTree, type RunTreeConfig } from "./run_trees.js";
|
|
4
|
-
export declare const __version__ = "0.1.20";
|
|
4
|
+
export declare const __version__ = "0.1.22";
|
package/dist/index.js
CHANGED
package/dist/run_trees.cjs
CHANGED
|
@@ -232,7 +232,7 @@ class RunTree {
|
|
|
232
232
|
extra: {},
|
|
233
233
|
};
|
|
234
234
|
}
|
|
235
|
-
async createChild(config) {
|
|
235
|
+
createChild(config) {
|
|
236
236
|
const child = new RunTree({
|
|
237
237
|
...config,
|
|
238
238
|
parent_run: this,
|
|
@@ -243,9 +243,9 @@ class RunTree {
|
|
|
243
243
|
return child;
|
|
244
244
|
}
|
|
245
245
|
async end(outputs, error, endTime = Date.now()) {
|
|
246
|
-
this.outputs = outputs;
|
|
247
|
-
this.error = error;
|
|
248
|
-
this.end_time = endTime;
|
|
246
|
+
this.outputs = this.outputs ?? outputs;
|
|
247
|
+
this.error = this.error ?? error;
|
|
248
|
+
this.end_time = this.end_time ?? endTime;
|
|
249
249
|
}
|
|
250
250
|
async _convertToCreate(run, excludeChildRuns = true) {
|
|
251
251
|
const runExtra = run.extra ?? {};
|
package/dist/run_trees.d.ts
CHANGED
|
@@ -20,6 +20,7 @@ export interface RunTreeConfig {
|
|
|
20
20
|
outputs?: KVMap;
|
|
21
21
|
reference_example_id?: string;
|
|
22
22
|
client?: Client;
|
|
23
|
+
on_end?: (runTree: RunTree) => void;
|
|
23
24
|
}
|
|
24
25
|
export interface RunnableConfigLike {
|
|
25
26
|
/**
|
|
@@ -65,7 +66,7 @@ export declare class RunTree implements BaseRun {
|
|
|
65
66
|
metadata?: KVMap;
|
|
66
67
|
}): RunTree;
|
|
67
68
|
private static getDefaultConfig;
|
|
68
|
-
createChild(config: RunTreeConfig): Promise<RunTree>;
|
|
69
|
+
createChild(config: RunTreeConfig): RunTree;
|
|
69
70
|
end(outputs?: KVMap, error?: string, endTime?: number): Promise<void>;
|
|
70
71
|
private _convertToCreate;
|
|
71
72
|
postRun(excludeChildRuns?: boolean): Promise<void>;
|
package/dist/run_trees.js
CHANGED
|
@@ -205,7 +205,7 @@ export class RunTree {
|
|
|
205
205
|
extra: {},
|
|
206
206
|
};
|
|
207
207
|
}
|
|
208
|
-
async createChild(config) {
|
|
208
|
+
createChild(config) {
|
|
209
209
|
const child = new RunTree({
|
|
210
210
|
...config,
|
|
211
211
|
parent_run: this,
|
|
@@ -216,9 +216,9 @@ export class RunTree {
|
|
|
216
216
|
return child;
|
|
217
217
|
}
|
|
218
218
|
async end(outputs, error, endTime = Date.now()) {
|
|
219
|
-
this.outputs = outputs;
|
|
220
|
-
this.error = error;
|
|
221
|
-
this.end_time = endTime;
|
|
219
|
+
this.outputs = this.outputs ?? outputs;
|
|
220
|
+
this.error = this.error ?? error;
|
|
221
|
+
this.end_time = this.end_time ?? endTime;
|
|
222
222
|
}
|
|
223
223
|
async _convertToCreate(run, excludeChildRuns = true) {
|
|
224
224
|
const runExtra = run.extra ?? {};
|
package/dist/schemas.d.ts
CHANGED
|
@@ -5,6 +5,9 @@ export interface TracerSession {
|
|
|
5
5
|
end_time?: number;
|
|
6
6
|
description?: string;
|
|
7
7
|
name?: string;
|
|
8
|
+
/** Extra metadata for the project. */
|
|
9
|
+
extra?: KVMap;
|
|
10
|
+
reference_dataset_id?: string;
|
|
8
11
|
}
|
|
9
12
|
export interface TracerSessionResult extends TracerSession {
|
|
10
13
|
run_count?: number;
|
|
@@ -15,7 +18,6 @@ export interface TracerSessionResult extends TracerSession {
|
|
|
15
18
|
completion_tokens?: number;
|
|
16
19
|
last_run_start_time?: number;
|
|
17
20
|
feedback_stats?: Record<string, unknown>;
|
|
18
|
-
reference_dataset_id?: string;
|
|
19
21
|
run_facets?: KVMap[];
|
|
20
22
|
}
|
|
21
23
|
export type KVMap = Record<string, any>;
|
|
@@ -78,6 +80,14 @@ export interface BaseRun {
|
|
|
78
80
|
*/
|
|
79
81
|
dotted_order?: string;
|
|
80
82
|
}
|
|
83
|
+
type S3URL = {
|
|
84
|
+
ROOT: {
|
|
85
|
+
/** A pre-signed URL */
|
|
86
|
+
presigned_url: string;
|
|
87
|
+
/** The S3 path to the object in storage */
|
|
88
|
+
s3_url: string;
|
|
89
|
+
};
|
|
90
|
+
};
|
|
81
91
|
/**
|
|
82
92
|
* Describes properties of a run when loaded from the database.
|
|
83
93
|
* Extends the BaseRun interface.
|
|
@@ -111,6 +121,10 @@ export interface Run extends BaseRun {
|
|
|
111
121
|
parent_run_ids?: string[];
|
|
112
122
|
/** Whether the run is included in a dataset. */
|
|
113
123
|
in_dataset?: boolean;
|
|
124
|
+
/** The output S3 URLs */
|
|
125
|
+
outputs_s3_urls?: S3URL;
|
|
126
|
+
/** The input S3 URLs */
|
|
127
|
+
inputs_s3_urls?: S3URL;
|
|
114
128
|
}
|
|
115
129
|
export interface RunCreate extends BaseRun {
|
|
116
130
|
revision_id?: string;
|
|
@@ -235,6 +249,9 @@ export interface FeedbackCategory {
|
|
|
235
249
|
export interface FeedbackConfig {
|
|
236
250
|
/**
|
|
237
251
|
* The type of feedback.
|
|
252
|
+
* - "continuous": Feedback with a continuous numeric.
|
|
253
|
+
* - "categorical": Feedback with a categorical value (classes)
|
|
254
|
+
* - "freeform": Feedback with a freeform text value (notes).
|
|
238
255
|
*/
|
|
239
256
|
type: "continuous" | "categorical" | "freeform";
|
|
240
257
|
/**
|
|
@@ -246,6 +263,9 @@ export interface FeedbackConfig {
|
|
|
246
263
|
*/
|
|
247
264
|
max?: number | null;
|
|
248
265
|
/**
|
|
266
|
+
* The categories for categorical feedback.
|
|
267
|
+
* Each category can be a string or an object with additional properties.
|
|
268
|
+
*
|
|
249
269
|
* If feedback is categorical, this defines the valid categories the server will accept.
|
|
250
270
|
* Not applicable to continuous or freeform feedback types.
|
|
251
271
|
*/
|
|
@@ -256,3 +276,4 @@ export interface DatasetDiffInfo {
|
|
|
256
276
|
examples_added: string[];
|
|
257
277
|
examples_removed: string[];
|
|
258
278
|
}
|
|
279
|
+
export {};
|