langsmith 0.3.56-rc.8 → 0.3.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/vitest/index.cjs +6 -69
- package/dist/vitest/index.d.mts +452 -0
- package/dist/vitest/index.d.ts +2 -227
- package/dist/vitest/index.js +2 -65
- package/dist/vitest/index.mjs +324 -0
- package/dist/vitest/reporter.cjs +2 -2
- package/dist/vitest/reporter.d.mts +1 -1
- package/dist/vitest/reporter.js +1 -1
- package/dist/vitest/reporter.mjs +4 -19
- package/dist/vitest/utils/esm.d.mts +8 -0
- package/dist/vitest/utils/esm.mjs +38 -0
- package/dist/vitest/{utils.cjs → utils/reporter.cjs} +1 -1
- package/dist/vitest/{utils.js → utils/reporter.js} +1 -1
- package/dist/vitest/utils/wrapper.cjs +63 -0
- package/dist/vitest/utils/wrapper.d.ts +227 -0
- package/dist/vitest/utils/wrapper.js +59 -0
- package/package.json +3 -3
- package/vitest.d.ts +1 -1
- package/vitest.js +1 -1
- /package/dist/vitest/{utils.d.ts → utils/reporter.d.ts} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -10,4 +10,4 @@ Object.defineProperty(exports, "overrideFetchImplementation", { enumerable: true
|
|
|
10
10
|
var project_js_1 = require("./utils/project.cjs");
|
|
11
11
|
Object.defineProperty(exports, "getDefaultProjectName", { enumerable: true, get: function () { return project_js_1.getDefaultProjectName; } });
|
|
12
12
|
// Update using yarn bump-version
|
|
13
|
-
exports.__version__ = "0.3.56-rc.8";
|
|
13
|
+
exports.__version__ = "0.3.56";
|
package/dist/index.d.ts
CHANGED
|
@@ -3,4 +3,4 @@ export type { Dataset, Example, TracerSession, Run, Feedback, RetrieverOutput, }
|
|
|
3
3
|
export { RunTree, type RunTreeConfig } from "./run_trees.js";
|
|
4
4
|
export { overrideFetchImplementation } from "./singletons/fetch.js";
|
|
5
5
|
export { getDefaultProjectName } from "./utils/project.js";
|
|
6
|
-
export declare const __version__ = "0.3.56-rc.8";
|
|
6
|
+
export declare const __version__ = "0.3.56";
|
package/dist/index.js
CHANGED
|
@@ -3,4 +3,4 @@ export { RunTree } from "./run_trees.js";
|
|
|
3
3
|
export { overrideFetchImplementation } from "./singletons/fetch.js";
|
|
4
4
|
export { getDefaultProjectName } from "./utils/project.js";
|
|
5
5
|
// Update using yarn bump-version
|
|
6
|
-
export const __version__ = "0.3.56-rc.8";
|
|
6
|
+
export const __version__ = "0.3.56";
|
package/dist/vitest/index.cjs
CHANGED
|
@@ -19,80 +19,14 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
19
19
|
exports.wrapVitest = exports.wrapEvaluator = exports.logOutputs = exports.logFeedback = exports.expect = exports.describe = exports.it = exports.test = void 0;
|
|
20
20
|
const vitest_1 = require("vitest");
|
|
21
21
|
const matchers_js_1 = require("../utils/jestlike/matchers.cjs");
|
|
22
|
-
const evaluatedBy_js_1 = require("../utils/jestlike/vendor/evaluatedBy.cjs");
|
|
23
|
-
Object.defineProperty(exports, "wrapEvaluator", { enumerable: true, get: function () { return evaluatedBy_js_1.wrapEvaluator; } });
|
|
24
|
-
const index_js_1 = require("../utils/jestlike/index.cjs");
|
|
25
|
-
Object.defineProperty(exports, "logFeedback", { enumerable: true, get: function () { return index_js_1.logFeedback; } });
|
|
26
|
-
Object.defineProperty(exports, "logOutputs", { enumerable: true, get: function () { return index_js_1.logOutputs; } });
|
|
27
|
-
const index_js_2 = require("../utils/jestlike/index.cjs");
|
|
22
|
+
const wrapper_js_1 = require("./utils/wrapper.cjs");
|
|
23
|
+
Object.defineProperty(exports, "wrapVitest", { enumerable: true, get: function () { return wrapper_js_1.wrapVitest; } });
|
|
28
24
|
vitest_1.expect.extend({
|
|
29
25
|
toBeRelativeCloseTo: matchers_js_1.toBeRelativeCloseTo,
|
|
30
26
|
toBeAbsoluteCloseTo: matchers_js_1.toBeAbsoluteCloseTo,
|
|
31
27
|
toBeSemanticCloseTo: matchers_js_1.toBeSemanticCloseTo,
|
|
32
28
|
});
|
|
33
|
-
/**
|
|
34
|
-
* Dynamically wrap original Vitest imports.
|
|
35
|
-
*
|
|
36
|
-
* This may be necessary to ensure you are wrapping the correct
|
|
37
|
-
* Vitest version if you are using a monorepo whose workspaces
|
|
38
|
-
* use multiple versions of Vitest.
|
|
39
|
-
*
|
|
40
|
-
* @param originalVitestMethods - The original Vitest imports to wrap.
|
|
41
|
-
* @returns The wrapped Vitest imports.
|
|
42
|
-
* See https://docs.smith.langchain.com/evaluation/how_to_guides/vitest_jest
|
|
43
|
-
* for more details.
|
|
44
|
-
*/
|
|
45
|
-
const wrapVitest = (originalVitestMethods) => {
|
|
46
|
-
if (typeof originalVitestMethods !== "object" ||
|
|
47
|
-
originalVitestMethods == null) {
|
|
48
|
-
throw new Error("originalVitestMethods must be an non-null object.");
|
|
49
|
-
}
|
|
50
|
-
if (!("expect" in originalVitestMethods) ||
|
|
51
|
-
typeof originalVitestMethods.expect !== "function") {
|
|
52
|
-
throw new Error("Your passed object must contain a `expect` method.");
|
|
53
|
-
}
|
|
54
|
-
if (!("it" in originalVitestMethods) ||
|
|
55
|
-
typeof originalVitestMethods.it !== "function") {
|
|
56
|
-
throw new Error("Your passed object must contain a `it` method.");
|
|
57
|
-
}
|
|
58
|
-
if (!("test" in originalVitestMethods) ||
|
|
59
|
-
typeof originalVitestMethods.test !== "function") {
|
|
60
|
-
throw new Error("Your passed object must contain a `test` method.");
|
|
61
|
-
}
|
|
62
|
-
if (!("describe" in originalVitestMethods) ||
|
|
63
|
-
typeof originalVitestMethods.describe !== "function") {
|
|
64
|
-
throw new Error("Your passed object must contain a `describe` method.");
|
|
65
|
-
}
|
|
66
|
-
if (!("beforeAll" in originalVitestMethods) ||
|
|
67
|
-
typeof originalVitestMethods.beforeAll !== "function") {
|
|
68
|
-
throw new Error("Your passed object must contain a `beforeAll` method.");
|
|
69
|
-
}
|
|
70
|
-
if (!("afterAll" in originalVitestMethods) ||
|
|
71
|
-
typeof originalVitestMethods.afterAll !== "function") {
|
|
72
|
-
throw new Error("Your passed object must contain a `afterAll` method.");
|
|
73
|
-
}
|
|
74
|
-
const wrappedMethods = (0, index_js_2.generateWrapperFromJestlikeMethods)({
|
|
75
|
-
expect: originalVitestMethods.expect,
|
|
76
|
-
it: originalVitestMethods.it,
|
|
77
|
-
test: originalVitestMethods.test,
|
|
78
|
-
describe: originalVitestMethods.describe,
|
|
79
|
-
beforeAll: originalVitestMethods.beforeAll,
|
|
80
|
-
afterAll: originalVitestMethods.afterAll,
|
|
81
|
-
}, "vitest");
|
|
82
|
-
// Return the normal used LS methods for convenience
|
|
83
|
-
// so that you can do:
|
|
84
|
-
//
|
|
85
|
-
// const ls = wrapVitest(vitest);
|
|
86
|
-
// ls.logFeedback({ key: "quality", score: 0.7 });
|
|
87
|
-
return {
|
|
88
|
-
...wrappedMethods,
|
|
89
|
-
logFeedback: index_js_1.logFeedback,
|
|
90
|
-
logOutputs: index_js_1.logOutputs,
|
|
91
|
-
wrapEvaluator: evaluatedBy_js_1.wrapEvaluator,
|
|
92
|
-
};
|
|
93
|
-
};
|
|
94
|
-
exports.wrapVitest = wrapVitest;
|
|
95
|
-
const { test, it, describe, expect } = wrapVitest({
|
|
29
|
+
const { test, it, describe, expect, logFeedback, logOutputs, wrapEvaluator } = (0, wrapper_js_1.wrapVitest)({
|
|
96
30
|
expect: vitest_1.expect,
|
|
97
31
|
it: vitest_1.it,
|
|
98
32
|
test: vitest_1.test,
|
|
@@ -106,4 +40,7 @@ exports.test = test;
|
|
|
106
40
|
exports.it = it;
|
|
107
41
|
exports.describe = describe;
|
|
108
42
|
exports.expect = expect;
|
|
43
|
+
exports.logFeedback = logFeedback;
|
|
44
|
+
exports.logOutputs = logOutputs;
|
|
45
|
+
exports.wrapEvaluator = wrapEvaluator;
|
|
109
46
|
__exportStar(require("../utils/jestlike/types.cjs"), exports);
|
|
package/dist/vitest/index.d.mts
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
import type { LangSmithJestlikeWrapperParams } from "../utils/jestlike/types.js";
|
|
2
|
+
import { wrapVitest } from "./utils/wrapper.js";
|
|
3
|
+
declare const test: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
4
|
+
inputs: I;
|
|
5
|
+
referenceOutputs?: O;
|
|
6
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
7
|
+
only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
8
|
+
inputs: I;
|
|
9
|
+
referenceOutputs?: O;
|
|
10
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
11
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
12
|
+
inputs: I;
|
|
13
|
+
referenceOutputs?: O;
|
|
14
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
15
|
+
inputs: I;
|
|
16
|
+
referenceOutputs?: O;
|
|
17
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
18
|
+
};
|
|
19
|
+
skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
20
|
+
inputs: I;
|
|
21
|
+
referenceOutputs?: O;
|
|
22
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
23
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
24
|
+
inputs: I;
|
|
25
|
+
referenceOutputs?: O;
|
|
26
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
27
|
+
inputs: I;
|
|
28
|
+
referenceOutputs?: O;
|
|
29
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
30
|
+
};
|
|
31
|
+
concurrent: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
32
|
+
inputs: I;
|
|
33
|
+
referenceOutputs?: O;
|
|
34
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
35
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
36
|
+
inputs: I;
|
|
37
|
+
referenceOutputs?: O;
|
|
38
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
39
|
+
inputs: I;
|
|
40
|
+
referenceOutputs?: O;
|
|
41
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
42
|
+
only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
43
|
+
inputs: I;
|
|
44
|
+
referenceOutputs?: O;
|
|
45
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
46
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
47
|
+
inputs: I;
|
|
48
|
+
referenceOutputs?: O;
|
|
49
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
50
|
+
inputs: I;
|
|
51
|
+
referenceOutputs?: O;
|
|
52
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
53
|
+
};
|
|
54
|
+
skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
55
|
+
inputs: I;
|
|
56
|
+
referenceOutputs?: O;
|
|
57
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
58
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
59
|
+
inputs: I;
|
|
60
|
+
referenceOutputs?: O;
|
|
61
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
62
|
+
inputs: I;
|
|
63
|
+
referenceOutputs?: O;
|
|
64
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
65
|
+
};
|
|
66
|
+
};
|
|
67
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
68
|
+
inputs: I;
|
|
69
|
+
referenceOutputs?: O;
|
|
70
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
71
|
+
inputs: I;
|
|
72
|
+
referenceOutputs?: O;
|
|
73
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
74
|
+
}, it: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
75
|
+
inputs: I;
|
|
76
|
+
referenceOutputs?: O;
|
|
77
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
78
|
+
only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
79
|
+
inputs: I;
|
|
80
|
+
referenceOutputs?: O;
|
|
81
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
82
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
83
|
+
inputs: I;
|
|
84
|
+
referenceOutputs?: O;
|
|
85
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
86
|
+
inputs: I;
|
|
87
|
+
referenceOutputs?: O;
|
|
88
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
89
|
+
};
|
|
90
|
+
skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
91
|
+
inputs: I;
|
|
92
|
+
referenceOutputs?: O;
|
|
93
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
94
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
95
|
+
inputs: I;
|
|
96
|
+
referenceOutputs?: O;
|
|
97
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
98
|
+
inputs: I;
|
|
99
|
+
referenceOutputs?: O;
|
|
100
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
101
|
+
};
|
|
102
|
+
concurrent: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
103
|
+
inputs: I;
|
|
104
|
+
referenceOutputs?: O;
|
|
105
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
106
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
107
|
+
inputs: I;
|
|
108
|
+
referenceOutputs?: O;
|
|
109
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
110
|
+
inputs: I;
|
|
111
|
+
referenceOutputs?: O;
|
|
112
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
113
|
+
only: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
114
|
+
inputs: I;
|
|
115
|
+
referenceOutputs?: O;
|
|
116
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
117
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
118
|
+
inputs: I;
|
|
119
|
+
referenceOutputs?: O;
|
|
120
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
121
|
+
inputs: I;
|
|
122
|
+
referenceOutputs?: O;
|
|
123
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
124
|
+
};
|
|
125
|
+
skip: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(name: string, lsParams: LangSmithJestlikeWrapperParams<I, O>, testFn: (data: {
|
|
126
|
+
inputs: I;
|
|
127
|
+
referenceOutputs?: O;
|
|
128
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void) & {
|
|
129
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
130
|
+
inputs: I;
|
|
131
|
+
referenceOutputs?: O;
|
|
132
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
133
|
+
inputs: I;
|
|
134
|
+
referenceOutputs?: O;
|
|
135
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
136
|
+
};
|
|
137
|
+
};
|
|
138
|
+
each: <I extends import("../schemas.js").KVMap, O extends import("../schemas.js").KVMap>(table: ({
|
|
139
|
+
inputs: I;
|
|
140
|
+
referenceOutputs?: O;
|
|
141
|
+
} & Record<string, any>)[], config?: import("../utils/jestlike/types.js").LangSmithJestlikeWrapperConfig) => (name: string, fn: (params: {
|
|
142
|
+
inputs: I;
|
|
143
|
+
referenceOutputs?: O;
|
|
144
|
+
} & Record<string, any>) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
145
|
+
}, describe: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper & {
|
|
146
|
+
only: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
|
|
147
|
+
skip: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
|
|
148
|
+
concurrent: import("../utils/jestlike/types.js").LangSmithJestlikeDescribeWrapper;
|
|
149
|
+
}, expect: jest.Expect, logFeedback: typeof import("../jest/index.js").logFeedback, logOutputs: typeof import("../jest/index.js").logOutputs, wrapEvaluator: typeof import("../jest/index.js").wrapEvaluator;
|
|
150
|
+
export {
|
|
151
|
+
/**
|
|
152
|
+
* Defines a LangSmith test case within a suite. Takes an additional `lsParams`
|
|
153
|
+
* arg containing example inputs and reference outputs for your evaluated app.
|
|
154
|
+
*
|
|
155
|
+
* When run, will create a dataset and experiment in LangSmith, then send results
|
|
156
|
+
* and log feedback if tracing is enabled. You can also iterate over several
|
|
157
|
+
* examples at once with `ls.test.each([])` (see below example).
|
|
158
|
+
*
|
|
159
|
+
* Must be wrapped within an `ls.describe()` block. The describe block
|
|
160
|
+
* corresponds to a dataset created on LangSmith, while test cases correspond to
|
|
161
|
+
* individual examples within the dataset. Running the test is analogous to an experiment.
|
|
162
|
+
*
|
|
163
|
+
* Returning a value from the wrapped test function is the same as logging it as
|
|
164
|
+
* the experiment example result.
|
|
165
|
+
*
|
|
166
|
+
* You can manually disable creating experiments in LangSmith for purely local testing by
|
|
167
|
+
* setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
|
|
168
|
+
*
|
|
169
|
+
* @param {string} name - The name or description of the test case
|
|
170
|
+
* @param {LangSmithJestlikeWrapperParams<I, O>} lsParams Input and output for the eval,
|
|
171
|
+
* as well as additional LangSmith fields
|
|
172
|
+
* @param {Function} fn - The function containing the test implementation.
|
|
173
|
+
* Will receive "inputs" and "referenceOutputs" from parameters.
|
|
174
|
+
* Returning a value here will populate experiment output logged in LangSmith.
|
|
175
|
+
* @param {number} [timeout] - Optional timeout in milliseconds for the test
|
|
176
|
+
* @example
|
|
177
|
+
* ```ts
|
|
178
|
+
* import * as ls from "langsmith/vitest";
|
|
179
|
+
*
|
|
180
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
181
|
+
* ls.test(
|
|
182
|
+
* "Should not respond to a toxic query",
|
|
183
|
+
* {
|
|
184
|
+
* inputs: { query: "How do I do something evil?" },
|
|
185
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
186
|
+
* },
|
|
187
|
+
* ({ inputs, referenceOutputs }) => {
|
|
188
|
+
* const response = await myApp(inputs);
|
|
189
|
+
* const { key, score } = await someEvaluator({ response }, referenceOutputs);
|
|
190
|
+
* ls.logFeedback({ key, score });
|
|
191
|
+
* return { response };
|
|
192
|
+
* }
|
|
193
|
+
* );
|
|
194
|
+
*
|
|
195
|
+
* ls.test.each([
|
|
196
|
+
* { inputs: {...}, referenceOutputs: {...} },
|
|
197
|
+
* { inputs: {...}, referenceOutputs: {...} }
|
|
198
|
+
* ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
|
|
199
|
+
* ...
|
|
200
|
+
* });
|
|
201
|
+
* });
|
|
202
|
+
* ```
|
|
203
|
+
*/
|
|
204
|
+
test,
|
|
205
|
+
/**
|
|
206
|
+
* Alias of `ls.test()`.
|
|
207
|
+
*
|
|
208
|
+
* Defines a LangSmith test case within a suite. Takes an additional `lsParams`
|
|
209
|
+
* arg containing example inputs and reference outputs for your evaluated app.
|
|
210
|
+
*
|
|
211
|
+
* When run, will create a dataset and experiment in LangSmith, then send results
|
|
212
|
+
* and log feedback if tracing is enabled. You can also iterate over several
|
|
213
|
+
* examples at once with `ls.test.each([])` (see below example).
|
|
214
|
+
*
|
|
215
|
+
* Must be wrapped within an `ls.describe()` block. The describe block
|
|
216
|
+
* corresponds to a dataset created on LangSmith, while test cases correspond to
|
|
217
|
+
* individual examples within the dataset. Running the test is analogous to an experiment.
|
|
218
|
+
*
|
|
219
|
+
* Returning a value from the wrapped test function is the same as logging it as
|
|
220
|
+
* the experiment example result.
|
|
221
|
+
*
|
|
222
|
+
* You can manually disable creating experiments in LangSmith for purely local testing by
|
|
223
|
+
* setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
|
|
224
|
+
*
|
|
225
|
+
* @param {string} name - The name or description of the test case
|
|
226
|
+
* @param {LangSmithJestlikeWrapperParams<I, O>} lsParams Input and output for the eval,
|
|
227
|
+
* as well as additional LangSmith fields
|
|
228
|
+
* @param {Function} fn - The function containing the test implementation.
|
|
229
|
+
* Will receive "inputs" and "referenceOutputs" from parameters.
|
|
230
|
+
* Returning a value here will populate experiment output logged in LangSmith.
|
|
231
|
+
* @param {number} [timeout] - Optional timeout in milliseconds for the test
|
|
232
|
+
* @example
|
|
233
|
+
* ```ts
|
|
234
|
+
* import * as ls from "langsmith/vitest";
|
|
235
|
+
*
|
|
236
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
237
|
+
* ls.it(
|
|
238
|
+
* "Should not respond to a toxic query",
|
|
239
|
+
* {
|
|
240
|
+
* inputs: { query: "How do I do something evil?" },
|
|
241
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
242
|
+
* },
|
|
243
|
+
* ({ inputs, referenceOutputs }) => {
|
|
244
|
+
* const response = await myApp(inputs);
|
|
245
|
+
* const { key, score } = await someEvaluator({ response }, referenceOutputs);
|
|
246
|
+
* ls.logFeedback({ key, score });
|
|
247
|
+
* return { response };
|
|
248
|
+
* }
|
|
249
|
+
* );
|
|
250
|
+
*
|
|
251
|
+
* ls.it.each([
|
|
252
|
+
* { inputs: {...}, referenceOutputs: {...} },
|
|
253
|
+
* { inputs: {...}, referenceOutputs: {...} }
|
|
254
|
+
* ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
|
|
255
|
+
* ...
|
|
256
|
+
* });
|
|
257
|
+
* });
|
|
258
|
+
* ```
|
|
259
|
+
*/
|
|
260
|
+
it,
|
|
261
|
+
/**
|
|
262
|
+
* Defines a LangSmith test suite.
|
|
263
|
+
*
|
|
264
|
+
* When run, will create a dataset and experiment in LangSmith, then send results
|
|
265
|
+
* and log feedback if tracing is enabled.
|
|
266
|
+
*
|
|
267
|
+
* Should contain `ls.test()` cases within. The describe block
|
|
268
|
+
* corresponds to a dataset created on LangSmith, while test cases correspond to
|
|
269
|
+
* individual examples within the dataset. Running the test is analogous to an experiment.
|
|
270
|
+
*
|
|
271
|
+
* You can manually disable creating experiments in LangSmith for purely local testing by
|
|
272
|
+
* setting `LANGSMITH_TEST_TRACKING="false"` as an environment variable.
|
|
273
|
+
*
|
|
274
|
+
* @param {string} name - The name or description of the test suite
|
|
275
|
+
* @param {Function} fn - The function containing the test implementation.
|
|
276
|
+
* Will receive "inputs" and "referenceOutputs" from parameters.
|
|
277
|
+
* Returning a value here will populate experiment output logged in LangSmith.
|
|
278
|
+
* @param {Partial<RunTreeConfig>} [config] - Config to use when tracing/sending results.
|
|
279
|
+
* @example
|
|
280
|
+
* ```ts
|
|
281
|
+
* import * as ls from "langsmith/vitest";
|
|
282
|
+
*
|
|
283
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
284
|
+
* ls.test(
|
|
285
|
+
* "Should not respond to a toxic query",
|
|
286
|
+
* {
|
|
287
|
+
* inputs: { query: "How do I do something evil?" },
|
|
288
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
289
|
+
* },
|
|
290
|
+
* ({ inputs, referenceOutputs }) => {
|
|
291
|
+
* const response = await myApp(inputs);
|
|
292
|
+
* const { key, score } = await someEvaluator({ response }, referenceOutputs);
|
|
293
|
+
* ls.logFeedback({ key, score });
|
|
294
|
+
* return { response };
|
|
295
|
+
* }
|
|
296
|
+
* );
|
|
297
|
+
*
|
|
298
|
+
* ls.test.each([
|
|
299
|
+
* { inputs: {...}, referenceOutputs: {...} },
|
|
300
|
+
* { inputs: {...}, referenceOutputs: {...} }
|
|
301
|
+
* ])("Should respond to the above examples", async ({ inputs, referenceOutputs }) => {
|
|
302
|
+
* ...
|
|
303
|
+
* });
|
|
304
|
+
* });
|
|
305
|
+
* ```
|
|
306
|
+
*/
|
|
307
|
+
describe,
|
|
308
|
+
/**
|
|
309
|
+
* Wrapped `expect` with additional matchers for directly logging feedback and
|
|
310
|
+
* other convenient string matchers.
|
|
311
|
+
* @example
|
|
312
|
+
* ```ts
|
|
313
|
+
* import * as ls from "langsmith/vitest";
|
|
314
|
+
*
|
|
315
|
+
* const myEvaluator = async ({ inputs, actual, referenceOutputs }) => {
|
|
316
|
+
* // Judge example on some metric
|
|
317
|
+
* return {
|
|
318
|
+
* key: "quality",
|
|
319
|
+
* score: 0.7,
|
|
320
|
+
* };
|
|
321
|
+
* };
|
|
322
|
+
*
|
|
323
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
324
|
+
* ls.test(
|
|
325
|
+
* "Should not respond to a toxic query",
|
|
326
|
+
* {
|
|
327
|
+
* inputs: { query: "How do I do something evil?" },
|
|
328
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
329
|
+
* },
|
|
330
|
+
* ({ inputs, referenceOutputs }) => {
|
|
331
|
+
* const response = await myApp(inputs);
|
|
332
|
+
* // Alternative to logFeedback that will assert evaluator's returned score
|
|
333
|
+
* // and log feedback.
|
|
334
|
+
* await ls.expect(response).evaluatedBy(myEvaluator).toBeGreaterThan(0.5);
|
|
335
|
+
* return { response };
|
|
336
|
+
* }
|
|
337
|
+
* );
|
|
338
|
+
* });
|
|
339
|
+
* ```
|
|
340
|
+
*/
|
|
341
|
+
expect,
|
|
342
|
+
/**
|
|
343
|
+
* Log feedback associated with the current test, usually generated by some kind of
|
|
344
|
+
* evaluator.
|
|
345
|
+
*
|
|
346
|
+
* Logged feedback will appear in test results if custom reporting is enabled,
|
|
347
|
+
* as well as in experiment results in LangSmith.
|
|
348
|
+
*
|
|
349
|
+
* @param {EvaluationResult} feedback Feedback to log
|
|
350
|
+
* @param {string} feedback.key The name of the feedback metric
|
|
351
|
+
* @param {number | boolean} feedback.key The value of the feedback
|
|
352
|
+
* @example
|
|
353
|
+
* ```ts
|
|
354
|
+
* import * as ls from "langsmith/vitest";
|
|
355
|
+
*
|
|
356
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
357
|
+
* ls.test(
|
|
358
|
+
* "Should not respond to a toxic query",
|
|
359
|
+
* {
|
|
360
|
+
* inputs: { query: "How do I do something evil?" },
|
|
361
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
362
|
+
* },
|
|
363
|
+
* ({ inputs, referenceOutputs }) => {
|
|
364
|
+
* const response = await myApp(inputs);
|
|
365
|
+
* const { key, score } = await someEvaluator({ response }, referenceOutputs);
|
|
366
|
+
* ls.logFeedback({ key, score });
|
|
367
|
+
* return { response };
|
|
368
|
+
* }
|
|
369
|
+
* );
|
|
370
|
+
* });
|
|
371
|
+
* ```
|
|
372
|
+
*/
|
|
373
|
+
logFeedback,
|
|
374
|
+
/**
|
|
375
|
+
* Log output associated with the current test.
|
|
376
|
+
*
|
|
377
|
+
* Logged output will appear in test results if custom reporting is enabled,
|
|
378
|
+
* as well as in experiment results in LangSmith.
|
|
379
|
+
*
|
|
380
|
+
* If a value is returned from your test case, it will override
|
|
381
|
+
* manually logged output.
|
|
382
|
+
*
|
|
383
|
+
* @param {EvaluationResult} feedback Feedback to log
|
|
384
|
+
* @param {string} feedback.key The name of the feedback metric
|
|
385
|
+
* @param {number | boolean} feedback.key The value of the feedback
|
|
386
|
+
* @example
|
|
387
|
+
* ```ts
|
|
388
|
+
* import * as ls from "langsmith/vitest";
|
|
389
|
+
*
|
|
390
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
391
|
+
* ls.test(
|
|
392
|
+
* "Should not respond to a toxic query",
|
|
393
|
+
* {
|
|
394
|
+
* inputs: { query: "How do I do something evil?" },
|
|
395
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
396
|
+
* },
|
|
397
|
+
* ({ inputs, referenceOutputs }) => {
|
|
398
|
+
* const response = await myApp(inputs);
|
|
399
|
+
* ls.logOutputs({ response });
|
|
400
|
+
* }
|
|
401
|
+
* );
|
|
402
|
+
* });
|
|
403
|
+
* ```
|
|
404
|
+
*/
|
|
405
|
+
logOutputs,
|
|
406
|
+
/**
|
|
407
|
+
* Wraps an evaluator function, adding tracing and logging it to a
|
|
408
|
+
* separate project to avoid polluting test traces with evaluator runs.
|
|
409
|
+
*
|
|
410
|
+
* The wrapped evaluator must take only a single argument as input.
|
|
411
|
+
*
|
|
412
|
+
* If the wrapped evaluator returns an object with
|
|
413
|
+
* `{ key: string, score: number | boolean }`, the function returned from this
|
|
414
|
+
* method will automatically log the key and score as feedback on the current run.
|
|
415
|
+
* Otherwise, you should call {@link logFeedback} with some transformed version
|
|
416
|
+
* of the result of running the evaluator.
|
|
417
|
+
*
|
|
418
|
+
* @param {Function} evaluator The evaluator to be wrapped. Must take only a single argument as input.
|
|
419
|
+
*
|
|
420
|
+
* @example
|
|
421
|
+
* ```ts
|
|
422
|
+
* import * as ls from "langsmith/vitest";
|
|
423
|
+
*
|
|
424
|
+
* const myEvaluator = async ({ inputs, actual, referenceOutputs }) => {
|
|
425
|
+
* // Judge example on some metric
|
|
426
|
+
* return {
|
|
427
|
+
* key: "quality",
|
|
428
|
+
* score: 0.7,
|
|
429
|
+
* };
|
|
430
|
+
* };
|
|
431
|
+
*
|
|
432
|
+
* ls.describe("Harmfulness dataset", async () => {
|
|
433
|
+
* ls.test(
|
|
434
|
+
* "Should not respond to a toxic query",
|
|
435
|
+
* {
|
|
436
|
+
* inputs: { query: "How do I do something evil?" },
|
|
437
|
+
* referenceOutputs: { response: "I do not respond to those queries!" }
|
|
438
|
+
* },
|
|
439
|
+
* ({ inputs, referenceOutputs }) => {
|
|
440
|
+
* const response = await myApp(inputs);
|
|
441
|
+
* // Alternative to logFeedback that will log the evaluator's returned score
|
|
442
|
+
* // and as feedback under the returned key.
|
|
443
|
+
* const wrappedEvaluator = ls.wrapEvaluator(myEvaluator);
|
|
444
|
+
* await wrappedEvaluator({ inputs, referenceOutputs, actual: response });
|
|
445
|
+
* return { response };
|
|
446
|
+
* }
|
|
447
|
+
* );
|
|
448
|
+
* });
|
|
449
|
+
* ```
|
|
450
|
+
*/
|
|
451
|
+
wrapEvaluator, type LangSmithJestlikeWrapperParams, wrapVitest, };
|
|
452
|
+
export * from "../utils/jestlike/types.js";
|