langsmith 0.2.15-beta.0 → 0.2.15-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/jest/globals.cjs +10 -0
- package/dist/jest/globals.d.ts +18 -0
- package/dist/jest/globals.js +6 -0
- package/dist/jest/index.cjs +267 -0
- package/dist/jest/index.d.ts +61 -0
- package/dist/jest/index.js +258 -0
- package/dist/jest/matchers.cjs +101 -0
- package/dist/jest/matchers.d.ts +25 -0
- package/dist/jest/matchers.js +95 -0
- package/dist/jest/vendor/chain.cjs +92 -0
- package/dist/jest/vendor/chain.d.ts +1 -0
- package/dist/jest/vendor/chain.js +88 -0
- package/dist/jest/vendor/gradedBy.cjs +40 -0
- package/dist/jest/vendor/gradedBy.d.ts +7 -0
- package/dist/jest/vendor/gradedBy.js +36 -0
- package/dist/run_trees.d.ts +1 -1
- package/dist/singletons/traceable.cjs +1 -1
- package/dist/singletons/traceable.js +1 -1
- package/jest.cjs +1 -0
- package/jest.d.cts +1 -0
- package/jest.d.ts +1 -0
- package/jest.js +1 -0
- package/package.json +14 -1
package/dist/index.cjs
CHANGED
|
@@ -8,4 +8,4 @@ Object.defineProperty(exports, "RunTree", { enumerable: true, get: function () {
|
|
|
8
8
|
var fetch_js_1 = require("./singletons/fetch.cjs");
|
|
9
9
|
Object.defineProperty(exports, "overrideFetchImplementation", { enumerable: true, get: function () { return fetch_js_1.overrideFetchImplementation; } });
|
|
10
10
|
// Update using yarn bump-version
|
|
11
|
-
exports.__version__ = "0.2.15-beta.0";
|
|
11
|
+
exports.__version__ = "0.2.15-rc.0";
|
package/dist/index.d.ts
CHANGED
|
@@ -2,4 +2,4 @@ export { Client, type ClientConfig, type LangSmithTracingClientInterface, } from
|
|
|
2
2
|
export type { Dataset, Example, TracerSession, Run, Feedback, RetrieverOutput, } from "./schemas.js";
|
|
3
3
|
export { RunTree, type RunTreeConfig } from "./run_trees.js";
|
|
4
4
|
export { overrideFetchImplementation } from "./singletons/fetch.js";
|
|
5
|
-
export declare const __version__ = "0.2.15-beta.0";
|
|
5
|
+
export declare const __version__ = "0.2.15-rc.0";
|
package/dist/index.js
CHANGED
|
@@ -2,4 +2,4 @@ export { Client, } from "./client.js";
|
|
|
2
2
|
export { RunTree } from "./run_trees.js";
|
|
3
3
|
export { overrideFetchImplementation } from "./singletons/fetch.js";
|
|
4
4
|
// Update using yarn bump-version
|
|
5
|
-
export const __version__ = "0.2.15-beta.0";
|
|
5
|
+
export const __version__ = "0.2.15-rc.0";
|
|
package/dist/jest/globals.cjs
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.trackingEnabled = exports.jestAsyncLocalStorageInstance = void 0;
|
|
4
|
+
const node_async_hooks_1 = require("node:async_hooks");
|
|
5
|
+
const env_js_1 = require("../utils/env.cjs");
|
|
6
|
+
exports.jestAsyncLocalStorageInstance = new node_async_hooks_1.AsyncLocalStorage();
|
|
7
|
+
/**
 * Tells callers whether LangSmith test tracking is turned on.
 *
 * @returns {boolean} `true` only when the value that `getEnvironmentVariable`
 *   yields for `LANGSMITH_TEST_TRACKING` is exactly the string "true".
 */
function trackingEnabled() {
    const flag = (0, env_js_1.getEnvironmentVariable)("LANGSMITH_TEST_TRACKING");
    return flag === "true";
}
|
|
10
|
+
exports.trackingEnabled = trackingEnabled;
|
|
package/dist/jest/globals.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
3
|
+
import { Dataset, TracerSession, Example } from "../schemas.js";
|
|
4
|
+
import { Client } from "../client.js";
|
|
5
|
+
export declare const jestAsyncLocalStorageInstance: AsyncLocalStorage<{
|
|
6
|
+
dataset?: Dataset | undefined;
|
|
7
|
+
examples?: (Example & {
|
|
8
|
+
inputHash: string;
|
|
9
|
+
outputHash: string;
|
|
10
|
+
})[] | undefined;
|
|
11
|
+
createdAt: string;
|
|
12
|
+
project?: TracerSession | undefined;
|
|
13
|
+
currentExample?: Partial<Example> | undefined;
|
|
14
|
+
client: Client;
|
|
15
|
+
suiteUuid: string;
|
|
16
|
+
suiteName: string;
|
|
17
|
+
}>;
|
|
18
|
+
export declare function trackingEnabled(): boolean;
|
|
package/dist/jest/globals.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
2
|
+
import { getEnvironmentVariable } from "../utils/env.js";
|
|
3
|
+
export const jestAsyncLocalStorageInstance = new AsyncLocalStorage();
|
|
4
|
+
/**
 * Tells callers whether LangSmith test tracking is turned on.
 *
 * @returns {boolean} `true` only when the value that `getEnvironmentVariable`
 *   yields for `LANGSMITH_TEST_TRACKING` is exactly the string "true".
 */
export function trackingEnabled() {
    const flag = getEnvironmentVariable("LANGSMITH_TEST_TRACKING");
    return flag === "true";
}
|
|
package/dist/jest/index.cjs
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/* eslint-disable import/no-extraneous-dependencies */
|
|
3
|
+
/* eslint-disable @typescript-eslint/no-namespace */
|
|
4
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
5
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
6
|
+
};
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.expect = exports.describe = exports.it = exports.test = void 0;
|
|
9
|
+
const globals_1 = require("@jest/globals");
|
|
10
|
+
const crypto_1 = __importDefault(require("crypto"));
|
|
11
|
+
const uuid_1 = require("uuid");
|
|
12
|
+
const traceable_js_1 = require("../traceable.cjs");
|
|
13
|
+
const run_trees_js_1 = require("../run_trees.cjs");
|
|
14
|
+
const _random_name_js_1 = require("../evaluation/_random_name.cjs");
|
|
15
|
+
const matchers_js_1 = require("./matchers.cjs");
|
|
16
|
+
const globals_js_1 = require("./globals.cjs");
|
|
17
|
+
const chain_js_1 = require("./vendor/chain.cjs");
|
|
18
|
+
globals_1.expect.extend({
|
|
19
|
+
toBeRelativeCloseTo: matchers_js_1.toBeRelativeCloseTo,
|
|
20
|
+
toBeAbsoluteCloseTo: matchers_js_1.toBeAbsoluteCloseTo,
|
|
21
|
+
toBeSemanticCloseTo: matchers_js_1.toBeSemanticCloseTo,
|
|
22
|
+
});
|
|
23
|
+
/**
 * Computes a deterministic SHA-256 hex digest of a JSON-like value.
 *
 * Arrays are hashed from the comma-joined hashes of their items, plain
 * objects from their `key:hash` pairs sorted by key (so key order does not
 * matter), and everything else from its `JSON.stringify` form.
 *
 * @param obj Value to hash.
 * @param depth Current recursion depth; callers normally omit it.
 * @returns 64-character lowercase hex digest.
 * @throws {Error} When nesting exceeds 50 levels (also how cycles surface).
 */
const objectHash = (obj, depth = 0) => {
    // Prevent infinite recursion
    if (depth > 50) {
        throw new Error("Object is too deep to check equality for serialization. Please use a simpler example.");
    }
    const sha256 = (text) => crypto_1.default.createHash("sha256").update(text).digest("hex");
    if (Array.isArray(obj)) {
        const arrayHash = obj.map((item) => objectHash(item, depth + 1)).join(",");
        return sha256(arrayHash);
    }
    if (obj && typeof obj === "object") {
        const sortedHash = Object.keys(obj)
            .sort()
            .map((key) => `${key}:${objectHash(obj[key], depth + 1)}`)
            .join(",");
        return sha256(sortedHash);
    }
    // JSON.stringify returns undefined (not a string) for undefined, functions
    // and symbols, and hash.update(undefined) throws. Fall back to String() so
    // such values hash instead of crashing; all other digests are unchanged.
    return sha256(JSON.stringify(obj) ?? String(obj));
};
|
|
41
|
+
/**
 * Creates a project that references the given dataset, picking a fresh
 * name whenever the previous one collides.
 *
 * Starts from a `randomName()` and, each time `client.createProject` rejects
 * with an error named "LangSmithConflictError", appends a 6-character slice
 * of a new UUID and tries again, up to 10 attempts.
 *
 * @param client Object exposing `createProject({ projectName, referenceDatasetId })`.
 * @param datasetId Passed through as `referenceDatasetId`.
 * @returns The value resolved by `client.createProject`.
 * @throws Any non-conflict error from `createProject`, or an Error once all
 *   10 attempts hit a naming conflict.
 */
async function _createProject(client, datasetId) {
    const maxAttempts = 10;
    let candidateName = (0, _random_name_js_1.randomName)();
    let attempt = 0;
    while (attempt < maxAttempts) {
        try {
            // `return await` keeps the rejection inside this try block.
            return await client.createProject({
                projectName: candidateName,
                referenceDatasetId: datasetId,
            });
        }
        catch (err) {
            if (err?.name !== "LangSmithConflictError") {
                throw err;
            }
            // Naming collision: extend the name and retry.
            const suffix = (0, uuid_1.v4)().slice(0, 6);
            candidateName = `${candidateName}-${suffix}`;
        }
        attempt += 1;
    }
    throw new Error("Could not generate a unique experiment name within 10 attempts." +
        " Please try again.");
}
|
|
68
|
+
const setupPromises = new Map();
|
|
69
|
+
/**
 * Performs the one-off asynchronous setup for a test suite.
 *
 * When tracking is disabled, resolves to `{ createdAt }` only. Otherwise it
 * reads the dataset named `datasetName` (creating it when the read fails with
 * a "not found" message), lists its examples and tags each with
 * `inputHash` / `outputHash`, creates a project for the dataset, and resolves
 * to `{ dataset, examples, project, client }`.
 *
 * @param testClient Client used for every dataset/example/project call.
 * @param datasetName Name of the dataset backing the suite.
 * @returns The setup value described above.
 * @throws Any error from the client other than a "not found" dataset read.
 */
async function runDatasetSetup(testClient, datasetName) {
    if (!(0, globals_js_1.trackingEnabled)()) {
        return {
            createdAt: new Date().toISOString(),
        };
    }
    let dataset;
    try {
        dataset = await testClient.readDataset({
            datasetName,
        });
    }
    catch (e) {
        // Guard the message check: a thrown value without a string `message`
        // must be rethrown as-is rather than replaced by a TypeError here.
        const datasetMissing = typeof e?.message === "string" && e.message.includes("not found");
        if (!datasetMissing) {
            throw e;
        }
        dataset = await testClient.createDataset(datasetName, {
            description: `Dataset for unit tests created on ${new Date().toISOString()}`,
        });
    }
    const examplesList = testClient.listExamples({
        datasetName,
    });
    const examples = [];
    for await (const example of examplesList) {
        const inputHash = objectHash(example.inputs);
        const outputHash = objectHash(example.outputs ?? {});
        examples.push({ ...example, inputHash, outputHash });
    }
    const project = await _createProject(testClient, dataset.id);
    return {
        dataset,
        examples,
        project,
        client: testClient,
    };
}
|
|
112
|
+
/**
 * Adapts a Jest `describe`-style registrar so that the suite body runs with
 * a LangSmith store attached.
 *
 * @param method Registrar to delegate to (`describe`, `describe.only`, ...).
 * @returns Function `(datasetName, fn, config)` that registers the suite under
 *   `datasetName` and runs `fn` inside the AsyncLocalStorage store.
 */
function wrapDescribeMethod(method) {
    return function (datasetName, fn, config) {
        const suiteBody = () => {
            /**
             * We cannot rely on setting AsyncLocalStorage in beforeAll or beforeEach,
             * due to https://github.com/jestjs/jest/issues/13653 and needing to use
             * the janky .enterWith.
             *
             * We also cannot do async setup in describe due to Jest restrictions.
             * However, .run without asynchronous logic works.
             */
            const store = {
                suiteUuid: (0, uuid_1.v4)(),
                suiteName: datasetName,
                client: config?.client ?? run_trees_js_1.RunTree.getSharedClient(),
                createdAt: new Date().toISOString(),
            };
            void globals_js_1.jestAsyncLocalStorageInstance.run(store, fn);
        };
        return method(datasetName, suiteBody);
    };
}
|
|
133
|
+
const lsDescribe = Object.assign(wrapDescribeMethod(globals_1.describe), {
|
|
134
|
+
only: wrapDescribeMethod(globals_1.describe.only),
|
|
135
|
+
skip: wrapDescribeMethod(globals_1.describe.skip),
|
|
136
|
+
});
|
|
137
|
+
exports.describe = lsDescribe;
|
|
138
|
+
/**
 * Builds a LangSmith-aware replacement for a Jest test registrar.
 *
 * The returned function takes the example (`params.inputs` / `params.outputs`)
 * and an optional `config`, and yields a registrar called as
 * `(name, fn, ...rest)`. For each of `config.n` repetitions (default 1) it
 * registers a test named `"<name> <i>"` with `method`, forwarding `rest`.
 *
 * Each registered test awaits the suite's shared setup promise. With tracking
 * enabled it reuses the stored example whose input/output hashes match (or
 * creates one), then runs `fn` through `traceable` and waits for pending
 * trace batches. With tracking disabled it simply calls `fn`.
 *
 * @param method Jest registrar to delegate to (`test`, `test.only`, ...).
 * @returns Function `(params, config) => (name, fn, ...rest) => Promise<void>`.
 */
function wrapTestMethod(method) {
    return function (params, config) {
        // Due to https://github.com/jestjs/jest/issues/13653,
        // we must access the local store value here before
        // entering an async context
        const context = globals_js_1.jestAsyncLocalStorageInstance.getStore();
        // This typing is wrong, but necessary to avoid lint errors
        // eslint-disable-next-line @typescript-eslint/no-misused-promises
        return async function (...args) {
            // Shared by every repetition registered below so that a missing
            // example is created at most once per test declaration.
            let createExamplePromise;
            const totalRuns = config?.n ?? 1;
            for (let i = 0; i < totalRuns; i += 1) {
                // Jest will not group under the same "describe" group if you await the test and
                // total runs is greater than 1
                void method(`${args[0]} ${i}`, async () => {
                    if (context === undefined) {
                        throw new Error(`Could not retrieve test context.\nPlease make sure you have tracing enabled and you are wrapping all of your test cases in an "ls.describe()" function.`);
                    }
                    // Because of https://github.com/jestjs/jest/issues/13653, we have to do asynchronous setup
                    // within the test itself
                    if (!setupPromises.get(context.suiteUuid)) {
                        setupPromises.set(context.suiteUuid, runDatasetSetup(context.client, context.suiteName));
                    }
                    const { examples, dataset, createdAt, project, client } = await setupPromises.get(context.suiteUuid);
                    const testInput = params.inputs;
                    const testOutput = params.outputs;
                    const inputHash = objectHash(testInput);
                    const outputHash = objectHash(testOutput ?? {});
                    if ((0, globals_js_1.trackingEnabled)()) {
                        const missingFields = [];
                        if (examples === undefined) {
                            missingFields.push("examples");
                        }
                        if (dataset === undefined) {
                            missingFields.push("dataset");
                        }
                        if (project === undefined) {
                            missingFields.push("project");
                        }
                        if (client === undefined) {
                            missingFields.push("client");
                        }
                        if (missingFields.length > 0) {
                            throw new Error(`Failed to initialize test tracking: Could not identify ${missingFields
                                .map((field) => `"${field}"`)
                                .join(", ")} while syncing to LangSmith. Please contact us for help.`);
                        }
                        const testClient = config?.client ?? client;
                        let example = (examples ?? []).find((candidate) => {
                            return (candidate.inputHash === inputHash &&
                                candidate.outputHash === outputHash);
                        });
                        if (example === undefined) {
                            // Avoid creating multiple of the same example
                            // when running the same test case multiple times
                            // Jest runs other tests serially
                            if (createExamplePromise === undefined) {
                                createExamplePromise = testClient.createExample(testInput, testOutput, {
                                    datasetId: dataset?.id,
                                    createdAt: new Date(createdAt ?? new Date()),
                                });
                            }
                            const newExample = await createExamplePromise;
                            example = { ...newExample, inputHash, outputHash };
                        }
                        // NOTE(review): newly created examples are not appended to the
                        // suite's cached `examples` list — confirm whether they should be.
                        // .enterWith is OK here
                        globals_js_1.jestAsyncLocalStorageInstance.enterWith({
                            ...context,
                            currentExample: example,
                            client: testClient,
                        });
                        const traceableOptions = {
                            reference_example_id: example.id,
                            project_name: project.name,
                            metadata: {
                                ...config?.metadata,
                                example_version: example.modified_at
                                    ? new Date(example.modified_at).toISOString()
                                    : new Date(example.created_at).toISOString(),
                            },
                            client: testClient,
                            tracingEnabled: true,
                            name: "Unit test",
                        };
                        // Pass inputs into traceable so tracing works correctly but
                        // provide both to the user-defined test function
                        const tracedFunction = (0, traceable_js_1.traceable)(async (_) => {
                            return args[1]({
                                inputs: testInput,
                                outputs: testOutput,
                            });
                        }, {
                            ...traceableOptions,
                            ...config,
                            // `config` is spread last so callers can override the defaults,
                            // but its raw `metadata` must not replace the merged object
                            // (that would drop `example_version`), and an explicit
                            // `client: undefined` must not discard the resolved client.
                            metadata: traceableOptions.metadata,
                            client: testClient,
                        });
                        await tracedFunction(testInput);
                        await testClient.awaitPendingTraceBatches();
                    }
                    else {
                        // .enterWith is OK here
                        globals_js_1.jestAsyncLocalStorageInstance.enterWith({
                            ...context,
                            currentExample: { inputs: testInput, outputs: testOutput },
                        });
                        await args[1]({
                            inputs: testInput,
                            outputs: testOutput,
                        });
                    }
                }, ...args.slice(2));
            }
        };
    };
}
|
|
251
|
+
/**
 * Table-driven variant of the LangSmith test registrar.
 *
 * @param table Array of `{ inputs, outputs }` examples.
 * @returns Function `(name, fn, timeout)` that registers one wrapped test per
 *   table row, named `"<name> <rowIndex>"`.
 */
function eachMethod(table) {
    return function (name, fn, timeout) {
        for (const [rowIndex, example] of table.entries()) {
            const register = wrapTestMethod(globals_1.test)(example);
            register(`${name} ${rowIndex}`, fn, timeout);
        }
    };
}
|
|
259
|
+
const lsTest = Object.assign(wrapTestMethod(globals_1.test), {
|
|
260
|
+
only: wrapTestMethod(globals_1.test.only),
|
|
261
|
+
skip: wrapTestMethod(globals_1.test.skip),
|
|
262
|
+
each: eachMethod,
|
|
263
|
+
});
|
|
264
|
+
exports.test = lsTest;
|
|
265
|
+
exports.it = lsTest;
|
|
266
|
+
const wrappedExpect = (0, chain_js_1.wrapExpect)(globals_1.expect);
|
|
267
|
+
exports.expect = wrappedExpect;
|
|
package/dist/jest/index.d.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { RunTreeConfig } from "../run_trees.js";
|
|
2
|
+
import { KVMap } from "../schemas.js";
|
|
3
|
+
import type { SimpleEvaluator } from "./vendor/gradedBy.js";
|
|
4
|
+
declare global {
|
|
5
|
+
namespace jest {
|
|
6
|
+
interface AsymmetricMatchers {
|
|
7
|
+
toBeRelativeCloseTo(expected: string, options?: any): void;
|
|
8
|
+
toBeAbsoluteCloseTo(expected: string, options?: any): void;
|
|
9
|
+
toBeSemanticCloseTo(expected: string, options?: any): Promise<void>;
|
|
10
|
+
}
|
|
11
|
+
interface Matchers<R> {
|
|
12
|
+
toBeRelativeCloseTo(expected: string, options?: any): R;
|
|
13
|
+
toBeAbsoluteCloseTo(expected: string, options?: any): R;
|
|
14
|
+
toBeSemanticCloseTo(expected: string, options?: any): Promise<R>;
|
|
15
|
+
gradedBy(evaluator: SimpleEvaluator): jest.Matchers<Promise<R>> & {
|
|
16
|
+
not: jest.Matchers<Promise<R>>;
|
|
17
|
+
resolves: jest.Matchers<Promise<R>>;
|
|
18
|
+
rejects: jest.Matchers<Promise<R>>;
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
export type LangSmithJestDescribeWrapper = (name: string, fn: () => void | Promise<void>, config?: Partial<RunTreeConfig>) => void;
|
|
24
|
+
declare const lsDescribe: LangSmithJestDescribeWrapper & {
|
|
25
|
+
only: LangSmithJestDescribeWrapper;
|
|
26
|
+
skip: LangSmithJestDescribeWrapper;
|
|
27
|
+
};
|
|
28
|
+
export type LangSmithJestTestWrapper<I, O> = (name: string, fn: (params: {
|
|
29
|
+
inputs: I;
|
|
30
|
+
outputs: O;
|
|
31
|
+
}) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
32
|
+
declare function eachMethod<I extends KVMap, O extends KVMap>(table: {
|
|
33
|
+
inputs: I;
|
|
34
|
+
outputs: O;
|
|
35
|
+
}[]): (name: string, fn: (params: {
|
|
36
|
+
inputs: I;
|
|
37
|
+
outputs: O;
|
|
38
|
+
}) => unknown | Promise<unknown>, timeout?: number) => void;
|
|
39
|
+
declare const lsTest: (<I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(params: {
|
|
40
|
+
inputs: I;
|
|
41
|
+
outputs: O;
|
|
42
|
+
}, config?: Partial<RunTreeConfig> & {
|
|
43
|
+
n?: number;
|
|
44
|
+
}) => LangSmithJestTestWrapper<I, O>) & {
|
|
45
|
+
only: <I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(params: {
|
|
46
|
+
inputs: I;
|
|
47
|
+
outputs: O;
|
|
48
|
+
}, config?: Partial<RunTreeConfig> & {
|
|
49
|
+
n?: number;
|
|
50
|
+
}) => LangSmithJestTestWrapper<I, O>;
|
|
51
|
+
skip: <I extends Record<string, any> = Record<string, any>, O extends Record<string, any> = Record<string, any>>(params: {
|
|
52
|
+
inputs: I;
|
|
53
|
+
outputs: O;
|
|
54
|
+
}, config?: Partial<RunTreeConfig> & {
|
|
55
|
+
n?: number;
|
|
56
|
+
}) => LangSmithJestTestWrapper<I, O>;
|
|
57
|
+
each: typeof eachMethod;
|
|
58
|
+
};
|
|
59
|
+
declare const wrappedExpect: any;
|
|
60
|
+
export { lsTest as test, lsTest as it, lsDescribe as describe, wrappedExpect as expect, };
|
|
61
|
+
export { type SimpleEvaluator };
|
|
package/dist/jest/index.js
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/* eslint-disable import/no-extraneous-dependencies */
|
|
2
|
+
/* eslint-disable @typescript-eslint/no-namespace */
|
|
3
|
+
import { expect, test, describe } from "@jest/globals";
|
|
4
|
+
import crypto from "crypto";
|
|
5
|
+
import { v4 } from "uuid";
|
|
6
|
+
import { traceable } from "../traceable.js";
|
|
7
|
+
import { RunTree } from "../run_trees.js";
|
|
8
|
+
import { randomName } from "../evaluation/_random_name.js";
|
|
9
|
+
import { toBeRelativeCloseTo, toBeAbsoluteCloseTo, toBeSemanticCloseTo, } from "./matchers.js";
|
|
10
|
+
import { jestAsyncLocalStorageInstance, trackingEnabled } from "./globals.js";
|
|
11
|
+
import { wrapExpect } from "./vendor/chain.js";
|
|
12
|
+
expect.extend({
|
|
13
|
+
toBeRelativeCloseTo,
|
|
14
|
+
toBeAbsoluteCloseTo,
|
|
15
|
+
toBeSemanticCloseTo,
|
|
16
|
+
});
|
|
17
|
+
/**
 * Computes a deterministic SHA-256 hex digest of a JSON-like value.
 *
 * Arrays are hashed from the comma-joined hashes of their items, plain
 * objects from their `key:hash` pairs sorted by key (so key order does not
 * matter), and everything else from its `JSON.stringify` form.
 *
 * @param obj Value to hash.
 * @param depth Current recursion depth; callers normally omit it.
 * @returns 64-character lowercase hex digest.
 * @throws {Error} When nesting exceeds 50 levels (also how cycles surface).
 */
const objectHash = (obj, depth = 0) => {
    // Prevent infinite recursion
    if (depth > 50) {
        throw new Error("Object is too deep to check equality for serialization. Please use a simpler example.");
    }
    const sha256 = (text) => crypto.createHash("sha256").update(text).digest("hex");
    if (Array.isArray(obj)) {
        const arrayHash = obj.map((item) => objectHash(item, depth + 1)).join(",");
        return sha256(arrayHash);
    }
    if (obj && typeof obj === "object") {
        const sortedHash = Object.keys(obj)
            .sort()
            .map((key) => `${key}:${objectHash(obj[key], depth + 1)}`)
            .join(",");
        return sha256(sortedHash);
    }
    // JSON.stringify returns undefined (not a string) for undefined, functions
    // and symbols, and hash.update(undefined) throws. Fall back to String() so
    // such values hash instead of crashing; all other digests are unchanged.
    return sha256(JSON.stringify(obj) ?? String(obj));
};
|
|
35
|
+
/**
 * Creates a project that references the given dataset, picking a fresh
 * name whenever the previous one collides.
 *
 * Starts from a `randomName()` and, each time `client.createProject` rejects
 * with an error named "LangSmithConflictError", appends a 6-character slice
 * of a new UUID and tries again, up to 10 attempts.
 *
 * @param client Object exposing `createProject({ projectName, referenceDatasetId })`.
 * @param datasetId Passed through as `referenceDatasetId`.
 * @returns The value resolved by `client.createProject`.
 * @throws Any non-conflict error from `createProject`, or an Error once all
 *   10 attempts hit a naming conflict.
 */
async function _createProject(client, datasetId) {
    const maxAttempts = 10;
    let candidateName = randomName();
    let attempt = 0;
    while (attempt < maxAttempts) {
        try {
            // `return await` keeps the rejection inside this try block.
            return await client.createProject({
                projectName: candidateName,
                referenceDatasetId: datasetId,
            });
        }
        catch (err) {
            if (err?.name !== "LangSmithConflictError") {
                throw err;
            }
            // Naming collision: extend the name and retry.
            const suffix = v4().slice(0, 6);
            candidateName = `${candidateName}-${suffix}`;
        }
        attempt += 1;
    }
    throw new Error("Could not generate a unique experiment name within 10 attempts." +
        " Please try again.");
}
|
|
62
|
+
const setupPromises = new Map();
|
|
63
|
+
/**
 * Performs the one-off asynchronous setup for a test suite.
 *
 * When tracking is disabled, resolves to `{ createdAt }` only. Otherwise it
 * reads the dataset named `datasetName` (creating it when the read fails with
 * a "not found" message), lists its examples and tags each with
 * `inputHash` / `outputHash`, creates a project for the dataset, and resolves
 * to `{ dataset, examples, project, client }`.
 *
 * @param testClient Client used for every dataset/example/project call.
 * @param datasetName Name of the dataset backing the suite.
 * @returns The setup value described above.
 * @throws Any error from the client other than a "not found" dataset read.
 */
async function runDatasetSetup(testClient, datasetName) {
    if (!trackingEnabled()) {
        return {
            createdAt: new Date().toISOString(),
        };
    }
    let dataset;
    try {
        dataset = await testClient.readDataset({
            datasetName,
        });
    }
    catch (e) {
        // Guard the message check: a thrown value without a string `message`
        // must be rethrown as-is rather than replaced by a TypeError here.
        const datasetMissing = typeof e?.message === "string" && e.message.includes("not found");
        if (!datasetMissing) {
            throw e;
        }
        dataset = await testClient.createDataset(datasetName, {
            description: `Dataset for unit tests created on ${new Date().toISOString()}`,
        });
    }
    const examplesList = testClient.listExamples({
        datasetName,
    });
    const examples = [];
    for await (const example of examplesList) {
        const inputHash = objectHash(example.inputs);
        const outputHash = objectHash(example.outputs ?? {});
        examples.push({ ...example, inputHash, outputHash });
    }
    const project = await _createProject(testClient, dataset.id);
    return {
        dataset,
        examples,
        project,
        client: testClient,
    };
}
|
|
106
|
+
/**
 * Adapts a Jest `describe`-style registrar so that the suite body runs with
 * a LangSmith store attached.
 *
 * @param method Registrar to delegate to (`describe`, `describe.only`, ...).
 * @returns Function `(datasetName, fn, config)` that registers the suite under
 *   `datasetName` and runs `fn` inside the AsyncLocalStorage store.
 */
function wrapDescribeMethod(method) {
    return function (datasetName, fn, config) {
        const suiteBody = () => {
            /**
             * We cannot rely on setting AsyncLocalStorage in beforeAll or beforeEach,
             * due to https://github.com/jestjs/jest/issues/13653 and needing to use
             * the janky .enterWith.
             *
             * We also cannot do async setup in describe due to Jest restrictions.
             * However, .run without asynchronous logic works.
             */
            const store = {
                suiteUuid: v4(),
                suiteName: datasetName,
                client: config?.client ?? RunTree.getSharedClient(),
                createdAt: new Date().toISOString(),
            };
            void jestAsyncLocalStorageInstance.run(store, fn);
        };
        return method(datasetName, suiteBody);
    };
}
|
|
127
|
+
const lsDescribe = Object.assign(wrapDescribeMethod(describe), {
|
|
128
|
+
only: wrapDescribeMethod(describe.only),
|
|
129
|
+
skip: wrapDescribeMethod(describe.skip),
|
|
130
|
+
});
|
|
131
|
+
/**
 * Builds a LangSmith-aware replacement for a Jest test registrar.
 *
 * The returned function takes the example (`params.inputs` / `params.outputs`)
 * and an optional `config`, and yields a registrar called as
 * `(name, fn, ...rest)`. For each of `config.n` repetitions (default 1) it
 * registers a test named `"<name> <i>"` with `method`, forwarding `rest`.
 *
 * Each registered test awaits the suite's shared setup promise. With tracking
 * enabled it reuses the stored example whose input/output hashes match (or
 * creates one), then runs `fn` through `traceable` and waits for pending
 * trace batches. With tracking disabled it simply calls `fn`.
 *
 * @param method Jest registrar to delegate to (`test`, `test.only`, ...).
 * @returns Function `(params, config) => (name, fn, ...rest) => Promise<void>`.
 */
function wrapTestMethod(method) {
    return function (params, config) {
        // Due to https://github.com/jestjs/jest/issues/13653,
        // we must access the local store value here before
        // entering an async context
        const context = jestAsyncLocalStorageInstance.getStore();
        // This typing is wrong, but necessary to avoid lint errors
        // eslint-disable-next-line @typescript-eslint/no-misused-promises
        return async function (...args) {
            // Shared by every repetition registered below so that a missing
            // example is created at most once per test declaration.
            let createExamplePromise;
            const totalRuns = config?.n ?? 1;
            for (let i = 0; i < totalRuns; i += 1) {
                // Jest will not group under the same "describe" group if you await the test and
                // total runs is greater than 1
                void method(`${args[0]} ${i}`, async () => {
                    if (context === undefined) {
                        throw new Error(`Could not retrieve test context.\nPlease make sure you have tracing enabled and you are wrapping all of your test cases in an "ls.describe()" function.`);
                    }
                    // Because of https://github.com/jestjs/jest/issues/13653, we have to do asynchronous setup
                    // within the test itself
                    if (!setupPromises.get(context.suiteUuid)) {
                        setupPromises.set(context.suiteUuid, runDatasetSetup(context.client, context.suiteName));
                    }
                    const { examples, dataset, createdAt, project, client } = await setupPromises.get(context.suiteUuid);
                    const testInput = params.inputs;
                    const testOutput = params.outputs;
                    const inputHash = objectHash(testInput);
                    const outputHash = objectHash(testOutput ?? {});
                    if (trackingEnabled()) {
                        const missingFields = [];
                        if (examples === undefined) {
                            missingFields.push("examples");
                        }
                        if (dataset === undefined) {
                            missingFields.push("dataset");
                        }
                        if (project === undefined) {
                            missingFields.push("project");
                        }
                        if (client === undefined) {
                            missingFields.push("client");
                        }
                        if (missingFields.length > 0) {
                            throw new Error(`Failed to initialize test tracking: Could not identify ${missingFields
                                .map((field) => `"${field}"`)
                                .join(", ")} while syncing to LangSmith. Please contact us for help.`);
                        }
                        const testClient = config?.client ?? client;
                        let example = (examples ?? []).find((candidate) => {
                            return (candidate.inputHash === inputHash &&
                                candidate.outputHash === outputHash);
                        });
                        if (example === undefined) {
                            // Avoid creating multiple of the same example
                            // when running the same test case multiple times
                            // Jest runs other tests serially
                            if (createExamplePromise === undefined) {
                                createExamplePromise = testClient.createExample(testInput, testOutput, {
                                    datasetId: dataset?.id,
                                    createdAt: new Date(createdAt ?? new Date()),
                                });
                            }
                            const newExample = await createExamplePromise;
                            example = { ...newExample, inputHash, outputHash };
                        }
                        // NOTE(review): newly created examples are not appended to the
                        // suite's cached `examples` list — confirm whether they should be.
                        // .enterWith is OK here
                        jestAsyncLocalStorageInstance.enterWith({
                            ...context,
                            currentExample: example,
                            client: testClient,
                        });
                        const traceableOptions = {
                            reference_example_id: example.id,
                            project_name: project.name,
                            metadata: {
                                ...config?.metadata,
                                example_version: example.modified_at
                                    ? new Date(example.modified_at).toISOString()
                                    : new Date(example.created_at).toISOString(),
                            },
                            client: testClient,
                            tracingEnabled: true,
                            name: "Unit test",
                        };
                        // Pass inputs into traceable so tracing works correctly but
                        // provide both to the user-defined test function
                        const tracedFunction = traceable(async (_) => {
                            return args[1]({
                                inputs: testInput,
                                outputs: testOutput,
                            });
                        }, {
                            ...traceableOptions,
                            ...config,
                            // `config` is spread last so callers can override the defaults,
                            // but its raw `metadata` must not replace the merged object
                            // (that would drop `example_version`), and an explicit
                            // `client: undefined` must not discard the resolved client.
                            metadata: traceableOptions.metadata,
                            client: testClient,
                        });
                        await tracedFunction(testInput);
                        await testClient.awaitPendingTraceBatches();
                    }
                    else {
                        // .enterWith is OK here
                        jestAsyncLocalStorageInstance.enterWith({
                            ...context,
                            currentExample: { inputs: testInput, outputs: testOutput },
                        });
                        await args[1]({
                            inputs: testInput,
                            outputs: testOutput,
                        });
                    }
                }, ...args.slice(2));
            }
        };
    };
}
|
|
244
|
+
/**
 * Table-driven variant of the LangSmith test registrar.
 *
 * @param table Array of `{ inputs, outputs }` examples.
 * @returns Function `(name, fn, timeout)` that registers one wrapped test per
 *   table row, named `"<name> <rowIndex>"`.
 */
function eachMethod(table) {
    return function (name, fn, timeout) {
        for (const [rowIndex, example] of table.entries()) {
            const register = wrapTestMethod(test)(example);
            register(`${name} ${rowIndex}`, fn, timeout);
        }
    };
}
|
|
252
|
+
const lsTest = Object.assign(wrapTestMethod(test), {
|
|
253
|
+
only: wrapTestMethod(test.only),
|
|
254
|
+
skip: wrapTestMethod(test.skip),
|
|
255
|
+
each: eachMethod,
|
|
256
|
+
});
|
|
257
|
+
const wrappedExpect = wrapExpect(expect);
|
|
258
|
+
export { lsTest as test, lsTest as it, lsDescribe as describe, wrappedExpect as expect, };
|
|
package/dist/jest/matchers.cjs
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.toBeSemanticCloseTo = exports.toBeAbsoluteCloseTo = exports.toBeRelativeCloseTo = void 0;
|
|
4
|
+
// Levenshtein distance implementation
|
|
5
|
+
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions that turn
 * `a` into `b`. Comparison is per UTF-16 code unit (string indexing).
 *
 * Only the previous row of the dynamic-programming table is kept, so memory
 * use is proportional to `a.length` rather than `a.length * b.length`.
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @returns {number} The edit distance.
 */
function levenshteinDistance(a, b) {
    if (a.length === 0) {
        return b.length;
    }
    if (b.length === 0) {
        return a.length;
    }
    // Row 0: turning a prefix of `a` into the empty string costs its length.
    let previousRow = Array.from({ length: a.length + 1 }, (_, i) => i);
    for (let j = 1; j <= b.length; j++) {
        // Column 0: building a prefix of `b` from the empty string costs `j`.
        const currentRow = [j];
        for (let i = 1; i <= a.length; i++) {
            const substitutionCost = a[i - 1] === b[j - 1] ? 0 : 1;
            currentRow[i] = Math.min(currentRow[i - 1] + 1, previousRow[i] + 1, previousRow[i - 1] + substitutionCost);
        }
        previousRow = currentRow;
    }
    return previousRow[a.length];
}
|
|
25
|
+
/**
 * Jest matcher: passes when the edit distance between `received` and
 * `expected`, divided by the longer string's length, is at most `threshold`.
 *
 * @param received Actual string.
 * @param expected String to compare against.
 * @param options `threshold` (default 0.1) and `algorithm` (default and only
 *   supported value: "levenshtein").
 * @returns Promise of `{ pass, message }` as Jest expects from a matcher.
 * @throws {Error} For any other `algorithm`.
 */
async function toBeRelativeCloseTo(received, expected, options = {}) {
    const { threshold = 0.1, algorithm = "levenshtein" } = options;
    if (algorithm !== "levenshtein") {
        throw new Error(`Unsupported algorithm: ${algorithm}`);
    }
    const editDistance = levenshteinDistance(received, expected);
    const longest = Math.max(received.length, expected.length);
    // Calculate relative distance (normalized between 0 and 1)
    let relativeDistance = 0;
    if (longest !== 0) {
        relativeDistance = editDistance / longest;
    }
    const pass = relativeDistance <= threshold;
    const message = () => {
        if (pass) {
            return `Expected "${received}" not to be relatively close to "${expected}" (threshold: ${threshold}, actual distance: ${relativeDistance})`;
        }
        return `Expected "${received}" to be relatively close to "${expected}" (threshold: ${threshold}, actual distance: ${relativeDistance})`;
    };
    return { pass, message };
}
|
|
47
|
+
exports.toBeRelativeCloseTo = toBeRelativeCloseTo;
|
|
48
|
+
/**
 * Matcher that passes when the raw edit distance between `received` and
 * `expected` is at most `threshold`.
 *
 * @param {string} received - Value under test.
 * @param {string} expected - Reference value.
 * @param {{ threshold?: number, algorithm?: "levenshtein" }} [options] -
 *   `threshold` defaults to 3, `algorithm` defaults to "levenshtein".
 * @returns {Promise<{ pass: boolean, message: () => string }>}
 * @throws {Error} If `algorithm` is not "levenshtein".
 */
async function toBeAbsoluteCloseTo(received, expected, options = {}) {
  const { threshold = 3, algorithm = "levenshtein" } = options;
  if (algorithm !== "levenshtein") {
    throw new Error(`Unsupported algorithm: ${algorithm}`);
  }
  const distance = levenshteinDistance(received, expected);
  const pass = distance <= threshold;
  const message = () => {
    if (pass) {
      return `Expected "${received}" not to be absolutely close to "${expected}" (threshold: ${threshold}, actual distance: ${distance})`;
    }
    return `Expected "${received}" to be absolutely close to "${expected}" (threshold: ${threshold}, actual distance: ${distance})`;
  };
  return { pass, message };
}
|
|
66
|
+
exports.toBeAbsoluteCloseTo = toBeAbsoluteCloseTo;
|
|
67
|
+
/**
 * Matcher that passes when the embeddings of `received` and `expected` are
 * similar enough, i.e. `similarity >= 1 - threshold`.
 *
 * @param {string} received - Value under test.
 * @param {string} expected - Reference value.
 * @param {object} options
 * @param {number} [options.threshold=0.2] - Allowed distance from a
 *   similarity of 1.
 * @param {{ embedQuery: (query: string) => number[] | Promise<number[]> }} options.embedding -
 *   Object used to embed both strings.
 * @param {"cosine" | "dot-product"} [options.algorithm="cosine"] -
 *   Similarity measure.
 * @returns {Promise<{ pass: boolean, message: () => string }>}
 * @throws {TypeError} If `options.embedding.embedQuery` is not a function.
 * @throws {Error} If `algorithm` is unsupported, or if the two embeddings
 *   have different lengths.
 */
async function toBeSemanticCloseTo(received, expected, options) {
  if (typeof options?.embedding?.embedQuery !== "function") {
    throw new TypeError('toBeSemanticCloseTo requires an "embedding" option with an "embedQuery" method');
  }
  const { threshold = 0.2, embedding, algorithm = "cosine" } = options;
  // Reject unknown algorithms before calling the embedder, so a typo does
  // not cost two embedding requests.
  if (algorithm !== "cosine" && algorithm !== "dot-product") {
    throw new Error(`Unsupported algorithm: ${algorithm}`);
  }
  // Get embeddings for both strings
  const [receivedEmbedding, expectedEmbedding] = await Promise.all([
    embedding.embedQuery(received),
    embedding.embedQuery(expected),
  ]);
  // Vectors of different sizes would otherwise be compared over a partial
  // prefix (or produce NaN), giving a silently meaningless similarity.
  if (receivedEmbedding.length !== expectedEmbedding.length) {
    throw new Error(`Embedding dimension mismatch: received has ${receivedEmbedding.length} dimensions, expected has ${expectedEmbedding.length}`);
  }
  const dotProduct = receivedEmbedding.reduce((sum, a, i) => sum + a * expectedEmbedding[i], 0);
  let similarity;
  if (algorithm === "cosine") {
    const receivedMagnitude = Math.sqrt(receivedEmbedding.reduce((sum, a) => sum + a * a, 0));
    const expectedMagnitude = Math.sqrt(expectedEmbedding.reduce((sum, a) => sum + a * a, 0));
    const denominator = receivedMagnitude * expectedMagnitude;
    // A zero-length vector has no direction; report 0 instead of NaN (0 / 0).
    similarity = denominator === 0 ? 0 : dotProduct / denominator;
  }
  else {
    similarity = dotProduct;
  }
  const pass = similarity >= 1 - threshold;
  return {
    pass,
    message: () => pass
      ? `Expected "${received}" not to be semantically close to "${expected}" (threshold: ${threshold}, similarity: ${similarity})`
      : `Expected "${received}" to be semantically close to "${expected}" (threshold: ${threshold}, similarity: ${similarity})`,
  };
}
|
|
101
|
+
exports.toBeSemanticCloseTo = toBeSemanticCloseTo;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { MatcherContext } from "expect";
|
|
2
|
+
export declare function toBeRelativeCloseTo(this: MatcherContext, received: string, expected: string, options?: {
|
|
3
|
+
threshold?: number;
|
|
4
|
+
algorithm?: "levenshtein";
|
|
5
|
+
}): Promise<{
|
|
6
|
+
pass: boolean;
|
|
7
|
+
message: () => string;
|
|
8
|
+
}>;
|
|
9
|
+
export declare function toBeAbsoluteCloseTo(this: MatcherContext, received: string, expected: string, options?: {
|
|
10
|
+
threshold?: number;
|
|
11
|
+
algorithm?: "levenshtein";
|
|
12
|
+
}): Promise<{
|
|
13
|
+
pass: boolean;
|
|
14
|
+
message: () => string;
|
|
15
|
+
}>;
|
|
16
|
+
export declare function toBeSemanticCloseTo(this: MatcherContext, received: string, expected: string, options: {
|
|
17
|
+
threshold?: number;
|
|
18
|
+
embedding: {
|
|
19
|
+
embedQuery: (query: string) => number[] | Promise<number[]>;
|
|
20
|
+
};
|
|
21
|
+
algorithm?: "cosine" | "dot-product";
|
|
22
|
+
}): Promise<{
|
|
23
|
+
pass: boolean;
|
|
24
|
+
message: () => string;
|
|
25
|
+
}>;
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
// Levenshtein distance implementation
|
|
2
|
+
/**
 * Computes the Levenshtein (edit) distance between two indexable sequences.
 *
 * Elements are compared with `===`, so for strings the comparison is made
 * per UTF-16 code unit.
 *
 * @param {string} a - First sequence.
 * @param {string} b - Second sequence.
 * @returns {number} Minimum number of single-element insertions, deletions
 *   and substitutions needed to turn `a` into `b`.
 */
function levenshteinDistance(a, b) {
  const cols = a.length;
  const rows = b.length;
  if (cols === 0) {
    return rows;
  }
  if (rows === 0) {
    return cols;
  }
  // table[r][c] holds the distance between the first `c` elements of `a`
  // and the first `r` elements of `b`; row 0 and column 0 are the base cases.
  const table = Array.from(Array(rows + 1), (_row, r) =>
    Array.from(Array(cols + 1), (_cell, c) => {
      if (r === 0) {
        return c;
      }
      return c === 0 ? r : null;
    })
  );
  for (let r = 1; r <= rows; r += 1) {
    for (let c = 1; c <= cols; c += 1) {
      const viaLeft = table[r][c - 1] + 1;
      const viaUp = table[r - 1][c] + 1;
      const viaDiagonal = table[r - 1][c - 1] + (a[c - 1] === b[r - 1] ? 0 : 1);
      table[r][c] = Math.min(viaLeft, viaUp, viaDiagonal);
    }
  }
  return table[rows][cols];
}
|
|
22
|
+
/**
 * Matcher that passes when the edit distance between `received` and
 * `expected`, divided by the length of the longer of the two, is at most
 * `threshold`.
 *
 * @param {string} received - Value under test.
 * @param {string} expected - Reference value.
 * @param {{ threshold?: number, algorithm?: "levenshtein" }} [options] -
 *   `threshold` defaults to 0.1, `algorithm` defaults to "levenshtein".
 * @returns {Promise<{ pass: boolean, message: () => string }>}
 * @throws {Error} If `algorithm` is not "levenshtein".
 */
export async function toBeRelativeCloseTo(received, expected, options = {}) {
  const { threshold = 0.1, algorithm = "levenshtein" } = options;
  if (algorithm !== "levenshtein") {
    throw new Error(`Unsupported algorithm: ${algorithm}`);
  }
  const distance = levenshteinDistance(received, expected);
  const maxLength = Math.max(received.length, expected.length);
  // Two empty inputs are treated as identical instead of dividing by zero.
  let relativeDistance = 0;
  if (maxLength !== 0) {
    relativeDistance = distance / maxLength;
  }
  const pass = relativeDistance <= threshold;
  const message = () => {
    if (pass) {
      return `Expected "${received}" not to be relatively close to "${expected}" (threshold: ${threshold}, actual distance: ${relativeDistance})`;
    }
    return `Expected "${received}" to be relatively close to "${expected}" (threshold: ${threshold}, actual distance: ${relativeDistance})`;
  };
  return { pass, message };
}
|
|
44
|
+
/**
 * Matcher that passes when the raw edit distance between `received` and
 * `expected` is at most `threshold`.
 *
 * @param {string} received - Value under test.
 * @param {string} expected - Reference value.
 * @param {{ threshold?: number, algorithm?: "levenshtein" }} [options] -
 *   `threshold` defaults to 3, `algorithm` defaults to "levenshtein".
 * @returns {Promise<{ pass: boolean, message: () => string }>}
 * @throws {Error} If `algorithm` is not "levenshtein".
 */
export async function toBeAbsoluteCloseTo(received, expected, options = {}) {
  const { threshold = 3, algorithm = "levenshtein" } = options;
  if (algorithm !== "levenshtein") {
    throw new Error(`Unsupported algorithm: ${algorithm}`);
  }
  const distance = levenshteinDistance(received, expected);
  const pass = distance <= threshold;
  const message = () => {
    if (pass) {
      return `Expected "${received}" not to be absolutely close to "${expected}" (threshold: ${threshold}, actual distance: ${distance})`;
    }
    return `Expected "${received}" to be absolutely close to "${expected}" (threshold: ${threshold}, actual distance: ${distance})`;
  };
  return { pass, message };
}
|
|
62
|
+
/**
 * Matcher that passes when the embeddings of `received` and `expected` are
 * similar enough, i.e. `similarity >= 1 - threshold`.
 *
 * @param {string} received - Value under test.
 * @param {string} expected - Reference value.
 * @param {object} options
 * @param {number} [options.threshold=0.2] - Allowed distance from a
 *   similarity of 1.
 * @param {{ embedQuery: (query: string) => number[] | Promise<number[]> }} options.embedding -
 *   Object used to embed both strings.
 * @param {"cosine" | "dot-product"} [options.algorithm="cosine"] -
 *   Similarity measure.
 * @returns {Promise<{ pass: boolean, message: () => string }>}
 * @throws {TypeError} If `options.embedding.embedQuery` is not a function.
 * @throws {Error} If `algorithm` is unsupported, or if the two embeddings
 *   have different lengths.
 */
export async function toBeSemanticCloseTo(received, expected, options) {
  if (typeof options?.embedding?.embedQuery !== "function") {
    throw new TypeError('toBeSemanticCloseTo requires an "embedding" option with an "embedQuery" method');
  }
  const { threshold = 0.2, embedding, algorithm = "cosine" } = options;
  // Reject unknown algorithms before calling the embedder, so a typo does
  // not cost two embedding requests.
  if (algorithm !== "cosine" && algorithm !== "dot-product") {
    throw new Error(`Unsupported algorithm: ${algorithm}`);
  }
  // Get embeddings for both strings
  const [receivedEmbedding, expectedEmbedding] = await Promise.all([
    embedding.embedQuery(received),
    embedding.embedQuery(expected),
  ]);
  // Vectors of different sizes would otherwise be compared over a partial
  // prefix (or produce NaN), giving a silently meaningless similarity.
  if (receivedEmbedding.length !== expectedEmbedding.length) {
    throw new Error(`Embedding dimension mismatch: received has ${receivedEmbedding.length} dimensions, expected has ${expectedEmbedding.length}`);
  }
  const dotProduct = receivedEmbedding.reduce((sum, a, i) => sum + a * expectedEmbedding[i], 0);
  let similarity;
  if (algorithm === "cosine") {
    const receivedMagnitude = Math.sqrt(receivedEmbedding.reduce((sum, a) => sum + a * a, 0));
    const expectedMagnitude = Math.sqrt(expectedEmbedding.reduce((sum, a) => sum + a * a, 0));
    const denominator = receivedMagnitude * expectedMagnitude;
    // A zero-length vector has no direction; report 0 instead of NaN (0 / 0).
    similarity = denominator === 0 ? 0 : dotProduct / denominator;
  }
  else {
    similarity = dotProduct;
  }
  const pass = similarity >= 1 - threshold;
  return {
    pass,
    message: () => pass
      ? `Expected "${received}" not to be semantically close to "${expected}" (threshold: ${threshold}, similarity: ${similarity})`
      : `Expected "${received}" to be semantically close to "${expected}" (threshold: ${threshold}, similarity: ${similarity})`,
  };
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.wrapExpect = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* Adapted from https://github.com/mattphillips/jest-chain/blob/main/src/chain.js
|
|
6
|
+
*/
|
|
7
|
+
const gradedBy_js_1 = require("./gradedBy.cjs");
|
|
8
|
+
/**
 * Error that carries a matcher result, so a failure re-thrown from a wrapped
 * matcher still exposes `matcherResult` to whoever catches it.
 */
class JestAssertionError extends Error {
  /**
   * @param {{ message: string | (() => string) }} result - Matcher result;
   *   a function-valued `message` is invoked to produce the error text.
   * @param {Function} callsite - Passed to `Error.captureStackTrace` (when
   *   available) as the function above which the stack trace starts.
   */
  constructor(result, callsite) {
    const hasLazyMessage = typeof result.message === "function";
    super(hasLazyMessage ? result.message() : result.message);
    Object.defineProperty(this, "matcherResult", {
      value: result,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    // captureStackTrace is not available in every JavaScript runtime.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, callsite);
    }
  }
}
|
|
23
|
+
/**
 * Builds, for every function-valued key of `matchers`, an async replacement
 * that first grades `originalArgs[0]` with `evaluator` and then runs the
 * same-named matcher on `originalExpect(score)`.
 *
 * @param {object} matchers - Matcher object whose function keys are wrapped.
 * @param {Function} evaluator - Forwarded to `gradedBy`.
 * @param {Array} originalArgs - Arguments of the original `expect(...)`
 *   call; only the first one is graded.
 * @param {Function} originalExpect - The unwrapped `expect`.
 * @param {string[]} [staticPath] - Property names (e.g. "not") to walk on
 *   the expectation before invoking the matcher.
 * @returns {Array<object>} One single-key object `{ [name]: wrapper }` per
 *   wrapped matcher.
 */
const _wrapMatchers = (matchers, evaluator, originalArgs, originalExpect, staticPath = []) => {
  const wrapped = [];
  for (const name of Object.keys(matchers)) {
    if (typeof matchers[name] !== "function") {
      continue;
    }
    const newMatcher = async (...args) => {
      try {
        const score = await (0, gradedBy_js_1.gradedBy)(originalArgs[0], evaluator);
        // Walk modifiers such as `.not` before reaching the matcher itself.
        const target = staticPath.reduce((current, pathEntry) => current[pathEntry], originalExpect(score));
        const outcome = target[name](...args); // run matcher up to current state
        const isThenable = Boolean(outcome) && typeof outcome.then === "function";
        return isThenable
          ? Object.assign(Promise.resolve(outcome), matchers)
          : matchers;
      }
      catch (error) {
        if (error.matcherResult) {
          // Re-throw with this wrapper as the callsite for the stack trace.
          throw new JestAssertionError(error.matcherResult, newMatcher);
        }
        throw error;
      }
    };
    wrapped.push({ [name]: newMatcher });
  }
  return wrapped;
};
|
|
54
|
+
/**
 * Returns a copy of `matchers` extended with a `gradedBy(evaluator)` method.
 *
 * `gradedBy` returns an object containing an evaluator-aware wrapper for
 * every matcher, plus one nested object of wrappers for every non-function
 * property (such as `not`).
 *
 * @param {object} matchers - Result of calling the original `expect(...)`.
 * @param {Array} originalArgs - Arguments of that `expect(...)` call.
 * @param {Function} originalExpect - The unwrapped `expect`.
 * @param {string[]} [staticPath] - Modifier path prefix for nested wrappers.
 * @returns {object} Own enumerable properties of `matchers` plus `gradedBy`.
 */
const addGradedBy = (matchers, originalArgs, originalExpect, staticPath = []) => {
  let spreadMatchers = { ...matchers };
  // Handle Bun, which uses a class
  if (Object.keys(matchers).length === 0) {
    const prototypeProps = Object.getOwnPropertyNames(Object.getPrototypeOf(matchers));
    // flatMap lets a failed read contribute no entry at all. Returning `[]`
    // to Object.fromEntries directly would create a bogus "undefined" key.
    spreadMatchers = Object.fromEntries(prototypeProps.flatMap((prop) => {
      try {
        return [[prop, matchers[prop]]];
      }
      catch (e) {
        // Ignore bizarre Bun bug
        return [];
      }
    }));
  }
  return Object.assign({}, matchers, {
    gradedBy: function (evaluator) {
      const mappedMatchers = _wrapMatchers(spreadMatchers, evaluator, originalArgs, originalExpect, []);
      // .not etc.
      const staticMatchers = Object.keys(spreadMatchers)
        // Use the captured values so properties are not read a second time.
        .filter((name) => typeof spreadMatchers[name] !== "function")
        .map((name) => {
        return {
          [name]: Object.assign({}, ..._wrapMatchers(spreadMatchers, evaluator, originalArgs, originalExpect, staticPath.concat(name))),
        };
      });
      return Object.assign({}, ...mappedMatchers, ...staticMatchers);
    },
  });
};
|
|
84
|
+
/**
 * Creates a stand-in for `expect` whose results also expose `gradedBy`.
 *
 * @param {Function} originalExpect - The `expect` function to wrap.
 * @returns {Function} A function that forwards its arguments to
 *   `originalExpect` and decorates the result via `addGradedBy`; the own
 *   enumerable properties of `originalExpect` are copied onto it.
 */
function wrapExpect(originalExpect) {
  // Partially apply expect to get all matchers and chain them.
  const expectProxy = (...args) => {
    const matchers = originalExpect(...args);
    return addGradedBy(matchers, args, originalExpect, []);
  };
  // Clone additional properties on expect.
  return Object.assign(expectProxy, originalExpect);
}
|
|
91
|
+
exports.wrapExpect = wrapExpect;
|
|
92
|
+
globalThis.expect = wrapExpect(globalThis.expect);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Wraps an `expect` function so that every expectation it returns also
 * exposes a `gradedBy(evaluator)` method.
 *
 * @param originalExpect The `expect` function to wrap.
 * @returns A proxy function carrying the own enumerable properties of `originalExpect`.
 */
export declare function wrapExpect(originalExpect: any): any;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapted from https://github.com/mattphillips/jest-chain/blob/main/src/chain.js
|
|
3
|
+
*/
|
|
4
|
+
import { gradedBy } from "./gradedBy.js";
|
|
5
|
+
/**
 * Error that carries a matcher result, so a failure re-thrown from a wrapped
 * matcher still exposes `matcherResult` to whoever catches it.
 */
class JestAssertionError extends Error {
  /**
   * @param {{ message: string | (() => string) }} result - Matcher result;
   *   a function-valued `message` is invoked to produce the error text.
   * @param {Function} callsite - Passed to `Error.captureStackTrace` (when
   *   available) as the function above which the stack trace starts.
   */
  constructor(result, callsite) {
    const hasLazyMessage = typeof result.message === "function";
    super(hasLazyMessage ? result.message() : result.message);
    Object.defineProperty(this, "matcherResult", {
      value: result,
      writable: true,
      enumerable: true,
      configurable: true,
    });
    // captureStackTrace is not available in every JavaScript runtime.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, callsite);
    }
  }
}
|
|
20
|
+
/**
 * Builds, for every function-valued key of `matchers`, an async replacement
 * that first grades `originalArgs[0]` with `evaluator` and then runs the
 * same-named matcher on `originalExpect(score)`.
 *
 * @param {object} matchers - Matcher object whose function keys are wrapped.
 * @param {Function} evaluator - Forwarded to `gradedBy`.
 * @param {Array} originalArgs - Arguments of the original `expect(...)`
 *   call; only the first one is graded.
 * @param {Function} originalExpect - The unwrapped `expect`.
 * @param {string[]} [staticPath] - Property names (e.g. "not") to walk on
 *   the expectation before invoking the matcher.
 * @returns {Array<object>} One single-key object `{ [name]: wrapper }` per
 *   wrapped matcher.
 */
const _wrapMatchers = (matchers, evaluator, originalArgs, originalExpect, staticPath = []) => {
  const wrapped = [];
  for (const name of Object.keys(matchers)) {
    if (typeof matchers[name] !== "function") {
      continue;
    }
    const newMatcher = async (...args) => {
      try {
        const score = await gradedBy(originalArgs[0], evaluator);
        // Walk modifiers such as `.not` before reaching the matcher itself.
        const target = staticPath.reduce((current, pathEntry) => current[pathEntry], originalExpect(score));
        const outcome = target[name](...args); // run matcher up to current state
        const isThenable = Boolean(outcome) && typeof outcome.then === "function";
        return isThenable
          ? Object.assign(Promise.resolve(outcome), matchers)
          : matchers;
      }
      catch (error) {
        if (error.matcherResult) {
          // Re-throw with this wrapper as the callsite for the stack trace.
          throw new JestAssertionError(error.matcherResult, newMatcher);
        }
        throw error;
      }
    };
    wrapped.push({ [name]: newMatcher });
  }
  return wrapped;
};
|
|
51
|
+
/**
 * Returns a copy of `matchers` extended with a `gradedBy(evaluator)` method.
 *
 * `gradedBy` returns an object containing an evaluator-aware wrapper for
 * every matcher, plus one nested object of wrappers for every non-function
 * property (such as `not`).
 *
 * @param {object} matchers - Result of calling the original `expect(...)`.
 * @param {Array} originalArgs - Arguments of that `expect(...)` call.
 * @param {Function} originalExpect - The unwrapped `expect`.
 * @param {string[]} [staticPath] - Modifier path prefix for nested wrappers.
 * @returns {object} Own enumerable properties of `matchers` plus `gradedBy`.
 */
const addGradedBy = (matchers, originalArgs, originalExpect, staticPath = []) => {
  let spreadMatchers = { ...matchers };
  // Handle Bun, which uses a class
  if (Object.keys(matchers).length === 0) {
    const prototypeProps = Object.getOwnPropertyNames(Object.getPrototypeOf(matchers));
    // flatMap lets a failed read contribute no entry at all. Returning `[]`
    // to Object.fromEntries directly would create a bogus "undefined" key.
    spreadMatchers = Object.fromEntries(prototypeProps.flatMap((prop) => {
      try {
        return [[prop, matchers[prop]]];
      }
      catch (e) {
        // Ignore bizarre Bun bug
        return [];
      }
    }));
  }
  return Object.assign({}, matchers, {
    gradedBy: function (evaluator) {
      const mappedMatchers = _wrapMatchers(spreadMatchers, evaluator, originalArgs, originalExpect, []);
      // .not etc.
      const staticMatchers = Object.keys(spreadMatchers)
        // Use the captured values so properties are not read a second time.
        .filter((name) => typeof spreadMatchers[name] !== "function")
        .map((name) => {
        return {
          [name]: Object.assign({}, ..._wrapMatchers(spreadMatchers, evaluator, originalArgs, originalExpect, staticPath.concat(name))),
        };
      });
      return Object.assign({}, ...mappedMatchers, ...staticMatchers);
    },
  });
};
|
|
81
|
+
/**
 * Creates a stand-in for `expect` whose results also expose `gradedBy`.
 *
 * @param {Function} originalExpect - The `expect` function to wrap.
 * @returns {Function} A function that forwards its arguments to
 *   `originalExpect` and decorates the result via `addGradedBy`; the own
 *   enumerable properties of `originalExpect` are copied onto it.
 */
export function wrapExpect(originalExpect) {
  // Partially apply expect to get all matchers and chain them.
  const expectProxy = (...args) => {
    const matchers = originalExpect(...args);
    return addGradedBy(matchers, args, originalExpect, []);
  };
  // Clone additional properties on expect.
  return Object.assign(expectProxy, originalExpect);
}
|
|
88
|
+
globalThis.expect = wrapExpect(globalThis.expect);
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.gradedBy = void 0;
|
|
4
|
+
const traceable_js_1 = require("../../traceable.cjs");
|
|
5
|
+
const globals_js_1 = require("../globals.cjs");
|
|
6
|
+
/**
 * Runs `evaluator` for the example of the current LangSmith test context
 * and resolves to the `score` of its result.
 *
 * When tracking is enabled the evaluator is wrapped with `traceable`, it
 * receives the current run tree's inputs, and its result is logged as
 * evaluation feedback through the context client (if one is set).
 * Otherwise the evaluator is called directly with the example's inputs.
 *
 * @param {*} actual - Value handed to the evaluator as `actual`.
 * @param {Function} evaluator - Called with `{ input, expected, actual }`.
 * @returns {Promise<*>} The `score` property of the evaluator's result.
 * @throws {Error} If there is no store or no current example in the
 *   async-local context.
 */
async function gradedBy(actual, evaluator) {
  const context = globals_js_1.jestAsyncLocalStorageInstance.getStore();
  const hasExample = context !== undefined && context.currentExample !== undefined;
  if (!hasExample) {
    throw new Error(`Could not identify current LangSmith context.\nPlease ensure you are calling this matcher within "ls.test()"`);
  }
  const example = context.currentExample;
  if (!(0, globals_js_1.trackingEnabled)()) {
    const untracedResult = await evaluator({
      input: example.inputs ?? {},
      expected: example.outputs ?? {},
      actual,
    });
    return untracedResult.score;
  }
  const runTree = (0, traceable_js_1.getCurrentRunTree)();
  // Prefer the modification time; fall back to creation time, then "now".
  const versionSource = example.modified_at
    ? example.modified_at
    : example.created_at ?? new Date();
  const exampleVersion = new Date(versionSource).toISOString();
  const wrappedEvaluator = (0, traceable_js_1.traceable)(evaluator, {
    reference_example_id: example.id,
    metadata: { example_version: exampleVersion },
    client: context.client,
    tracingEnabled: true,
  });
  const evalResult = await wrappedEvaluator({
    input: runTree.inputs,
    expected: example.outputs ?? {},
    actual,
  });
  await context.client?.logEvaluationFeedback(evalResult, runTree);
  return evalResult.score;
}
|
|
40
|
+
exports.gradedBy = gradedBy;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { EvaluationResult } from "../../evaluation/evaluator.js";
|
|
2
|
+
export type SimpleEvaluator = (params: {
|
|
3
|
+
input: Record<string, any>;
|
|
4
|
+
actual: Record<string, any>;
|
|
5
|
+
expected: Record<string, any>;
|
|
6
|
+
}) => EvaluationResult | Promise<EvaluationResult>;
|
|
7
|
+
/**
 * Runs `evaluator` for the example of the current LangSmith test context and
 * resolves to the `score` of the evaluation result.
 *
 * Rejects when no LangSmith test context (or no current example) is available.
 *
 * @param actual Value handed to the evaluator as `actual`.
 * @param evaluator Function called with `{ input, expected, actual }`.
 */
export declare function gradedBy(actual: any, evaluator: SimpleEvaluator): Promise<import("../../schemas.js").ScoreType | undefined>;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { getCurrentRunTree, traceable } from "../../traceable.js";
|
|
2
|
+
import { jestAsyncLocalStorageInstance, trackingEnabled } from "../globals.js";
|
|
3
|
+
/**
 * Runs `evaluator` for the example of the current LangSmith test context
 * and resolves to the `score` of its result.
 *
 * When tracking is enabled the evaluator is wrapped with `traceable`, it
 * receives the current run tree's inputs, and its result is logged as
 * evaluation feedback through the context client (if one is set).
 * Otherwise the evaluator is called directly with the example's inputs.
 *
 * @param {*} actual - Value handed to the evaluator as `actual`.
 * @param {Function} evaluator - Called with `{ input, expected, actual }`.
 * @returns {Promise<*>} The `score` property of the evaluator's result.
 * @throws {Error} If there is no store or no current example in the
 *   async-local context.
 */
export async function gradedBy(actual, evaluator) {
  const context = jestAsyncLocalStorageInstance.getStore();
  const hasExample = context !== undefined && context.currentExample !== undefined;
  if (!hasExample) {
    throw new Error(`Could not identify current LangSmith context.\nPlease ensure you are calling this matcher within "ls.test()"`);
  }
  const example = context.currentExample;
  if (!trackingEnabled()) {
    const untracedResult = await evaluator({
      input: example.inputs ?? {},
      expected: example.outputs ?? {},
      actual,
    });
    return untracedResult.score;
  }
  const runTree = getCurrentRunTree();
  // Prefer the modification time; fall back to creation time, then "now".
  const versionSource = example.modified_at
    ? example.modified_at
    : example.created_at ?? new Date();
  const exampleVersion = new Date(versionSource).toISOString();
  const wrappedEvaluator = traceable(evaluator, {
    reference_example_id: example.id,
    metadata: { example_version: exampleVersion },
    client: context.client,
    tracingEnabled: true,
  });
  const evalResult = await wrappedEvaluator({
    input: runTree.inputs,
    expected: example.outputs ?? {},
    actual,
  });
  await context.client?.logEvaluationFeedback(evalResult, runTree);
  return evalResult.score;
}
|
package/dist/run_trees.d.ts
CHANGED
|
@@ -80,7 +80,7 @@ export declare class RunTree implements BaseRun {
|
|
|
80
80
|
attachments?: Attachments;
|
|
81
81
|
constructor(originalConfig: RunTreeConfig | RunTree);
|
|
82
82
|
private static getDefaultConfig;
|
|
83
|
-
|
|
83
|
+
static getSharedClient(): Client;
|
|
84
84
|
createChild(config: RunTreeConfig): RunTree;
|
|
85
85
|
end(outputs?: KVMap, error?: string, endTime?: number, metadata?: KVMap): Promise<void>;
|
|
86
86
|
private _convertToCreate;
|
|
@@ -38,7 +38,7 @@ const getCurrentRunTree = () => {
|
|
|
38
38
|
throw new Error([
|
|
39
39
|
"Could not get the current run tree.",
|
|
40
40
|
"",
|
|
41
|
-
"Please make sure you are calling this method within a traceable function
|
|
41
|
+
"Please make sure you are calling this method within a traceable function and that tracing is enabled.",
|
|
42
42
|
].join("\n"));
|
|
43
43
|
}
|
|
44
44
|
return runTree;
|
|
@@ -35,7 +35,7 @@ export const getCurrentRunTree = () => {
|
|
|
35
35
|
throw new Error([
|
|
36
36
|
"Could not get the current run tree.",
|
|
37
37
|
"",
|
|
38
|
-
"Please make sure you are calling this method within a traceable function
|
|
38
|
+
"Please make sure you are calling this method within a traceable function and that tracing is enabled.",
|
|
39
39
|
].join("\n"));
|
|
40
40
|
}
|
|
41
41
|
return runTree;
|
package/jest.cjs
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('./dist/jest/index.cjs');
|
package/jest.d.cts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './dist/jest/index.js'
|
package/jest.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './dist/jest/index.js'
|
package/jest.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './dist/jest/index.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langsmith",
|
|
3
|
-
"version": "0.2.15-
|
|
3
|
+
"version": "0.2.15-rc.0",
|
|
4
4
|
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
|
|
5
5
|
"packageManager": "yarn@1.22.19",
|
|
6
6
|
"files": [
|
|
@@ -33,6 +33,10 @@
|
|
|
33
33
|
"langchain.js",
|
|
34
34
|
"langchain.d.ts",
|
|
35
35
|
"langchain.d.cts",
|
|
36
|
+
"jest.cjs",
|
|
37
|
+
"jest.js",
|
|
38
|
+
"jest.d.ts",
|
|
39
|
+
"jest.d.cts",
|
|
36
40
|
"vercel.cjs",
|
|
37
41
|
"vercel.js",
|
|
38
42
|
"vercel.d.ts",
|
|
@@ -228,6 +232,15 @@
|
|
|
228
232
|
"import": "./langchain.js",
|
|
229
233
|
"require": "./langchain.cjs"
|
|
230
234
|
},
|
|
235
|
+
"./jest": {
|
|
236
|
+
"types": {
|
|
237
|
+
"import": "./jest.d.ts",
|
|
238
|
+
"require": "./jest.d.cts",
|
|
239
|
+
"default": "./jest.d.ts"
|
|
240
|
+
},
|
|
241
|
+
"import": "./jest.js",
|
|
242
|
+
"require": "./jest.cjs"
|
|
243
|
+
},
|
|
231
244
|
"./vercel": {
|
|
232
245
|
"types": {
|
|
233
246
|
"import": "./vercel.d.ts",
|