langsmith 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/jest/reporter.cjs +12 -1
- package/dist/jest/reporter.js +12 -1
- package/dist/utils/jestlike/globals.d.ts +3 -3
- package/dist/utils/jestlike/index.cjs +3 -3
- package/dist/utils/jestlike/index.d.ts +2 -2
- package/dist/utils/jestlike/index.js +3 -3
- package/dist/utils/jestlike/reporter.cjs +12 -5
- package/dist/utils/jestlike/reporter.js +12 -5
- package/dist/utils/jestlike/types.d.ts +6 -0
- package/dist/utils/jestlike/vendor/evaluatedBy.d.ts +5 -5
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -8,4 +8,4 @@ Object.defineProperty(exports, "RunTree", { enumerable: true, get: function () {
|
|
|
8
8
|
var fetch_js_1 = require("./singletons/fetch.cjs");
|
|
9
9
|
Object.defineProperty(exports, "overrideFetchImplementation", { enumerable: true, get: function () { return fetch_js_1.overrideFetchImplementation; } });
|
|
10
10
|
// Update using yarn bump-version
|
|
11
|
-
exports.__version__ = "0.3.0";
|
|
11
|
+
exports.__version__ = "0.3.2";
|
package/dist/index.d.ts
CHANGED
|
@@ -2,4 +2,4 @@ export { Client, type ClientConfig, type LangSmithTracingClientInterface, } from
|
|
|
2
2
|
export type { Dataset, Example, TracerSession, Run, Feedback, RetrieverOutput, } from "./schemas.js";
|
|
3
3
|
export { RunTree, type RunTreeConfig } from "./run_trees.js";
|
|
4
4
|
export { overrideFetchImplementation } from "./singletons/fetch.js";
|
|
5
|
-
export declare const __version__ = "0.3.0";
|
|
5
|
+
export declare const __version__ = "0.3.2";
|
package/dist/index.js
CHANGED
package/dist/jest/reporter.cjs
CHANGED
|
@@ -5,8 +5,19 @@ const reporters_1 = require("@jest/reporters");
|
|
|
5
5
|
const reporter_js_1 = require("../utils/jestlike/reporter.cjs");
|
|
6
6
|
class LangSmithEvalReporter extends reporters_1.DefaultReporter {
|
|
7
7
|
async onTestResult(test, testResult, aggregatedResults) {
|
|
8
|
+
const groupedTestResults = testResult.testResults.reduce((groups, testResult) => {
|
|
9
|
+
const ancestorTitle = testResult.ancestorTitles.join(" > ");
|
|
10
|
+
if (groups[ancestorTitle] === undefined) {
|
|
11
|
+
groups[ancestorTitle] = [];
|
|
12
|
+
}
|
|
13
|
+
groups[ancestorTitle].push(testResult);
|
|
14
|
+
return groups;
|
|
15
|
+
}, {});
|
|
8
16
|
try {
|
|
9
|
-
|
|
17
|
+
for (const testGroupName of Object.keys(groupedTestResults)) {
|
|
18
|
+
const resultGroup = groupedTestResults[testGroupName];
|
|
19
|
+
await (0, reporter_js_1.printReporterTable)(resultGroup, testResult.failureMessage);
|
|
20
|
+
}
|
|
10
21
|
}
|
|
11
22
|
catch (e) {
|
|
12
23
|
console.log("Failed to display LangSmith eval results:", e.message);
|
package/dist/jest/reporter.js
CHANGED
|
@@ -3,8 +3,19 @@ import { DefaultReporter } from "@jest/reporters";
|
|
|
3
3
|
import { printReporterTable } from "../utils/jestlike/reporter.js";
|
|
4
4
|
class LangSmithEvalReporter extends DefaultReporter {
|
|
5
5
|
async onTestResult(test, testResult, aggregatedResults) {
|
|
6
|
+
const groupedTestResults = testResult.testResults.reduce((groups, testResult) => {
|
|
7
|
+
const ancestorTitle = testResult.ancestorTitles.join(" > ");
|
|
8
|
+
if (groups[ancestorTitle] === undefined) {
|
|
9
|
+
groups[ancestorTitle] = [];
|
|
10
|
+
}
|
|
11
|
+
groups[ancestorTitle].push(testResult);
|
|
12
|
+
return groups;
|
|
13
|
+
}, {});
|
|
6
14
|
try {
|
|
7
|
-
|
|
15
|
+
for (const testGroupName of Object.keys(groupedTestResults)) {
|
|
16
|
+
const resultGroup = groupedTestResults[testGroupName];
|
|
17
|
+
await printReporterTable(resultGroup, testResult.failureMessage);
|
|
18
|
+
}
|
|
8
19
|
}
|
|
9
20
|
catch (e) {
|
|
10
21
|
console.log("Failed to display LangSmith eval results:", e.message);
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
3
3
|
import { Dataset, TracerSession, Example } from "../../schemas.js";
|
|
4
4
|
import { Client, CreateProjectParams } from "../../client.js";
|
|
5
|
-
import { EvaluationResult } from "../../evaluation/evaluator.js";
|
|
6
5
|
import { RunTree } from "../../run_trees.js";
|
|
6
|
+
import { SimpleEvaluationResult } from "./types.js";
|
|
7
7
|
export declare const DEFAULT_TEST_CLIENT: Client;
|
|
8
8
|
export type TestWrapperAsyncLocalStorageData = {
|
|
9
9
|
enableTestTracking?: boolean;
|
|
@@ -12,7 +12,7 @@ export type TestWrapperAsyncLocalStorageData = {
|
|
|
12
12
|
projectConfig?: Partial<CreateProjectParams>;
|
|
13
13
|
project?: TracerSession;
|
|
14
14
|
setLoggedOutput?: (value: Record<string, unknown>) => void;
|
|
15
|
-
onFeedbackLogged?: (feedback: EvaluationResult) => void;
|
|
15
|
+
onFeedbackLogged?: (feedback: SimpleEvaluationResult) => void;
|
|
16
16
|
currentExample?: Partial<Example> & {
|
|
17
17
|
syncPromise?: Promise<Example>;
|
|
18
18
|
};
|
|
@@ -26,7 +26,7 @@ export declare const evaluatorLogFeedbackPromises: Set<unknown>;
|
|
|
26
26
|
export declare const syncExamplePromises: Map<any, any>;
|
|
27
27
|
export declare function _logTestFeedback(params: {
|
|
28
28
|
exampleId?: string;
|
|
29
|
-
feedback: EvaluationResult;
|
|
29
|
+
feedback: SimpleEvaluationResult;
|
|
30
30
|
context: TestWrapperAsyncLocalStorageData;
|
|
31
31
|
runTree?: RunTree;
|
|
32
32
|
client: Client;
|
|
@@ -139,9 +139,9 @@ function generateWrapperFromJestlikeMethods(methods, testRunnerName) {
|
|
|
139
139
|
" Please try again.");
|
|
140
140
|
}
|
|
141
141
|
const datasetSetupInfo = new Map();
|
|
142
|
-
function getExampleId(
|
|
142
|
+
function getExampleId(datasetId, inputs, outputs) {
|
|
143
143
|
const identifier = JSON.stringify({
|
|
144
|
-
|
|
144
|
+
datasetId,
|
|
145
145
|
inputsHash: objectHash(inputs),
|
|
146
146
|
outputsHash: objectHash(outputs ?? {}),
|
|
147
147
|
});
|
|
@@ -432,7 +432,7 @@ function generateWrapperFromJestlikeMethods(methods, testRunnerName) {
|
|
|
432
432
|
.map((field) => `"${field}"`)
|
|
433
433
|
.join(", ")} while syncing to LangSmith. Please contact us for help.`);
|
|
434
434
|
}
|
|
435
|
-
exampleId = getExampleId(dataset.
|
|
435
|
+
exampleId = getExampleId(dataset.id, inputs, referenceOutputs);
|
|
436
436
|
// TODO: Create or update the example in the background
|
|
437
437
|
// Currently run end time has to be after example modified time
|
|
438
438
|
// for examples to render properly, so we must modify the example
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/// <reference types="jest" />
|
|
2
2
|
import { KVMap } from "../../schemas.js";
|
|
3
3
|
import { toBeRelativeCloseTo, toBeAbsoluteCloseTo, toBeSemanticCloseTo } from "./matchers.js";
|
|
4
|
-
import {
|
|
4
|
+
import { SimpleEvaluationResult } from "./types.js";
|
|
5
5
|
import type { LangSmithJestlikeWrapperConfig, LangSmithJestlikeWrapperParams, LangSmithJestDescribeWrapper } from "./types.js";
|
|
6
6
|
export declare const STRIP_ANSI_REGEX: RegExp;
|
|
7
7
|
export declare const TEST_ID_DELIMITER = ", test_id=";
|
|
8
|
-
export declare function logFeedback(feedback:
|
|
8
|
+
export declare function logFeedback(feedback: SimpleEvaluationResult, config?: {
|
|
9
9
|
sourceRunId?: string;
|
|
10
10
|
}): void;
|
|
11
11
|
export declare function logOutputs(output: Record<string, unknown>): void;
|
|
@@ -108,9 +108,9 @@ export function generateWrapperFromJestlikeMethods(methods, testRunnerName) {
|
|
|
108
108
|
" Please try again.");
|
|
109
109
|
}
|
|
110
110
|
const datasetSetupInfo = new Map();
|
|
111
|
-
function getExampleId(
|
|
111
|
+
function getExampleId(datasetId, inputs, outputs) {
|
|
112
112
|
const identifier = JSON.stringify({
|
|
113
|
-
|
|
113
|
+
datasetId,
|
|
114
114
|
inputsHash: objectHash(inputs),
|
|
115
115
|
outputsHash: objectHash(outputs ?? {}),
|
|
116
116
|
});
|
|
@@ -401,7 +401,7 @@ export function generateWrapperFromJestlikeMethods(methods, testRunnerName) {
|
|
|
401
401
|
.map((field) => `"${field}"`)
|
|
402
402
|
.join(", ")} while syncing to LangSmith. Please contact us for help.`);
|
|
403
403
|
}
|
|
404
|
-
exampleId = getExampleId(dataset.
|
|
404
|
+
exampleId = getExampleId(dataset.id, inputs, referenceOutputs);
|
|
405
405
|
// TODO: Create or update the example in the background
|
|
406
406
|
// Currently run end time has to be after example modified time
|
|
407
407
|
// for examples to render properly, so we must modify the example
|
|
@@ -34,6 +34,7 @@ const path = __importStar(require("node:path"));
|
|
|
34
34
|
const fs = __importStar(require("node:fs/promises"));
|
|
35
35
|
const index_js_1 = require("./index.cjs");
|
|
36
36
|
const FEEDBACK_COLLAPSE_THRESHOLD = 48;
|
|
37
|
+
const MAX_TEST_PARAMS_LENGTH = 18;
|
|
37
38
|
const RESERVED_KEYS = [
|
|
38
39
|
"Name",
|
|
39
40
|
"Result",
|
|
@@ -86,7 +87,9 @@ function formatValue(value) {
|
|
|
86
87
|
.map(([k, v]) => {
|
|
87
88
|
const rawValue = typeof v === "string" ? v : JSON.stringify(v);
|
|
88
89
|
const rawEntry = `${k}: ${rawValue}`;
|
|
89
|
-
const entry = rawEntry.length >
|
|
90
|
+
const entry = rawEntry.length > MAX_TEST_PARAMS_LENGTH
|
|
91
|
+
? rawEntry.slice(0, MAX_TEST_PARAMS_LENGTH - 3) + "..."
|
|
92
|
+
: rawEntry;
|
|
90
93
|
return entry;
|
|
91
94
|
})
|
|
92
95
|
.join("\n");
|
|
@@ -230,9 +233,13 @@ async function printReporterTable(results, failureMessage) {
|
|
|
230
233
|
}
|
|
231
234
|
const defaultColumns = [
|
|
232
235
|
{ name: "Test", alignment: "left", maxLen: 36 },
|
|
233
|
-
{ name: "Inputs", alignment: "left", minLen:
|
|
234
|
-
{
|
|
235
|
-
|
|
236
|
+
{ name: "Inputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
|
|
237
|
+
{
|
|
238
|
+
name: "Reference Outputs",
|
|
239
|
+
alignment: "left",
|
|
240
|
+
minLen: MAX_TEST_PARAMS_LENGTH,
|
|
241
|
+
},
|
|
242
|
+
{ name: "Outputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
|
|
236
243
|
{ name: "Status", alignment: "left" },
|
|
237
244
|
];
|
|
238
245
|
if (collapseFeedbackColumn) {
|
|
@@ -245,7 +252,7 @@ async function printReporterTable(results, failureMessage) {
|
|
|
245
252
|
defaultColumns.push({
|
|
246
253
|
name: "Feedback",
|
|
247
254
|
alignment: "left",
|
|
248
|
-
minLen: feedbackColumnLength +
|
|
255
|
+
minLen: feedbackColumnLength + 8,
|
|
249
256
|
});
|
|
250
257
|
}
|
|
251
258
|
console.log();
|
|
@@ -5,6 +5,7 @@ import * as path from "node:path";
|
|
|
5
5
|
import * as fs from "node:fs/promises";
|
|
6
6
|
import { STRIP_ANSI_REGEX, TEST_ID_DELIMITER } from "./index.js";
|
|
7
7
|
const FEEDBACK_COLLAPSE_THRESHOLD = 48;
|
|
8
|
+
const MAX_TEST_PARAMS_LENGTH = 18;
|
|
8
9
|
const RESERVED_KEYS = [
|
|
9
10
|
"Name",
|
|
10
11
|
"Result",
|
|
@@ -57,7 +58,9 @@ function formatValue(value) {
|
|
|
57
58
|
.map(([k, v]) => {
|
|
58
59
|
const rawValue = typeof v === "string" ? v : JSON.stringify(v);
|
|
59
60
|
const rawEntry = `${k}: ${rawValue}`;
|
|
60
|
-
const entry = rawEntry.length >
|
|
61
|
+
const entry = rawEntry.length > MAX_TEST_PARAMS_LENGTH
|
|
62
|
+
? rawEntry.slice(0, MAX_TEST_PARAMS_LENGTH - 3) + "..."
|
|
63
|
+
: rawEntry;
|
|
61
64
|
return entry;
|
|
62
65
|
})
|
|
63
66
|
.join("\n");
|
|
@@ -201,9 +204,13 @@ export async function printReporterTable(results, failureMessage) {
|
|
|
201
204
|
}
|
|
202
205
|
const defaultColumns = [
|
|
203
206
|
{ name: "Test", alignment: "left", maxLen: 36 },
|
|
204
|
-
{ name: "Inputs", alignment: "left", minLen:
|
|
205
|
-
{
|
|
206
|
-
|
|
207
|
+
{ name: "Inputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
|
|
208
|
+
{
|
|
209
|
+
name: "Reference Outputs",
|
|
210
|
+
alignment: "left",
|
|
211
|
+
minLen: MAX_TEST_PARAMS_LENGTH,
|
|
212
|
+
},
|
|
213
|
+
{ name: "Outputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
|
|
207
214
|
{ name: "Status", alignment: "left" },
|
|
208
215
|
];
|
|
209
216
|
if (collapseFeedbackColumn) {
|
|
@@ -216,7 +223,7 @@ export async function printReporterTable(results, failureMessage) {
|
|
|
216
223
|
defaultColumns.push({
|
|
217
224
|
name: "Feedback",
|
|
218
225
|
alignment: "left",
|
|
219
|
-
minLen: feedbackColumnLength +
|
|
226
|
+
minLen: feedbackColumnLength + 8,
|
|
220
227
|
});
|
|
221
228
|
}
|
|
222
229
|
console.log();
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { EvaluationResult } from "../../evaluation/evaluator.js";
|
|
1
2
|
import type { RunTreeConfig } from "../../run_trees.js";
|
|
2
3
|
import type { SimpleEvaluator } from "./vendor/evaluatedBy.js";
|
|
3
4
|
export { type SimpleEvaluator };
|
|
@@ -11,3 +12,8 @@ export type LangSmithJestlikeWrapperParams<I, O> = {
|
|
|
11
12
|
config?: LangSmithJestlikeWrapperConfig;
|
|
12
13
|
};
|
|
13
14
|
export type LangSmithJestDescribeWrapper = (name: string, fn: () => void | Promise<void>, config?: Partial<RunTreeConfig>) => void;
|
|
15
|
+
export type SimpleEvaluationResult = {
|
|
16
|
+
key: EvaluationResult["key"];
|
|
17
|
+
score: NonNullable<EvaluationResult["score"]>;
|
|
18
|
+
comment?: EvaluationResult["comment"];
|
|
19
|
+
};
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SimpleEvaluationResult } from "../types.js";
|
|
2
2
|
import { RunTreeConfig } from "../../../run_trees.js";
|
|
3
3
|
export type SimpleEvaluatorParams = {
|
|
4
4
|
inputs: Record<string, any>;
|
|
5
5
|
referenceOutputs: Record<string, any>;
|
|
6
6
|
outputs: Record<string, any>;
|
|
7
7
|
};
|
|
8
|
-
export type SimpleEvaluator = (params: SimpleEvaluatorParams) =>
|
|
9
|
-
export declare function wrapEvaluator<I
|
|
8
|
+
export type SimpleEvaluator = (params: SimpleEvaluatorParams) => SimpleEvaluationResult | Promise<SimpleEvaluationResult>;
|
|
9
|
+
export declare function wrapEvaluator<I>(evaluator: (input: I) => SimpleEvaluationResult | Promise<SimpleEvaluationResult>): (input: I, config?: Partial<RunTreeConfig> & {
|
|
10
10
|
runId?: string;
|
|
11
|
-
}) => Promise<
|
|
12
|
-
export declare function evaluatedBy(outputs: any, evaluator: SimpleEvaluator): Promise<import("../../../schemas.js").ScoreType | undefined>;
|
|
11
|
+
}) => Promise<SimpleEvaluationResult>;
|
|
12
|
+
export declare function evaluatedBy(outputs: any, evaluator: SimpleEvaluator): Promise<NonNullable<import("../../../schemas.js").ScoreType | undefined>>;
|