langchain 0.1.19-rc.0 → 0.1.19-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/document_loaders/web/couchbase.cjs +88 -0
- package/dist/document_loaders/web/couchbase.d.ts +30 -0
- package/dist/document_loaders/web/couchbase.js +84 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/smith/config.cjs +72 -0
- package/dist/smith/config.d.ts +31 -30
- package/dist/smith/config.js +66 -1
- package/dist/smith/runner_utils.cjs +152 -21
- package/dist/smith/runner_utils.d.ts +30 -17
- package/dist/smith/runner_utils.js +154 -23
- package/document_loaders/web/couchbase.cjs +1 -0
- package/document_loaders/web/couchbase.d.cts +1 -0
- package/document_loaders/web/couchbase.d.ts +1 -0
- package/document_loaders/web/couchbase.js +1 -0
- package/package.json +22 -4
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CouchbaseDocumentLoader = void 0;
|
|
4
|
+
const documents_1 = require("@langchain/core/documents");
|
|
5
|
+
const base_js_1 = require("../base.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* loader for couchbase document
|
|
8
|
+
*/
|
|
9
|
+
class CouchbaseDocumentLoader extends base_js_1.BaseDocumentLoader {
|
|
10
|
+
/**
|
|
11
|
+
* construct Couchbase document loader with a requirement for couchbase cluster client
|
|
12
|
+
* @param client { Cluster } [ couchbase connected client to connect to database ]
|
|
13
|
+
* @param query { string } [ query to get results from while loading the data ]
|
|
14
|
+
* @param pageContentFields { Array<string> } [ filters fields of the document and shows these only ]
|
|
15
|
+
* @param metadataFields { Array<string> } [ metadata fields required ]
|
|
16
|
+
*/
|
|
17
|
+
constructor(client, query, pageContentFields, metadataFields) {
|
|
18
|
+
super();
|
|
19
|
+
Object.defineProperty(this, "cluster", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
configurable: true,
|
|
22
|
+
writable: true,
|
|
23
|
+
value: void 0
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(this, "query", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
configurable: true,
|
|
28
|
+
writable: true,
|
|
29
|
+
value: void 0
|
|
30
|
+
});
|
|
31
|
+
Object.defineProperty(this, "pageContentFields", {
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true,
|
|
35
|
+
value: void 0
|
|
36
|
+
});
|
|
37
|
+
Object.defineProperty(this, "metadataFields", {
|
|
38
|
+
enumerable: true,
|
|
39
|
+
configurable: true,
|
|
40
|
+
writable: true,
|
|
41
|
+
value: void 0
|
|
42
|
+
});
|
|
43
|
+
if (!client) {
|
|
44
|
+
throw new Error("Couchbase client cluster must be provided.");
|
|
45
|
+
}
|
|
46
|
+
this.cluster = client;
|
|
47
|
+
this.query = query;
|
|
48
|
+
this.pageContentFields = pageContentFields;
|
|
49
|
+
this.metadataFields = metadataFields;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Function to load document based on query from couchbase
|
|
53
|
+
* @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ]
|
|
54
|
+
*/
|
|
55
|
+
async load() {
|
|
56
|
+
const documents = [];
|
|
57
|
+
for await (const doc of this.lazyLoad()) {
|
|
58
|
+
documents.push(doc);
|
|
59
|
+
}
|
|
60
|
+
return documents;
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Function to load documents based on iterator rather than full load
|
|
64
|
+
* @returns {AsyncIterable<Document>} [ Returns an iterator to fetch documents ]
|
|
65
|
+
*/
|
|
66
|
+
async *lazyLoad() {
|
|
67
|
+
// Run SQL++ Query
|
|
68
|
+
const result = await this.cluster.query(this.query);
|
|
69
|
+
for await (const row of result.rows) {
|
|
70
|
+
let { metadataFields, pageContentFields } = this;
|
|
71
|
+
if (!pageContentFields) {
|
|
72
|
+
pageContentFields = Object.keys(row);
|
|
73
|
+
}
|
|
74
|
+
if (!metadataFields) {
|
|
75
|
+
metadataFields = [];
|
|
76
|
+
}
|
|
77
|
+
const metadata = metadataFields.reduce((obj, field) => ({ ...obj, [field]: row[field] }), {});
|
|
78
|
+
const document = pageContentFields
|
|
79
|
+
.map((k) => `${k}: ${JSON.stringify(row[k])}`)
|
|
80
|
+
.join("\n");
|
|
81
|
+
yield new documents_1.Document({
|
|
82
|
+
pageContent: document,
|
|
83
|
+
metadata,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
exports.CouchbaseDocumentLoader = CouchbaseDocumentLoader;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { Cluster } from "couchbase";
|
|
2
|
+
import { Document } from "@langchain/core/documents";
|
|
3
|
+
import { BaseDocumentLoader, DocumentLoader } from "../base.js";
|
|
4
|
+
/**
|
|
5
|
+
* loader for couchbase document
|
|
6
|
+
*/
|
|
7
|
+
export declare class CouchbaseDocumentLoader extends BaseDocumentLoader implements DocumentLoader {
|
|
8
|
+
private cluster;
|
|
9
|
+
private query;
|
|
10
|
+
private pageContentFields?;
|
|
11
|
+
private metadataFields?;
|
|
12
|
+
/**
|
|
13
|
+
* construct Couchbase document loader with a requirement for couchbase cluster client
|
|
14
|
+
* @param client { Cluster } [ couchbase connected client to connect to database ]
|
|
15
|
+
* @param query { string } [ query to get results from while loading the data ]
|
|
16
|
+
* @param pageContentFields { Array<string> } [ filters fields of the document and shows these only ]
|
|
17
|
+
* @param metadataFields { Array<string> } [ metadata fields required ]
|
|
18
|
+
*/
|
|
19
|
+
constructor(client: Cluster, query: string, pageContentFields?: string[], metadataFields?: string[]);
|
|
20
|
+
/**
|
|
21
|
+
* Function to load document based on query from couchbase
|
|
22
|
+
* @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ]
|
|
23
|
+
*/
|
|
24
|
+
load(): Promise<Document[]>;
|
|
25
|
+
/**
|
|
26
|
+
* Function to load documents based on iterator rather than full load
|
|
27
|
+
* @returns {AsyncIterable<Document>} [ Returns an iterator to fetch documents ]
|
|
28
|
+
*/
|
|
29
|
+
lazyLoad(): AsyncIterable<Document>;
|
|
30
|
+
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { Document } from "@langchain/core/documents";
|
|
2
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
+
/**
|
|
4
|
+
* loader for couchbase document
|
|
5
|
+
*/
|
|
6
|
+
export class CouchbaseDocumentLoader extends BaseDocumentLoader {
|
|
7
|
+
/**
|
|
8
|
+
* construct Couchbase document loader with a requirement for couchbase cluster client
|
|
9
|
+
* @param client { Cluster } [ couchbase connected client to connect to database ]
|
|
10
|
+
* @param query { string } [ query to get results from while loading the data ]
|
|
11
|
+
* @param pageContentFields { Array<string> } [ filters fields of the document and shows these only ]
|
|
12
|
+
* @param metadataFields { Array<string> } [ metadata fields required ]
|
|
13
|
+
*/
|
|
14
|
+
constructor(client, query, pageContentFields, metadataFields) {
|
|
15
|
+
super();
|
|
16
|
+
Object.defineProperty(this, "cluster", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: void 0
|
|
21
|
+
});
|
|
22
|
+
Object.defineProperty(this, "query", {
|
|
23
|
+
enumerable: true,
|
|
24
|
+
configurable: true,
|
|
25
|
+
writable: true,
|
|
26
|
+
value: void 0
|
|
27
|
+
});
|
|
28
|
+
Object.defineProperty(this, "pageContentFields", {
|
|
29
|
+
enumerable: true,
|
|
30
|
+
configurable: true,
|
|
31
|
+
writable: true,
|
|
32
|
+
value: void 0
|
|
33
|
+
});
|
|
34
|
+
Object.defineProperty(this, "metadataFields", {
|
|
35
|
+
enumerable: true,
|
|
36
|
+
configurable: true,
|
|
37
|
+
writable: true,
|
|
38
|
+
value: void 0
|
|
39
|
+
});
|
|
40
|
+
if (!client) {
|
|
41
|
+
throw new Error("Couchbase client cluster must be provided.");
|
|
42
|
+
}
|
|
43
|
+
this.cluster = client;
|
|
44
|
+
this.query = query;
|
|
45
|
+
this.pageContentFields = pageContentFields;
|
|
46
|
+
this.metadataFields = metadataFields;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Function to load document based on query from couchbase
|
|
50
|
+
* @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ]
|
|
51
|
+
*/
|
|
52
|
+
async load() {
|
|
53
|
+
const documents = [];
|
|
54
|
+
for await (const doc of this.lazyLoad()) {
|
|
55
|
+
documents.push(doc);
|
|
56
|
+
}
|
|
57
|
+
return documents;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Function to load documents based on iterator rather than full load
|
|
61
|
+
* @returns {AsyncIterable<Document>} [ Returns an iterator to fetch documents ]
|
|
62
|
+
*/
|
|
63
|
+
async *lazyLoad() {
|
|
64
|
+
// Run SQL++ Query
|
|
65
|
+
const result = await this.cluster.query(this.query);
|
|
66
|
+
for await (const row of result.rows) {
|
|
67
|
+
let { metadataFields, pageContentFields } = this;
|
|
68
|
+
if (!pageContentFields) {
|
|
69
|
+
pageContentFields = Object.keys(row);
|
|
70
|
+
}
|
|
71
|
+
if (!metadataFields) {
|
|
72
|
+
metadataFields = [];
|
|
73
|
+
}
|
|
74
|
+
const metadata = metadataFields.reduce((obj, field) => ({ ...obj, [field]: row[field] }), {});
|
|
75
|
+
const document = pageContentFields
|
|
76
|
+
.map((k) => `${k}: ${JSON.stringify(row[k])}`)
|
|
77
|
+
.join("\n");
|
|
78
|
+
yield new Document({
|
|
79
|
+
pageContent: document,
|
|
80
|
+
metadata,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
@@ -102,6 +102,7 @@ exports.optionalImportEntrypoints = [
|
|
|
102
102
|
"langchain/document_loaders/web/sitemap",
|
|
103
103
|
"langchain/document_loaders/web/sonix_audio",
|
|
104
104
|
"langchain/document_loaders/web/confluence",
|
|
105
|
+
"langchain/document_loaders/web/couchbase",
|
|
105
106
|
"langchain/document_loaders/web/youtube",
|
|
106
107
|
"langchain/document_loaders/fs/directory",
|
|
107
108
|
"langchain/document_loaders/fs/buffer",
|
|
@@ -99,6 +99,7 @@ export const optionalImportEntrypoints = [
|
|
|
99
99
|
"langchain/document_loaders/web/sitemap",
|
|
100
100
|
"langchain/document_loaders/web/sonix_audio",
|
|
101
101
|
"langchain/document_loaders/web/confluence",
|
|
102
|
+
"langchain/document_loaders/web/couchbase",
|
|
102
103
|
"langchain/document_loaders/web/youtube",
|
|
103
104
|
"langchain/document_loaders/fs/directory",
|
|
104
105
|
"langchain/document_loaders/fs/buffer",
|
package/dist/smith/config.cjs
CHANGED
|
@@ -1,2 +1,74 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EmbeddingDistance = exports.LabeledCriteria = exports.Criteria = exports.isCustomEvaluator = exports.isOffTheShelfEvaluator = void 0;
|
|
4
|
+
function isOffTheShelfEvaluator(evaluator) {
|
|
5
|
+
return typeof evaluator === "string" || "evaluatorType" in evaluator;
|
|
6
|
+
}
|
|
7
|
+
exports.isOffTheShelfEvaluator = isOffTheShelfEvaluator;
|
|
8
|
+
function isCustomEvaluator(evaluator) {
|
|
9
|
+
return !isOffTheShelfEvaluator(evaluator);
|
|
10
|
+
}
|
|
11
|
+
exports.isCustomEvaluator = isCustomEvaluator;
|
|
12
|
+
const isStringifiableValue = (value) => typeof value === "string" ||
|
|
13
|
+
typeof value === "number" ||
|
|
14
|
+
typeof value === "boolean" ||
|
|
15
|
+
typeof value === "bigint";
|
|
16
|
+
const getSingleStringifiedValue = (value) => {
|
|
17
|
+
if (isStringifiableValue(value)) {
|
|
18
|
+
return `${value}`;
|
|
19
|
+
}
|
|
20
|
+
if (typeof value === "object" && value != null && !Array.isArray(value)) {
|
|
21
|
+
const entries = Object.entries(value);
|
|
22
|
+
if (entries.length === 1 && isStringifiableValue(entries[0][1])) {
|
|
23
|
+
return `${entries[0][1]}`;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
console.warn("Non-stringifiable value found when coercing", value);
|
|
27
|
+
return `${value}`;
|
|
28
|
+
};
|
|
29
|
+
function Criteria(criteria, config) {
|
|
30
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
31
|
+
((payload) => ({
|
|
32
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
33
|
+
input: getSingleStringifiedValue(payload.rawInput),
|
|
34
|
+
}));
|
|
35
|
+
return {
|
|
36
|
+
evaluatorType: "criteria",
|
|
37
|
+
criteria,
|
|
38
|
+
feedbackKey: config?.feedbackKey ?? criteria,
|
|
39
|
+
llm: config?.llm,
|
|
40
|
+
formatEvaluatorInputs,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
exports.Criteria = Criteria;
|
|
44
|
+
function LabeledCriteria(criteria, config) {
|
|
45
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
46
|
+
((payload) => ({
|
|
47
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
48
|
+
input: getSingleStringifiedValue(payload.rawInput),
|
|
49
|
+
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
|
|
50
|
+
}));
|
|
51
|
+
return {
|
|
52
|
+
evaluatorType: "labeled_criteria",
|
|
53
|
+
criteria,
|
|
54
|
+
feedbackKey: config?.feedbackKey ?? criteria,
|
|
55
|
+
llm: config?.llm,
|
|
56
|
+
formatEvaluatorInputs,
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
exports.LabeledCriteria = LabeledCriteria;
|
|
60
|
+
function EmbeddingDistance(distanceMetric, config) {
|
|
61
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
62
|
+
((payload) => ({
|
|
63
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
64
|
+
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
|
|
65
|
+
}));
|
|
66
|
+
return {
|
|
67
|
+
evaluatorType: "embedding_distance",
|
|
68
|
+
embedding: config?.embedding,
|
|
69
|
+
distanceMetric,
|
|
70
|
+
feedbackKey: config?.feedbackKey ?? "embedding_distance",
|
|
71
|
+
formatEvaluatorInputs,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
exports.EmbeddingDistance = EmbeddingDistance;
|
package/dist/smith/config.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { BaseLanguageModel } from "@langchain/core/language_models/base";
|
|
|
2
2
|
import { RunnableConfig } from "@langchain/core/runnables";
|
|
3
3
|
import { Example, Run } from "langsmith";
|
|
4
4
|
import { EvaluationResult, RunEvaluator } from "langsmith/evaluation";
|
|
5
|
-
import { Criteria } from "../evaluation/index.js";
|
|
5
|
+
import { Criteria as CriteriaType, type EmbeddingDistanceEvalChainInput } from "../evaluation/index.js";
|
|
6
6
|
import { LoadEvaluatorOptions } from "../evaluation/loader.js";
|
|
7
7
|
import { EvaluatorType } from "../evaluation/types.js";
|
|
8
8
|
export type EvaluatorInputs = {
|
|
@@ -33,6 +33,9 @@ export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams, options?: {
|
|
|
33
33
|
}) => Promise<EvaluationResult>) | ((props: DynamicRunEvaluatorParams, options?: {
|
|
34
34
|
config?: RunnableConfig;
|
|
35
35
|
}) => EvaluationResult);
|
|
36
|
+
export declare function isOffTheShelfEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is T | EvalConfig;
|
|
37
|
+
export declare function isCustomEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is U;
|
|
38
|
+
export type RunEvalType<T extends keyof EvaluatorType = "criteria" | "labeled_criteria" | "embedding_distance", U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike> = T | EvalConfig | U;
|
|
36
39
|
/**
|
|
37
40
|
* Configuration class for running evaluations on datasets.
|
|
38
41
|
*
|
|
@@ -42,20 +45,13 @@ export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams, options?: {
|
|
|
42
45
|
* @typeparam T - The type of evaluators.
|
|
43
46
|
* @typeparam U - The type of custom evaluators.
|
|
44
47
|
*/
|
|
45
|
-
export type RunEvalConfig<T extends keyof EvaluatorType =
|
|
48
|
+
export type RunEvalConfig<T extends keyof EvaluatorType = "criteria" | "labeled_criteria" | "embedding_distance", U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike> = {
|
|
46
49
|
/**
|
|
47
|
-
*
|
|
48
|
-
* Each evaluator is provided with a run trace containing the model
|
|
49
|
-
* outputs, as well as an "example" object representing a record
|
|
50
|
-
* in the dataset.
|
|
51
|
-
*/
|
|
52
|
-
customEvaluators?: U[];
|
|
53
|
-
/**
|
|
54
|
-
* LangChain evaluators to apply to a dataset run.
|
|
50
|
+
* Evaluators to apply to a dataset run.
|
|
55
51
|
* You can optionally specify these by name, or by
|
|
56
52
|
* configuring them with an EvalConfig object.
|
|
57
53
|
*/
|
|
58
|
-
evaluators?:
|
|
54
|
+
evaluators?: RunEvalType<T, U>[];
|
|
59
55
|
/**
|
|
60
56
|
* Convert the evaluation data into formats that can be used by the evaluator.
|
|
61
57
|
* This should most commonly be a string.
|
|
@@ -81,9 +77,14 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
|
|
|
81
77
|
*/
|
|
82
78
|
formatEvaluatorInputs?: EvaluatorInputFormatter;
|
|
83
79
|
/**
|
|
84
|
-
*
|
|
80
|
+
* Custom evaluators to apply to a dataset run.
|
|
81
|
+
* Each evaluator is provided with a run trace containing the model
|
|
82
|
+
* outputs, as well as an "example" object representing a record
|
|
83
|
+
* in the dataset.
|
|
84
|
+
*
|
|
85
|
+
* @deprecated Use `evaluators` instead.
|
|
85
86
|
*/
|
|
86
|
-
|
|
87
|
+
customEvaluators?: U[];
|
|
87
88
|
};
|
|
88
89
|
export interface EvalConfig extends LoadEvaluatorOptions {
|
|
89
90
|
/**
|
|
@@ -147,7 +148,7 @@ export interface EvalConfig extends LoadEvaluatorOptions {
|
|
|
147
148
|
* }]
|
|
148
149
|
* };
|
|
149
150
|
*/
|
|
150
|
-
export type
|
|
151
|
+
export type Criteria = EvalConfig & {
|
|
151
152
|
evaluatorType: "criteria";
|
|
152
153
|
/**
|
|
153
154
|
* The "criteria" to insert into the prompt template
|
|
@@ -155,18 +156,14 @@ export type CriteriaEvalChainConfig = EvalConfig & {
|
|
|
155
156
|
* https://smith.langchain.com/hub/langchain-ai/criteria-evaluator
|
|
156
157
|
* for more information.
|
|
157
158
|
*/
|
|
158
|
-
criteria?:
|
|
159
|
+
criteria?: CriteriaType | Record<string, string>;
|
|
159
160
|
/**
|
|
160
|
-
* The
|
|
161
|
-
* evaluation results. If none provided, we default to
|
|
162
|
-
* the evaluationName.
|
|
163
|
-
*/
|
|
164
|
-
feedbackKey?: string;
|
|
165
|
-
/**
|
|
166
|
-
* The language model to use as the evaluator.
|
|
161
|
+
* The language model to use as the evaluator, defaults to GPT-4
|
|
167
162
|
*/
|
|
168
163
|
llm?: BaseLanguageModel;
|
|
169
164
|
};
|
|
165
|
+
export type CriteriaEvalChainConfig = Criteria;
|
|
166
|
+
export declare function Criteria(criteria: CriteriaType, config?: Pick<Partial<LabeledCriteria>, "formatEvaluatorInputs" | "llm" | "feedbackKey">): EvalConfig;
|
|
170
167
|
/**
|
|
171
168
|
* Configuration to load a "LabeledCriteriaEvalChain" evaluator,
|
|
172
169
|
* which prompts an LLM to determine whether the model's
|
|
@@ -202,15 +199,19 @@ export type LabeledCriteria = EvalConfig & {
|
|
|
202
199
|
* https://smith.langchain.com/hub/langchain-ai/labeled-criteria
|
|
203
200
|
* for more information.
|
|
204
201
|
*/
|
|
205
|
-
criteria?:
|
|
202
|
+
criteria?: CriteriaType | Record<string, string>;
|
|
206
203
|
/**
|
|
207
|
-
* The
|
|
208
|
-
* evaluation results. If none provided, we default to
|
|
209
|
-
* the evaluationName.
|
|
210
|
-
*/
|
|
211
|
-
feedbackKey?: string;
|
|
212
|
-
/**
|
|
213
|
-
* The language model to use as the evaluator.
|
|
204
|
+
* The language model to use as the evaluator, defaults to GPT-4
|
|
214
205
|
*/
|
|
215
206
|
llm?: BaseLanguageModel;
|
|
216
207
|
};
|
|
208
|
+
export declare function LabeledCriteria(criteria: CriteriaType, config?: Pick<Partial<LabeledCriteria>, "formatEvaluatorInputs" | "llm" | "feedbackKey">): LabeledCriteria;
|
|
209
|
+
/**
|
|
210
|
+
* Configuration to load a "EmbeddingDistanceEvalChain" evaluator,
|
|
211
|
+
* which embeds distances to score semantic difference between
|
|
212
|
+
* a prediction and reference.
|
|
213
|
+
*/
|
|
214
|
+
export type EmbeddingDistance = EvalConfig & EmbeddingDistanceEvalChainInput & {
|
|
215
|
+
evaluatorType: "embedding_distance";
|
|
216
|
+
};
|
|
217
|
+
export declare function EmbeddingDistance(distanceMetric: EmbeddingDistanceEvalChainInput["distanceMetric"], config?: Pick<Partial<LabeledCriteria>, "formatEvaluatorInputs" | "embedding" | "feedbackKey">): EmbeddingDistance;
|
package/dist/smith/config.js
CHANGED
|
@@ -1 +1,66 @@
|
|
|
1
|
-
export {
|
|
1
|
+
export function isOffTheShelfEvaluator(evaluator) {
|
|
2
|
+
return typeof evaluator === "string" || "evaluatorType" in evaluator;
|
|
3
|
+
}
|
|
4
|
+
export function isCustomEvaluator(evaluator) {
|
|
5
|
+
return !isOffTheShelfEvaluator(evaluator);
|
|
6
|
+
}
|
|
7
|
+
const isStringifiableValue = (value) => typeof value === "string" ||
|
|
8
|
+
typeof value === "number" ||
|
|
9
|
+
typeof value === "boolean" ||
|
|
10
|
+
typeof value === "bigint";
|
|
11
|
+
const getSingleStringifiedValue = (value) => {
|
|
12
|
+
if (isStringifiableValue(value)) {
|
|
13
|
+
return `${value}`;
|
|
14
|
+
}
|
|
15
|
+
if (typeof value === "object" && value != null && !Array.isArray(value)) {
|
|
16
|
+
const entries = Object.entries(value);
|
|
17
|
+
if (entries.length === 1 && isStringifiableValue(entries[0][1])) {
|
|
18
|
+
return `${entries[0][1]}`;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
console.warn("Non-stringifiable value found when coercing", value);
|
|
22
|
+
return `${value}`;
|
|
23
|
+
};
|
|
24
|
+
export function Criteria(criteria, config) {
|
|
25
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
26
|
+
((payload) => ({
|
|
27
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
28
|
+
input: getSingleStringifiedValue(payload.rawInput),
|
|
29
|
+
}));
|
|
30
|
+
return {
|
|
31
|
+
evaluatorType: "criteria",
|
|
32
|
+
criteria,
|
|
33
|
+
feedbackKey: config?.feedbackKey ?? criteria,
|
|
34
|
+
llm: config?.llm,
|
|
35
|
+
formatEvaluatorInputs,
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
export function LabeledCriteria(criteria, config) {
|
|
39
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
40
|
+
((payload) => ({
|
|
41
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
42
|
+
input: getSingleStringifiedValue(payload.rawInput),
|
|
43
|
+
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
|
|
44
|
+
}));
|
|
45
|
+
return {
|
|
46
|
+
evaluatorType: "labeled_criteria",
|
|
47
|
+
criteria,
|
|
48
|
+
feedbackKey: config?.feedbackKey ?? criteria,
|
|
49
|
+
llm: config?.llm,
|
|
50
|
+
formatEvaluatorInputs,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
export function EmbeddingDistance(distanceMetric, config) {
|
|
54
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
55
|
+
((payload) => ({
|
|
56
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
57
|
+
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
|
|
58
|
+
}));
|
|
59
|
+
return {
|
|
60
|
+
evaluatorType: "embedding_distance",
|
|
61
|
+
embedding: config?.embedding,
|
|
62
|
+
distanceMetric,
|
|
63
|
+
feedbackKey: config?.feedbackKey ?? "embedding_distance",
|
|
64
|
+
formatEvaluatorInputs,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
@@ -7,6 +7,7 @@ const tracer_langchain_1 = require("@langchain/core/tracers/tracer_langchain");
|
|
|
7
7
|
const base_1 = require("@langchain/core/tracers/base");
|
|
8
8
|
const langsmith_1 = require("langsmith");
|
|
9
9
|
const loader_js_1 = require("../evaluation/loader.cjs");
|
|
10
|
+
const config_js_1 = require("./config.cjs");
|
|
10
11
|
const name_generation_js_1 = require("./name_generation.cjs");
|
|
11
12
|
const progress_js_1 = require("./progress.cjs");
|
|
12
13
|
class SingleRunIdExtractor {
|
|
@@ -114,6 +115,124 @@ class DynamicRunEvaluator {
|
|
|
114
115
|
function isLLMStringEvaluator(evaluator) {
|
|
115
116
|
return evaluator && typeof evaluator.evaluateStrings === "function";
|
|
116
117
|
}
|
|
118
|
+
/**
|
|
119
|
+
* Internal implementation of RunTree, which uses the
|
|
120
|
+
* provided callback manager instead of the internal LangSmith client.
|
|
121
|
+
*
|
|
122
|
+
* The goal of this class is to ensure seamless interop when intergrated
|
|
123
|
+
* with other Runnables.
|
|
124
|
+
*/
|
|
125
|
+
class CallbackManagerRunTree extends langsmith_1.RunTree {
|
|
126
|
+
constructor(config, callbackManager) {
|
|
127
|
+
super(config);
|
|
128
|
+
Object.defineProperty(this, "callbackManager", {
|
|
129
|
+
enumerable: true,
|
|
130
|
+
configurable: true,
|
|
131
|
+
writable: true,
|
|
132
|
+
value: void 0
|
|
133
|
+
});
|
|
134
|
+
Object.defineProperty(this, "activeCallbackManager", {
|
|
135
|
+
enumerable: true,
|
|
136
|
+
configurable: true,
|
|
137
|
+
writable: true,
|
|
138
|
+
value: undefined
|
|
139
|
+
});
|
|
140
|
+
this.callbackManager = callbackManager;
|
|
141
|
+
}
|
|
142
|
+
async createChild(config) {
|
|
143
|
+
const child = new CallbackManagerRunTree({
|
|
144
|
+
...config,
|
|
145
|
+
parent_run: this,
|
|
146
|
+
project_name: this.project_name,
|
|
147
|
+
client: this.client,
|
|
148
|
+
}, this.activeCallbackManager?.getChild() ?? this.callbackManager);
|
|
149
|
+
this.child_runs.push(child);
|
|
150
|
+
return child;
|
|
151
|
+
}
|
|
152
|
+
async postRun() {
|
|
153
|
+
// how it is translated in comparison to basic RunTree?
|
|
154
|
+
this.activeCallbackManager = await this.callbackManager.handleChainStart(typeof this.serialized !== "object" &&
|
|
155
|
+
this.serialized != null &&
|
|
156
|
+
"lc" in this.serialized
|
|
157
|
+
? this.serialized
|
|
158
|
+
: {
|
|
159
|
+
id: ["langchain", "smith", "CallbackManagerRunTree"],
|
|
160
|
+
lc: 1,
|
|
161
|
+
type: "not_implemented",
|
|
162
|
+
}, this.inputs, this.id, this.run_type, undefined, undefined, this.name);
|
|
163
|
+
}
|
|
164
|
+
async patchRun() {
|
|
165
|
+
if (this.error) {
|
|
166
|
+
await this.activeCallbackManager?.handleChainError(this.error, this.id, this.parent_run?.id, undefined, undefined);
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
await this.activeCallbackManager?.handleChainEnd(this.outputs ?? {}, this.id, this.parent_run?.id, undefined, undefined);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
class RunnableTraceable extends runnables_1.Runnable {
|
|
174
|
+
constructor(fields) {
|
|
175
|
+
super(fields);
|
|
176
|
+
Object.defineProperty(this, "lc_serializable", {
|
|
177
|
+
enumerable: true,
|
|
178
|
+
configurable: true,
|
|
179
|
+
writable: true,
|
|
180
|
+
value: false
|
|
181
|
+
});
|
|
182
|
+
Object.defineProperty(this, "lc_namespace", {
|
|
183
|
+
enumerable: true,
|
|
184
|
+
configurable: true,
|
|
185
|
+
writable: true,
|
|
186
|
+
value: ["langchain_core", "runnables"]
|
|
187
|
+
});
|
|
188
|
+
Object.defineProperty(this, "func", {
|
|
189
|
+
enumerable: true,
|
|
190
|
+
configurable: true,
|
|
191
|
+
writable: true,
|
|
192
|
+
value: void 0
|
|
193
|
+
});
|
|
194
|
+
if (!isLangsmithTraceableFunction(fields.func)) {
|
|
195
|
+
throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
|
|
196
|
+
}
|
|
197
|
+
this.func = fields.func;
|
|
198
|
+
}
|
|
199
|
+
async invoke(input, options) {
|
|
200
|
+
const [config] = this._getOptionsList(options ?? {}, 1);
|
|
201
|
+
const callbackManager = await (0, runnables_1.getCallbackManagerForConfig)(config);
|
|
202
|
+
const partialConfig = "langsmith:traceable" in this.func
|
|
203
|
+
? this.func["langsmith:traceable"]
|
|
204
|
+
: { name: "<lambda>" };
|
|
205
|
+
if (!callbackManager)
|
|
206
|
+
throw new Error("CallbackManager not found");
|
|
207
|
+
const runTree = new CallbackManagerRunTree({
|
|
208
|
+
...partialConfig,
|
|
209
|
+
parent_run: callbackManager?._parentRunId
|
|
210
|
+
? new langsmith_1.RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
|
|
211
|
+
: undefined,
|
|
212
|
+
}, callbackManager);
|
|
213
|
+
if (typeof input === "object" &&
|
|
214
|
+
input != null &&
|
|
215
|
+
Object.keys(input).length === 1) {
|
|
216
|
+
if ("args" in input && Array.isArray(input)) {
|
|
217
|
+
return (await this.func(runTree, ...input));
|
|
218
|
+
}
|
|
219
|
+
if ("input" in input &&
|
|
220
|
+
!(typeof input === "object" &&
|
|
221
|
+
input != null &&
|
|
222
|
+
!Array.isArray(input) &&
|
|
223
|
+
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
224
|
+
!(input instanceof Date))) {
|
|
225
|
+
try {
|
|
226
|
+
return (await this.func(runTree, input.input));
|
|
227
|
+
}
|
|
228
|
+
catch (err) {
|
|
229
|
+
return (await this.func(runTree, input));
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
return (await this.func(runTree, input));
|
|
234
|
+
}
|
|
235
|
+
}
|
|
117
236
|
/**
|
|
118
237
|
* Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
|
|
119
238
|
* and composes with a prepareData function so the user can prepare the trace and
|
|
@@ -155,14 +274,14 @@ class PreparedRunEvaluator {
|
|
|
155
274
|
const evalConfig = typeof config === "string" ? {} : config;
|
|
156
275
|
const evaluator = await (0, loader_js_1.loadEvaluator)(evaluatorType, evalConfig);
|
|
157
276
|
const feedbackKey = evalConfig?.feedbackKey ?? evaluator?.evaluationName;
|
|
158
|
-
if (!feedbackKey) {
|
|
159
|
-
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
160
|
-
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
161
|
-
}
|
|
162
277
|
if (!isLLMStringEvaluator(evaluator)) {
|
|
163
278
|
throw new Error(`Evaluator of type ${evaluatorType} not yet supported. ` +
|
|
164
279
|
"Please use a string evaluator, or implement your " +
|
|
165
|
-
"evaluation logic as a
|
|
280
|
+
"evaluation logic as a custom evaluator.");
|
|
281
|
+
}
|
|
282
|
+
if (!feedbackKey) {
|
|
283
|
+
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
284
|
+
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
166
285
|
}
|
|
167
286
|
return new PreparedRunEvaluator(evaluator, feedbackKey, evalConfig?.formatEvaluatorInputs);
|
|
168
287
|
}
|
|
@@ -199,7 +318,7 @@ class PreparedRunEvaluator {
|
|
|
199
318
|
}
|
|
200
319
|
throw new Error("Evaluator not yet supported. " +
|
|
201
320
|
"Please use a string evaluator, or implement your " +
|
|
202
|
-
"evaluation logic as a
|
|
321
|
+
"evaluation logic as a custom evaluator.");
|
|
203
322
|
}
|
|
204
323
|
}
|
|
205
324
|
class LoadedEvalConfig {
|
|
@@ -213,7 +332,7 @@ class LoadedEvalConfig {
|
|
|
213
332
|
}
|
|
214
333
|
static async fromRunEvalConfig(config) {
|
|
215
334
|
// Custom evaluators are applied "as-is"
|
|
216
|
-
const customEvaluators = config?.customEvaluators?.map((evaluator) => {
|
|
335
|
+
const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(config_js_1.isCustomEvaluator))?.map((evaluator) => {
|
|
217
336
|
if (typeof evaluator === "function") {
|
|
218
337
|
return new DynamicRunEvaluator(evaluator);
|
|
219
338
|
}
|
|
@@ -221,7 +340,9 @@ class LoadedEvalConfig {
|
|
|
221
340
|
return evaluator;
|
|
222
341
|
}
|
|
223
342
|
});
|
|
224
|
-
const offTheShelfEvaluators = await Promise.all(config?.evaluators
|
|
343
|
+
const offTheShelfEvaluators = await Promise.all(config?.evaluators
|
|
344
|
+
?.filter(config_js_1.isOffTheShelfEvaluator)
|
|
345
|
+
?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
|
|
225
346
|
return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
|
|
226
347
|
}
|
|
227
348
|
}
|
|
@@ -238,6 +359,10 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
238
359
|
return () => modelOrFactory;
|
|
239
360
|
}
|
|
240
361
|
if (typeof modelOrFactory === "function") {
|
|
362
|
+
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
363
|
+
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
364
|
+
return () => wrappedModel;
|
|
365
|
+
}
|
|
241
366
|
try {
|
|
242
367
|
// If it works with no arguments, assume it's a factory
|
|
243
368
|
let res = modelOrFactory();
|
|
@@ -249,7 +374,7 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
249
374
|
}
|
|
250
375
|
catch (err) {
|
|
251
376
|
// Otherwise, it's a custom UDF, and we'll wrap
|
|
252
|
-
// in a lambda
|
|
377
|
+
// the function in a lambda
|
|
253
378
|
const wrappedModel = new runnables_1.RunnableLambda({ func: modelOrFactory });
|
|
254
379
|
return () => wrappedModel;
|
|
255
380
|
}
|
|
@@ -337,11 +462,11 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
337
462
|
* for evaluation.
|
|
338
463
|
*
|
|
339
464
|
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
340
|
-
* - `
|
|
341
|
-
*
|
|
465
|
+
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
|
|
466
|
+
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
|
|
342
467
|
* - `projectName` (string): Name of the project for logging and tracking.
|
|
343
468
|
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
344
|
-
* - `client` (Client): Client instance for
|
|
469
|
+
* - `client` (Client): Client instance for LangSmith service interaction.
|
|
345
470
|
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
346
471
|
*
|
|
347
472
|
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
@@ -356,13 +481,8 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
356
481
|
* const datasetName = 'example-dataset';
|
|
357
482
|
* const client = new Client(/* ...config... *\//);
|
|
358
483
|
*
|
|
359
|
-
* const evaluationConfig = {
|
|
360
|
-
* evaluators: [/* ...evaluators... *\//],
|
|
361
|
-
* customEvaluators: [/* ...custom evaluators... *\//],
|
|
362
|
-
* };
|
|
363
|
-
*
|
|
364
484
|
* const results = await runOnDataset(chain, datasetName, {
|
|
365
|
-
*
|
|
485
|
+
* evaluators: [/* ...evaluators... *\//],
|
|
366
486
|
* client,
|
|
367
487
|
* });
|
|
368
488
|
*
|
|
@@ -372,11 +492,19 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
372
492
|
* evaluateModel();
|
|
373
493
|
* ```
|
|
374
494
|
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
375
|
-
* a dataset named 'example-dataset'. The evaluation process is configured using `
|
|
495
|
+
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
|
|
376
496
|
* include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith services.
|
|
377
497
|
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
378
498
|
*/
|
|
379
|
-
|
|
499
|
+
async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
500
|
+
const { projectName, projectMetadata, client, maxConcurrency, } = options ?? {};
|
|
501
|
+
const evaluationConfig = options?.evaluationConfig ??
|
|
502
|
+
(options?.evaluators != null
|
|
503
|
+
? {
|
|
504
|
+
evaluators: options.evaluators,
|
|
505
|
+
formatEvaluatorInputs: options.formatEvaluatorInputs,
|
|
506
|
+
}
|
|
507
|
+
: undefined);
|
|
380
508
|
const wrappedModel = await createWrappedModel(chainOrFactory);
|
|
381
509
|
const testClient = client ?? new langsmith_1.Client();
|
|
382
510
|
const testProjectName = projectName ?? (0, name_generation_js_1.randomName)();
|
|
@@ -432,5 +560,8 @@ const runOnDataset = async (chainOrFactory, datasetName, { evaluationConfig, pro
|
|
|
432
560
|
results: evalResults ?? {},
|
|
433
561
|
};
|
|
434
562
|
return results;
|
|
435
|
-
}
|
|
563
|
+
}
|
|
436
564
|
exports.runOnDataset = runOnDataset;
|
|
565
|
+
function isLangsmithTraceableFunction(x) {
|
|
566
|
+
return typeof x === "function" && "langsmith:traceable" in x;
|
|
567
|
+
}
|
|
@@ -1,14 +1,31 @@
|
|
|
1
1
|
import { Runnable } from "@langchain/core/runnables";
|
|
2
2
|
import { Client, Feedback } from "langsmith";
|
|
3
|
-
import type {
|
|
4
|
-
|
|
5
|
-
export type
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
import type { TraceableFunction } from "langsmith/traceable";
|
|
4
|
+
import { type RunEvalConfig } from "./config.js";
|
|
5
|
+
export type ChainOrFactory = Runnable | (() => Runnable) | AnyTraceableFunction | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
|
|
6
|
+
type AnyTraceableFunction = TraceableFunction<(...any: any[]) => any>;
|
|
7
|
+
export interface RunOnDatasetParams extends Omit<RunEvalConfig, "customEvaluators"> {
|
|
8
|
+
/**
|
|
9
|
+
* Name of the project for logging and tracking.
|
|
10
|
+
*/
|
|
8
11
|
projectName?: string;
|
|
12
|
+
/**
|
|
13
|
+
* Additional metadata for the project.
|
|
14
|
+
*/
|
|
15
|
+
projectMetadata?: Record<string, unknown>;
|
|
16
|
+
/**
|
|
17
|
+
* Client instance for LangSmith service interaction.
|
|
18
|
+
*/
|
|
9
19
|
client?: Client;
|
|
20
|
+
/**
|
|
21
|
+
* Maximum concurrency level for dataset processing.
|
|
22
|
+
*/
|
|
10
23
|
maxConcurrency?: number;
|
|
11
|
-
|
|
24
|
+
/**
|
|
25
|
+
* @deprecated Pass keys directly to the RunOnDatasetParams instead
|
|
26
|
+
*/
|
|
27
|
+
evaluationConfig?: RunEvalConfig;
|
|
28
|
+
}
|
|
12
29
|
export type EvalResults = {
|
|
13
30
|
projectName: string;
|
|
14
31
|
results: {
|
|
@@ -35,11 +52,11 @@ export type EvalResults = {
|
|
|
35
52
|
* for evaluation.
|
|
36
53
|
*
|
|
37
54
|
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
38
|
-
* - `
|
|
39
|
-
*
|
|
55
|
+
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
|
|
56
|
+
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
|
|
40
57
|
* - `projectName` (string): Name of the project for logging and tracking.
|
|
41
58
|
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
42
|
-
* - `client` (Client): Client instance for
|
|
59
|
+
* - `client` (Client): Client instance for LangSmith service interaction.
|
|
43
60
|
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
44
61
|
*
|
|
45
62
|
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
@@ -54,13 +71,8 @@ export type EvalResults = {
|
|
|
54
71
|
* const datasetName = 'example-dataset';
|
|
55
72
|
* const client = new Client(/* ...config... *\//);
|
|
56
73
|
*
|
|
57
|
-
* const evaluationConfig = {
|
|
58
|
-
* evaluators: [/* ...evaluators... *\//],
|
|
59
|
-
* customEvaluators: [/* ...custom evaluators... *\//],
|
|
60
|
-
* };
|
|
61
|
-
*
|
|
62
74
|
* const results = await runOnDataset(chain, datasetName, {
|
|
63
|
-
*
|
|
75
|
+
* evaluators: [/* ...evaluators... *\//],
|
|
64
76
|
* client,
|
|
65
77
|
* });
|
|
66
78
|
*
|
|
@@ -70,8 +82,9 @@ export type EvalResults = {
|
|
|
70
82
|
* evaluateModel();
|
|
71
83
|
* ```
|
|
72
84
|
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
73
|
-
* a dataset named 'example-dataset'. The evaluation process is configured using `
|
|
85
|
+
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
|
|
74
86
|
* include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith services.
|
|
75
87
|
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
76
88
|
*/
|
|
77
|
-
export declare
|
|
89
|
+
export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, options?: RunOnDatasetParams): Promise<EvalResults>;
|
|
90
|
+
export {};
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
|
|
2
|
-
import { Runnable, RunnableLambda, } from "@langchain/core/runnables";
|
|
2
|
+
import { Runnable, RunnableLambda, getCallbackManagerForConfig, } from "@langchain/core/runnables";
|
|
3
3
|
import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
|
|
4
4
|
import { BaseTracer } from "@langchain/core/tracers/base";
|
|
5
|
-
import { Client } from "langsmith";
|
|
5
|
+
import { Client, RunTree, } from "langsmith";
|
|
6
6
|
import { loadEvaluator } from "../evaluation/loader.js";
|
|
7
|
+
import { isOffTheShelfEvaluator, isCustomEvaluator, } from "./config.js";
|
|
7
8
|
import { randomName } from "./name_generation.js";
|
|
8
9
|
import { ProgressBar } from "./progress.js";
|
|
9
10
|
class SingleRunIdExtractor {
|
|
@@ -111,6 +112,124 @@ class DynamicRunEvaluator {
|
|
|
111
112
|
function isLLMStringEvaluator(evaluator) {
|
|
112
113
|
return evaluator && typeof evaluator.evaluateStrings === "function";
|
|
113
114
|
}
|
|
115
|
+
/**
|
|
116
|
+
* Internal implementation of RunTree, which uses the
|
|
117
|
+
* provided callback manager instead of the internal LangSmith client.
|
|
118
|
+
*
|
|
119
|
+
* The goal of this class is to ensure seamless interop when integrated
|
|
120
|
+
* with other Runnables.
|
|
121
|
+
*/
|
|
122
|
+
class CallbackManagerRunTree extends RunTree {
|
|
123
|
+
constructor(config, callbackManager) {
|
|
124
|
+
super(config);
|
|
125
|
+
Object.defineProperty(this, "callbackManager", {
|
|
126
|
+
enumerable: true,
|
|
127
|
+
configurable: true,
|
|
128
|
+
writable: true,
|
|
129
|
+
value: void 0
|
|
130
|
+
});
|
|
131
|
+
Object.defineProperty(this, "activeCallbackManager", {
|
|
132
|
+
enumerable: true,
|
|
133
|
+
configurable: true,
|
|
134
|
+
writable: true,
|
|
135
|
+
value: undefined
|
|
136
|
+
});
|
|
137
|
+
this.callbackManager = callbackManager;
|
|
138
|
+
}
|
|
139
|
+
async createChild(config) {
|
|
140
|
+
const child = new CallbackManagerRunTree({
|
|
141
|
+
...config,
|
|
142
|
+
parent_run: this,
|
|
143
|
+
project_name: this.project_name,
|
|
144
|
+
client: this.client,
|
|
145
|
+
}, this.activeCallbackManager?.getChild() ?? this.callbackManager);
|
|
146
|
+
this.child_runs.push(child);
|
|
147
|
+
return child;
|
|
148
|
+
}
|
|
149
|
+
async postRun() {
|
|
150
|
+
// Open question: how does this translate compared to the basic RunTree?
|
|
151
|
+
this.activeCallbackManager = await this.callbackManager.handleChainStart(typeof this.serialized !== "object" &&
|
|
152
|
+
this.serialized != null &&
|
|
153
|
+
"lc" in this.serialized
|
|
154
|
+
? this.serialized
|
|
155
|
+
: {
|
|
156
|
+
id: ["langchain", "smith", "CallbackManagerRunTree"],
|
|
157
|
+
lc: 1,
|
|
158
|
+
type: "not_implemented",
|
|
159
|
+
}, this.inputs, this.id, this.run_type, undefined, undefined, this.name);
|
|
160
|
+
}
|
|
161
|
+
async patchRun() {
|
|
162
|
+
if (this.error) {
|
|
163
|
+
await this.activeCallbackManager?.handleChainError(this.error, this.id, this.parent_run?.id, undefined, undefined);
|
|
164
|
+
}
|
|
165
|
+
else {
|
|
166
|
+
await this.activeCallbackManager?.handleChainEnd(this.outputs ?? {}, this.id, this.parent_run?.id, undefined, undefined);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
class RunnableTraceable extends Runnable {
|
|
171
|
+
constructor(fields) {
|
|
172
|
+
super(fields);
|
|
173
|
+
Object.defineProperty(this, "lc_serializable", {
|
|
174
|
+
enumerable: true,
|
|
175
|
+
configurable: true,
|
|
176
|
+
writable: true,
|
|
177
|
+
value: false
|
|
178
|
+
});
|
|
179
|
+
Object.defineProperty(this, "lc_namespace", {
|
|
180
|
+
enumerable: true,
|
|
181
|
+
configurable: true,
|
|
182
|
+
writable: true,
|
|
183
|
+
value: ["langchain_core", "runnables"]
|
|
184
|
+
});
|
|
185
|
+
Object.defineProperty(this, "func", {
|
|
186
|
+
enumerable: true,
|
|
187
|
+
configurable: true,
|
|
188
|
+
writable: true,
|
|
189
|
+
value: void 0
|
|
190
|
+
});
|
|
191
|
+
if (!isLangsmithTraceableFunction(fields.func)) {
|
|
192
|
+
throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
|
|
193
|
+
}
|
|
194
|
+
this.func = fields.func;
|
|
195
|
+
}
|
|
196
|
+
async invoke(input, options) {
|
|
197
|
+
const [config] = this._getOptionsList(options ?? {}, 1);
|
|
198
|
+
const callbackManager = await getCallbackManagerForConfig(config);
|
|
199
|
+
const partialConfig = "langsmith:traceable" in this.func
|
|
200
|
+
? this.func["langsmith:traceable"]
|
|
201
|
+
: { name: "<lambda>" };
|
|
202
|
+
if (!callbackManager)
|
|
203
|
+
throw new Error("CallbackManager not found");
|
|
204
|
+
const runTree = new CallbackManagerRunTree({
|
|
205
|
+
...partialConfig,
|
|
206
|
+
parent_run: callbackManager?._parentRunId
|
|
207
|
+
? new RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
|
|
208
|
+
: undefined,
|
|
209
|
+
}, callbackManager);
|
|
210
|
+
if (typeof input === "object" &&
|
|
211
|
+
input != null &&
|
|
212
|
+
Object.keys(input).length === 1) {
|
|
213
|
+
if ("args" in input && Array.isArray(input)) {
|
|
214
|
+
return (await this.func(runTree, ...input));
|
|
215
|
+
}
|
|
216
|
+
if ("input" in input &&
|
|
217
|
+
!(typeof input === "object" &&
|
|
218
|
+
input != null &&
|
|
219
|
+
!Array.isArray(input) &&
|
|
220
|
+
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
221
|
+
!(input instanceof Date))) {
|
|
222
|
+
try {
|
|
223
|
+
return (await this.func(runTree, input.input));
|
|
224
|
+
}
|
|
225
|
+
catch (err) {
|
|
226
|
+
return (await this.func(runTree, input));
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
return (await this.func(runTree, input));
|
|
231
|
+
}
|
|
232
|
+
}
|
|
114
233
|
/**
|
|
115
234
|
* Wraps an off-the-shelf evaluator (loaded using loadEvaluator; of EvaluatorType[T])
|
|
116
235
|
* and composes with a prepareData function so the user can prepare the trace and
|
|
@@ -152,14 +271,14 @@ class PreparedRunEvaluator {
|
|
|
152
271
|
const evalConfig = typeof config === "string" ? {} : config;
|
|
153
272
|
const evaluator = await loadEvaluator(evaluatorType, evalConfig);
|
|
154
273
|
const feedbackKey = evalConfig?.feedbackKey ?? evaluator?.evaluationName;
|
|
155
|
-
if (!feedbackKey) {
|
|
156
|
-
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
157
|
-
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
158
|
-
}
|
|
159
274
|
if (!isLLMStringEvaluator(evaluator)) {
|
|
160
275
|
throw new Error(`Evaluator of type ${evaluatorType} not yet supported. ` +
|
|
161
276
|
"Please use a string evaluator, or implement your " +
|
|
162
|
-
"evaluation logic as a
|
|
277
|
+
"evaluation logic as a custom evaluator.");
|
|
278
|
+
}
|
|
279
|
+
if (!feedbackKey) {
|
|
280
|
+
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
281
|
+
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
163
282
|
}
|
|
164
283
|
return new PreparedRunEvaluator(evaluator, feedbackKey, evalConfig?.formatEvaluatorInputs);
|
|
165
284
|
}
|
|
@@ -196,7 +315,7 @@ class PreparedRunEvaluator {
|
|
|
196
315
|
}
|
|
197
316
|
throw new Error("Evaluator not yet supported. " +
|
|
198
317
|
"Please use a string evaluator, or implement your " +
|
|
199
|
-
"evaluation logic as a
|
|
318
|
+
"evaluation logic as a custom evaluator.");
|
|
200
319
|
}
|
|
201
320
|
}
|
|
202
321
|
class LoadedEvalConfig {
|
|
@@ -210,7 +329,7 @@ class LoadedEvalConfig {
|
|
|
210
329
|
}
|
|
211
330
|
static async fromRunEvalConfig(config) {
|
|
212
331
|
// Custom evaluators are applied "as-is"
|
|
213
|
-
const customEvaluators = config?.customEvaluators?.map((evaluator) => {
|
|
332
|
+
const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(isCustomEvaluator))?.map((evaluator) => {
|
|
214
333
|
if (typeof evaluator === "function") {
|
|
215
334
|
return new DynamicRunEvaluator(evaluator);
|
|
216
335
|
}
|
|
@@ -218,7 +337,9 @@ class LoadedEvalConfig {
|
|
|
218
337
|
return evaluator;
|
|
219
338
|
}
|
|
220
339
|
});
|
|
221
|
-
const offTheShelfEvaluators = await Promise.all(config?.evaluators
|
|
340
|
+
const offTheShelfEvaluators = await Promise.all(config?.evaluators
|
|
341
|
+
?.filter(isOffTheShelfEvaluator)
|
|
342
|
+
?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
|
|
222
343
|
return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
|
|
223
344
|
}
|
|
224
345
|
}
|
|
@@ -235,6 +356,10 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
235
356
|
return () => modelOrFactory;
|
|
236
357
|
}
|
|
237
358
|
if (typeof modelOrFactory === "function") {
|
|
359
|
+
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
360
|
+
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
361
|
+
return () => wrappedModel;
|
|
362
|
+
}
|
|
238
363
|
try {
|
|
239
364
|
// If it works with no arguments, assume it's a factory
|
|
240
365
|
let res = modelOrFactory();
|
|
@@ -246,7 +371,7 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
246
371
|
}
|
|
247
372
|
catch (err) {
|
|
248
373
|
// Otherwise, it's a custom UDF, and we'll wrap
|
|
249
|
-
// in a lambda
|
|
374
|
+
// the function in a lambda
|
|
250
375
|
const wrappedModel = new RunnableLambda({ func: modelOrFactory });
|
|
251
376
|
return () => wrappedModel;
|
|
252
377
|
}
|
|
@@ -334,11 +459,11 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
334
459
|
* for evaluation.
|
|
335
460
|
*
|
|
336
461
|
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
337
|
-
* - `
|
|
338
|
-
*
|
|
462
|
+
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
|
|
463
|
+
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
|
|
339
464
|
* - `projectName` (string): Name of the project for logging and tracking.
|
|
340
465
|
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
341
|
-
* - `client` (Client): Client instance for
|
|
466
|
+
* - `client` (Client): Client instance for LangSmith service interaction.
|
|
342
467
|
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
343
468
|
*
|
|
344
469
|
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
@@ -353,13 +478,8 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
353
478
|
* const datasetName = 'example-dataset';
|
|
354
479
|
* const client = new Client(/* ...config... *\//);
|
|
355
480
|
*
|
|
356
|
-
* const evaluationConfig = {
|
|
357
|
-
* evaluators: [/* ...evaluators... *\//],
|
|
358
|
-
* customEvaluators: [/* ...custom evaluators... *\//],
|
|
359
|
-
* };
|
|
360
|
-
*
|
|
361
481
|
* const results = await runOnDataset(chain, datasetName, {
|
|
362
|
-
*
|
|
482
|
+
* evaluators: [/* ...evaluators... *\//],
|
|
363
483
|
* client,
|
|
364
484
|
* });
|
|
365
485
|
*
|
|
@@ -369,11 +489,19 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
369
489
|
* evaluateModel();
|
|
370
490
|
* ```
|
|
371
491
|
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
372
|
-
* a dataset named 'example-dataset'. The evaluation process is configured using `
|
|
492
|
+
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
|
|
373
493
|
* include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith services.
|
|
374
494
|
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
375
495
|
*/
|
|
376
|
-
export
|
|
496
|
+
export async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
497
|
+
const { projectName, projectMetadata, client, maxConcurrency, } = options ?? {};
|
|
498
|
+
const evaluationConfig = options?.evaluationConfig ??
|
|
499
|
+
(options?.evaluators != null
|
|
500
|
+
? {
|
|
501
|
+
evaluators: options.evaluators,
|
|
502
|
+
formatEvaluatorInputs: options.formatEvaluatorInputs,
|
|
503
|
+
}
|
|
504
|
+
: undefined);
|
|
377
505
|
const wrappedModel = await createWrappedModel(chainOrFactory);
|
|
378
506
|
const testClient = client ?? new Client();
|
|
379
507
|
const testProjectName = projectName ?? randomName();
|
|
@@ -429,4 +557,7 @@ export const runOnDataset = async (chainOrFactory, datasetName, { evaluationConf
|
|
|
429
557
|
results: evalResults ?? {},
|
|
430
558
|
};
|
|
431
559
|
return results;
|
|
432
|
-
}
|
|
560
|
+
}
|
|
561
|
+
function isLangsmithTraceableFunction(x) {
|
|
562
|
+
return typeof x === "function" && "langsmith:traceable" in x;
|
|
563
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../../dist/document_loaders/web/couchbase.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/couchbase.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/couchbase.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/couchbase.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.1.19-rc.
|
|
3
|
+
"version": "0.1.19-rc.2",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -614,6 +614,10 @@
|
|
|
614
614
|
"document_loaders/web/confluence.js",
|
|
615
615
|
"document_loaders/web/confluence.d.ts",
|
|
616
616
|
"document_loaders/web/confluence.d.cts",
|
|
617
|
+
"document_loaders/web/couchbase.cjs",
|
|
618
|
+
"document_loaders/web/couchbase.js",
|
|
619
|
+
"document_loaders/web/couchbase.d.ts",
|
|
620
|
+
"document_loaders/web/couchbase.d.cts",
|
|
617
621
|
"document_loaders/web/searchapi.cjs",
|
|
618
622
|
"document_loaders/web/searchapi.js",
|
|
619
623
|
"document_loaders/web/searchapi.d.ts",
|
|
@@ -1247,6 +1251,7 @@
|
|
|
1247
1251
|
"cheerio": "^1.0.0-rc.12",
|
|
1248
1252
|
"chromadb": "^1.5.3",
|
|
1249
1253
|
"convex": "^1.3.1",
|
|
1254
|
+
"couchbase": "^4.2.10",
|
|
1250
1255
|
"d3-dsv": "^2.0.0",
|
|
1251
1256
|
"dotenv": "^16.0.3",
|
|
1252
1257
|
"dpdm": "^3.12.0",
|
|
@@ -1317,6 +1322,7 @@
|
|
|
1317
1322
|
"cheerio": "^1.0.0-rc.12",
|
|
1318
1323
|
"chromadb": "*",
|
|
1319
1324
|
"convex": "^1.3.1",
|
|
1325
|
+
"couchbase": "^4.2.10",
|
|
1320
1326
|
"d3-dsv": "^2.0.0",
|
|
1321
1327
|
"epub2": "^3.0.1",
|
|
1322
1328
|
"fast-xml-parser": "^4.2.7",
|
|
@@ -1411,6 +1417,9 @@
|
|
|
1411
1417
|
"convex": {
|
|
1412
1418
|
"optional": true
|
|
1413
1419
|
},
|
|
1420
|
+
"couchbase": {
|
|
1421
|
+
"optional": true
|
|
1422
|
+
},
|
|
1414
1423
|
"d3-dsv": {
|
|
1415
1424
|
"optional": true
|
|
1416
1425
|
},
|
|
@@ -1504,8 +1513,8 @@
|
|
|
1504
1513
|
},
|
|
1505
1514
|
"dependencies": {
|
|
1506
1515
|
"@anthropic-ai/sdk": "^0.9.1",
|
|
1507
|
-
"@langchain/community": "~0.0.
|
|
1508
|
-
"@langchain/core": "~0.1.
|
|
1516
|
+
"@langchain/community": "~0.0.29",
|
|
1517
|
+
"@langchain/core": "~0.1.29",
|
|
1509
1518
|
"@langchain/openai": "~0.0.14",
|
|
1510
1519
|
"binary-extensions": "^2.2.0",
|
|
1511
1520
|
"expr-eval": "^2.0.2",
|
|
@@ -1513,7 +1522,7 @@
|
|
|
1513
1522
|
"js-yaml": "^4.1.0",
|
|
1514
1523
|
"jsonpointer": "^5.0.1",
|
|
1515
1524
|
"langchainhub": "~0.0.8",
|
|
1516
|
-
"langsmith": "~0.
|
|
1525
|
+
"langsmith": "~0.1.1",
|
|
1517
1526
|
"ml-distance": "^4.0.0",
|
|
1518
1527
|
"openapi-types": "^12.1.3",
|
|
1519
1528
|
"p-retry": "4",
|
|
@@ -2899,6 +2908,15 @@
|
|
|
2899
2908
|
"import": "./document_loaders/web/confluence.js",
|
|
2900
2909
|
"require": "./document_loaders/web/confluence.cjs"
|
|
2901
2910
|
},
|
|
2911
|
+
"./document_loaders/web/couchbase": {
|
|
2912
|
+
"types": {
|
|
2913
|
+
"import": "./document_loaders/web/couchbase.d.ts",
|
|
2914
|
+
"require": "./document_loaders/web/couchbase.d.cts",
|
|
2915
|
+
"default": "./document_loaders/web/couchbase.d.ts"
|
|
2916
|
+
},
|
|
2917
|
+
"import": "./document_loaders/web/couchbase.js",
|
|
2918
|
+
"require": "./document_loaders/web/couchbase.cjs"
|
|
2919
|
+
},
|
|
2902
2920
|
"./document_loaders/web/searchapi": {
|
|
2903
2921
|
"types": {
|
|
2904
2922
|
"import": "./document_loaders/web/searchapi.d.ts",
|