langchain 0.1.19-rc.1 → 0.1.19-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/document_loaders/web/couchbase.cjs +88 -0
- package/dist/document_loaders/web/couchbase.d.ts +30 -0
- package/dist/document_loaders/web/couchbase.js +84 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/smith/config.cjs +18 -1
- package/dist/smith/config.d.ts +26 -55
- package/dist/smith/config.js +16 -0
- package/dist/smith/runner_utils.cjs +128 -16
- package/dist/smith/runner_utils.d.ts +25 -16
- package/dist/smith/runner_utils.js +128 -16
- package/document_loaders/web/couchbase.cjs +1 -0
- package/document_loaders/web/couchbase.d.cts +1 -0
- package/document_loaders/web/couchbase.d.ts +1 -0
- package/document_loaders/web/couchbase.js +1 -0
- package/package.json +21 -3
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CouchbaseDocumentLoader = void 0;
|
|
4
|
+
const documents_1 = require("@langchain/core/documents");
|
|
5
|
+
const base_js_1 = require("../base.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* Document loader that runs a query against a Couchbase cluster and yields one Document per result row.
|
|
8
|
+
*/
|
|
9
|
+
class CouchbaseDocumentLoader extends base_js_1.BaseDocumentLoader {
|
|
10
|
+
/**
|
|
11
|
+
* construct Couchbase document loader with a requirement for couchbase cluster client
|
|
12
|
+
* @param client { Cluster } [ couchbase connected client to connect to database ]
|
|
13
|
+
* @param query { string } [ query to get results from while loading the data ]
|
|
14
|
+
* @param pageContentFields { Array<string> } [ filters fields of the document and shows these only ]
|
|
15
|
+
* @param metadataFields { Array<string> } [ metadata fields required ]
|
|
16
|
+
*/
|
|
17
|
+
constructor(client, query, pageContentFields, metadataFields) {
|
|
18
|
+
super();
|
|
19
|
+
Object.defineProperty(this, "cluster", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
configurable: true,
|
|
22
|
+
writable: true,
|
|
23
|
+
value: void 0
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(this, "query", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
configurable: true,
|
|
28
|
+
writable: true,
|
|
29
|
+
value: void 0
|
|
30
|
+
});
|
|
31
|
+
Object.defineProperty(this, "pageContentFields", {
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true,
|
|
35
|
+
value: void 0
|
|
36
|
+
});
|
|
37
|
+
Object.defineProperty(this, "metadataFields", {
|
|
38
|
+
enumerable: true,
|
|
39
|
+
configurable: true,
|
|
40
|
+
writable: true,
|
|
41
|
+
value: void 0
|
|
42
|
+
});
|
|
43
|
+
if (!client) {
|
|
44
|
+
throw new Error("Couchbase client cluster must be provided.");
|
|
45
|
+
}
|
|
46
|
+
this.cluster = client;
|
|
47
|
+
this.query = query;
|
|
48
|
+
this.pageContentFields = pageContentFields;
|
|
49
|
+
this.metadataFields = metadataFields;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Function to load document based on query from couchbase
|
|
53
|
+
* @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ]
|
|
54
|
+
*/
|
|
55
|
+
async load() {
|
|
56
|
+
const documents = [];
|
|
57
|
+
for await (const doc of this.lazyLoad()) {
|
|
58
|
+
documents.push(doc);
|
|
59
|
+
}
|
|
60
|
+
return documents;
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Function to load documents based on iterator rather than full load
|
|
64
|
+
* @returns {AsyncIterable<Document>} [ Returns an iterator to fetch documents ]
|
|
65
|
+
*/
|
|
66
|
+
async *lazyLoad() {
|
|
67
|
+
// Run SQL++ Query
|
|
68
|
+
const result = await this.cluster.query(this.query);
|
|
69
|
+
for await (const row of result.rows) {
|
|
70
|
+
let { metadataFields, pageContentFields } = this;
|
|
71
|
+
if (!pageContentFields) {
|
|
72
|
+
pageContentFields = Object.keys(row);
|
|
73
|
+
}
|
|
74
|
+
if (!metadataFields) {
|
|
75
|
+
metadataFields = [];
|
|
76
|
+
}
|
|
77
|
+
const metadata = metadataFields.reduce((obj, field) => ({ ...obj, [field]: row[field] }), {});
|
|
78
|
+
const document = pageContentFields
|
|
79
|
+
.map((k) => `${k}: ${JSON.stringify(row[k])}`)
|
|
80
|
+
.join("\n");
|
|
81
|
+
yield new documents_1.Document({
|
|
82
|
+
pageContent: document,
|
|
83
|
+
metadata,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
exports.CouchbaseDocumentLoader = CouchbaseDocumentLoader;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { Cluster } from "couchbase";
|
|
2
|
+
import { Document } from "@langchain/core/documents";
|
|
3
|
+
import { BaseDocumentLoader, DocumentLoader } from "../base.js";
|
|
4
|
+
/**
|
|
5
|
+
* Document loader that runs a query against a Couchbase cluster and yields one Document per result row.
|
|
6
|
+
*/
|
|
7
|
+
export declare class CouchbaseDocumentLoader extends BaseDocumentLoader implements DocumentLoader {
|
|
8
|
+
private cluster;
|
|
9
|
+
private query;
|
|
10
|
+
private pageContentFields?;
|
|
11
|
+
private metadataFields?;
|
|
12
|
+
/**
|
|
13
|
+
* construct Couchbase document loader with a requirement for couchbase cluster client
|
|
14
|
+
* @param client { Cluster } [ couchbase connected client to connect to database ]
|
|
15
|
+
* @param query { string } [ query to get results from while loading the data ]
|
|
16
|
+
* @param pageContentFields { Array<string> } [ filters fields of the document and shows these only ]
|
|
17
|
+
* @param metadataFields { Array<string> } [ metadata fields required ]
|
|
18
|
+
*/
|
|
19
|
+
constructor(client: Cluster, query: string, pageContentFields?: string[], metadataFields?: string[]);
|
|
20
|
+
/**
|
|
21
|
+
* Function to load document based on query from couchbase
|
|
22
|
+
* @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ]
|
|
23
|
+
*/
|
|
24
|
+
load(): Promise<Document[]>;
|
|
25
|
+
/**
|
|
26
|
+
* Function to load documents based on iterator rather than full load
|
|
27
|
+
* @returns {AsyncIterable<Document>} [ Returns an iterator to fetch documents ]
|
|
28
|
+
*/
|
|
29
|
+
lazyLoad(): AsyncIterable<Document>;
|
|
30
|
+
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { Document } from "@langchain/core/documents";
|
|
2
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
+
/**
|
|
4
|
+
* Document loader that runs a query against a Couchbase cluster and yields one Document per result row.
|
|
5
|
+
*/
|
|
6
|
+
export class CouchbaseDocumentLoader extends BaseDocumentLoader {
|
|
7
|
+
/**
|
|
8
|
+
* construct Couchbase document loader with a requirement for couchbase cluster client
|
|
9
|
+
* @param client { Cluster } [ couchbase connected client to connect to database ]
|
|
10
|
+
* @param query { string } [ query to get results from while loading the data ]
|
|
11
|
+
* @param pageContentFields { Array<string> } [ filters fields of the document and shows these only ]
|
|
12
|
+
* @param metadataFields { Array<string> } [ metadata fields required ]
|
|
13
|
+
*/
|
|
14
|
+
constructor(client, query, pageContentFields, metadataFields) {
|
|
15
|
+
super();
|
|
16
|
+
Object.defineProperty(this, "cluster", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: void 0
|
|
21
|
+
});
|
|
22
|
+
Object.defineProperty(this, "query", {
|
|
23
|
+
enumerable: true,
|
|
24
|
+
configurable: true,
|
|
25
|
+
writable: true,
|
|
26
|
+
value: void 0
|
|
27
|
+
});
|
|
28
|
+
Object.defineProperty(this, "pageContentFields", {
|
|
29
|
+
enumerable: true,
|
|
30
|
+
configurable: true,
|
|
31
|
+
writable: true,
|
|
32
|
+
value: void 0
|
|
33
|
+
});
|
|
34
|
+
Object.defineProperty(this, "metadataFields", {
|
|
35
|
+
enumerable: true,
|
|
36
|
+
configurable: true,
|
|
37
|
+
writable: true,
|
|
38
|
+
value: void 0
|
|
39
|
+
});
|
|
40
|
+
if (!client) {
|
|
41
|
+
throw new Error("Couchbase client cluster must be provided.");
|
|
42
|
+
}
|
|
43
|
+
this.cluster = client;
|
|
44
|
+
this.query = query;
|
|
45
|
+
this.pageContentFields = pageContentFields;
|
|
46
|
+
this.metadataFields = metadataFields;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Function to load document based on query from couchbase
|
|
50
|
+
* @returns {Promise<Document[]>} [ Returns a promise of all the documents as array ]
|
|
51
|
+
*/
|
|
52
|
+
async load() {
|
|
53
|
+
const documents = [];
|
|
54
|
+
for await (const doc of this.lazyLoad()) {
|
|
55
|
+
documents.push(doc);
|
|
56
|
+
}
|
|
57
|
+
return documents;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Function to load documents based on iterator rather than full load
|
|
61
|
+
* @returns {AsyncIterable<Document>} [ Returns an iterator to fetch documents ]
|
|
62
|
+
*/
|
|
63
|
+
async *lazyLoad() {
|
|
64
|
+
// Run SQL++ Query
|
|
65
|
+
const result = await this.cluster.query(this.query);
|
|
66
|
+
for await (const row of result.rows) {
|
|
67
|
+
let { metadataFields, pageContentFields } = this;
|
|
68
|
+
if (!pageContentFields) {
|
|
69
|
+
pageContentFields = Object.keys(row);
|
|
70
|
+
}
|
|
71
|
+
if (!metadataFields) {
|
|
72
|
+
metadataFields = [];
|
|
73
|
+
}
|
|
74
|
+
const metadata = metadataFields.reduce((obj, field) => ({ ...obj, [field]: row[field] }), {});
|
|
75
|
+
const document = pageContentFields
|
|
76
|
+
.map((k) => `${k}: ${JSON.stringify(row[k])}`)
|
|
77
|
+
.join("\n");
|
|
78
|
+
yield new Document({
|
|
79
|
+
pageContent: document,
|
|
80
|
+
metadata,
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
@@ -102,6 +102,7 @@ exports.optionalImportEntrypoints = [
|
|
|
102
102
|
"langchain/document_loaders/web/sitemap",
|
|
103
103
|
"langchain/document_loaders/web/sonix_audio",
|
|
104
104
|
"langchain/document_loaders/web/confluence",
|
|
105
|
+
"langchain/document_loaders/web/couchbase",
|
|
105
106
|
"langchain/document_loaders/web/youtube",
|
|
106
107
|
"langchain/document_loaders/fs/directory",
|
|
107
108
|
"langchain/document_loaders/fs/buffer",
|
|
@@ -99,6 +99,7 @@ export const optionalImportEntrypoints = [
|
|
|
99
99
|
"langchain/document_loaders/web/sitemap",
|
|
100
100
|
"langchain/document_loaders/web/sonix_audio",
|
|
101
101
|
"langchain/document_loaders/web/confluence",
|
|
102
|
+
"langchain/document_loaders/web/couchbase",
|
|
102
103
|
"langchain/document_loaders/web/youtube",
|
|
103
104
|
"langchain/document_loaders/fs/directory",
|
|
104
105
|
"langchain/document_loaders/fs/buffer",
|
package/dist/smith/config.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.LabeledCriteria = exports.Criteria = exports.isCustomEvaluator = exports.isOffTheShelfEvaluator = void 0;
|
|
3
|
+
exports.EmbeddingDistance = exports.LabeledCriteria = exports.Criteria = exports.isCustomEvaluator = exports.isOffTheShelfEvaluator = void 0;
|
|
4
4
|
function isOffTheShelfEvaluator(evaluator) {
|
|
5
5
|
return typeof evaluator === "string" || "evaluatorType" in evaluator;
|
|
6
6
|
}
|
|
@@ -36,6 +36,7 @@ function Criteria(criteria, config) {
|
|
|
36
36
|
evaluatorType: "criteria",
|
|
37
37
|
criteria,
|
|
38
38
|
feedbackKey: config?.feedbackKey ?? criteria,
|
|
39
|
+
llm: config?.llm,
|
|
39
40
|
formatEvaluatorInputs,
|
|
40
41
|
};
|
|
41
42
|
}
|
|
@@ -51,7 +52,23 @@ function LabeledCriteria(criteria, config) {
|
|
|
51
52
|
evaluatorType: "labeled_criteria",
|
|
52
53
|
criteria,
|
|
53
54
|
feedbackKey: config?.feedbackKey ?? criteria,
|
|
55
|
+
llm: config?.llm,
|
|
54
56
|
formatEvaluatorInputs,
|
|
55
57
|
};
|
|
56
58
|
}
|
|
57
59
|
exports.LabeledCriteria = LabeledCriteria;
|
|
60
|
+
function EmbeddingDistance(distanceMetric, config) {
|
|
61
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
62
|
+
((payload) => ({
|
|
63
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
64
|
+
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
|
|
65
|
+
}));
|
|
66
|
+
return {
|
|
67
|
+
evaluatorType: "embedding_distance",
|
|
68
|
+
embedding: config?.embedding,
|
|
69
|
+
distanceMetric,
|
|
70
|
+
feedbackKey: config?.feedbackKey ?? "embedding_distance",
|
|
71
|
+
formatEvaluatorInputs,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
74
|
+
exports.EmbeddingDistance = EmbeddingDistance;
|
package/dist/smith/config.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { BaseLanguageModel } from "@langchain/core/language_models/base";
|
|
|
2
2
|
import { RunnableConfig } from "@langchain/core/runnables";
|
|
3
3
|
import { Example, Run } from "langsmith";
|
|
4
4
|
import { EvaluationResult, RunEvaluator } from "langsmith/evaluation";
|
|
5
|
-
import { Criteria as CriteriaType } from "../evaluation/index.js";
|
|
5
|
+
import { Criteria as CriteriaType, type EmbeddingDistanceEvalChainInput } from "../evaluation/index.js";
|
|
6
6
|
import { LoadEvaluatorOptions } from "../evaluation/loader.js";
|
|
7
7
|
import { EvaluatorType } from "../evaluation/types.js";
|
|
8
8
|
export type EvaluatorInputs = {
|
|
@@ -35,6 +35,7 @@ export type RunEvaluatorLike = ((props: DynamicRunEvaluatorParams, options?: {
|
|
|
35
35
|
}) => EvaluationResult);
|
|
36
36
|
export declare function isOffTheShelfEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is T | EvalConfig;
|
|
37
37
|
export declare function isCustomEvaluator<T extends keyof EvaluatorType, U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike>(evaluator: T | EvalConfig | U): evaluator is U;
|
|
38
|
+
export type RunEvalType<T extends keyof EvaluatorType = "criteria" | "labeled_criteria" | "embedding_distance", U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike> = T | EvalConfig | U;
|
|
38
39
|
/**
|
|
39
40
|
* Configuration class for running evaluations on datasets.
|
|
40
41
|
*
|
|
@@ -44,22 +45,13 @@ export declare function isCustomEvaluator<T extends keyof EvaluatorType, U exten
|
|
|
44
45
|
* @typeparam T - The type of evaluators.
|
|
45
46
|
* @typeparam U - The type of custom evaluators.
|
|
46
47
|
*/
|
|
47
|
-
export type RunEvalConfig<T extends keyof EvaluatorType =
|
|
48
|
+
export type RunEvalConfig<T extends keyof EvaluatorType = "criteria" | "labeled_criteria" | "embedding_distance", U extends RunEvaluator | RunEvaluatorLike = RunEvaluator | RunEvaluatorLike> = {
|
|
48
49
|
/**
|
|
49
|
-
*
|
|
50
|
-
* Each evaluator is provided with a run trace containing the model
|
|
51
|
-
* outputs, as well as an "example" object representing a record
|
|
52
|
-
* in the dataset.
|
|
53
|
-
*
|
|
54
|
-
* @deprecated Use `evaluators` instead.
|
|
55
|
-
*/
|
|
56
|
-
customEvaluators?: U[];
|
|
57
|
-
/**
|
|
58
|
-
* LangChain evaluators to apply to a dataset run.
|
|
50
|
+
* Evaluators to apply to a dataset run.
|
|
59
51
|
* You can optionally specify these by name, or by
|
|
60
52
|
* configuring them with an EvalConfig object.
|
|
61
53
|
*/
|
|
62
|
-
evaluators?:
|
|
54
|
+
evaluators?: RunEvalType<T, U>[];
|
|
63
55
|
/**
|
|
64
56
|
* Convert the evaluation data into formats that can be used by the evaluator.
|
|
65
57
|
* This should most commonly be a string.
|
|
@@ -85,9 +77,14 @@ export type RunEvalConfig<T extends keyof EvaluatorType = keyof EvaluatorType, U
|
|
|
85
77
|
*/
|
|
86
78
|
formatEvaluatorInputs?: EvaluatorInputFormatter;
|
|
87
79
|
/**
|
|
88
|
-
*
|
|
80
|
+
* Custom evaluators to apply to a dataset run.
|
|
81
|
+
* Each evaluator is provided with a run trace containing the model
|
|
82
|
+
* outputs, as well as an "example" object representing a record
|
|
83
|
+
* in the dataset.
|
|
84
|
+
*
|
|
85
|
+
* @deprecated Use `evaluators` instead.
|
|
89
86
|
*/
|
|
90
|
-
|
|
87
|
+
customEvaluators?: U[];
|
|
91
88
|
};
|
|
92
89
|
export interface EvalConfig extends LoadEvaluatorOptions {
|
|
93
90
|
/**
|
|
@@ -151,7 +148,7 @@ export interface EvalConfig extends LoadEvaluatorOptions {
|
|
|
151
148
|
* }]
|
|
152
149
|
* };
|
|
153
150
|
*/
|
|
154
|
-
export type
|
|
151
|
+
export type Criteria = EvalConfig & {
|
|
155
152
|
evaluatorType: "criteria";
|
|
156
153
|
/**
|
|
157
154
|
* The "criteria" to insert into the prompt template
|
|
@@ -161,16 +158,12 @@ export type CriteriaEvalChainConfig = EvalConfig & {
|
|
|
161
158
|
*/
|
|
162
159
|
criteria?: CriteriaType | Record<string, string>;
|
|
163
160
|
/**
|
|
164
|
-
* The
|
|
165
|
-
* evaluation results. If none provided, we default to
|
|
166
|
-
* the evaluationName.
|
|
167
|
-
*/
|
|
168
|
-
feedbackKey?: string;
|
|
169
|
-
/**
|
|
170
|
-
* The language model to use as the evaluator.
|
|
161
|
+
* The language model to use as the evaluator, defaults to GPT-4
|
|
171
162
|
*/
|
|
172
163
|
llm?: BaseLanguageModel;
|
|
173
164
|
};
|
|
165
|
+
export type CriteriaEvalChainConfig = Criteria;
|
|
166
|
+
export declare function Criteria(criteria: CriteriaType, config?: Pick<Partial<LabeledCriteria>, "formatEvaluatorInputs" | "llm" | "feedbackKey">): EvalConfig;
|
|
174
167
|
/**
|
|
175
168
|
* Configuration to load a "LabeledCriteriaEvalChain" evaluator,
|
|
176
169
|
* which prompts an LLM to determine whether the model's
|
|
@@ -208,39 +201,17 @@ export type LabeledCriteria = EvalConfig & {
|
|
|
208
201
|
*/
|
|
209
202
|
criteria?: CriteriaType | Record<string, string>;
|
|
210
203
|
/**
|
|
211
|
-
* The
|
|
212
|
-
* evaluation results. If none provided, we default to
|
|
213
|
-
* the evaluationName.
|
|
214
|
-
*/
|
|
215
|
-
feedbackKey?: string;
|
|
216
|
-
/**
|
|
217
|
-
* The language model to use as the evaluator.
|
|
204
|
+
* The language model to use as the evaluator, defaults to GPT-4
|
|
218
205
|
*/
|
|
219
206
|
llm?: BaseLanguageModel;
|
|
220
207
|
};
|
|
221
|
-
export declare function
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
rawInput: any;
|
|
230
|
-
rawPrediction: any;
|
|
231
|
-
rawReferenceOutput?: any;
|
|
232
|
-
run: Run;
|
|
233
|
-
}) => {
|
|
234
|
-
prediction: string;
|
|
235
|
-
input: string;
|
|
236
|
-
});
|
|
237
|
-
};
|
|
238
|
-
export declare function LabeledCriteria(criteria: CriteriaType, config?: {
|
|
239
|
-
formatEvaluatorInputs?: EvaluatorInputFormatter;
|
|
240
|
-
feedbackKey?: string;
|
|
241
|
-
}): {
|
|
242
|
-
evaluatorType: "labeled_criteria";
|
|
243
|
-
criteria: CriteriaType;
|
|
244
|
-
feedbackKey: string;
|
|
245
|
-
formatEvaluatorInputs: EvaluatorInputFormatter;
|
|
208
|
+
export declare function LabeledCriteria(criteria: CriteriaType, config?: Pick<Partial<LabeledCriteria>, "formatEvaluatorInputs" | "llm" | "feedbackKey">): LabeledCriteria;
|
|
209
|
+
/**
|
|
210
|
+
* Configuration to load an "EmbeddingDistanceEvalChain" evaluator,
|
|
211
|
+
* which uses embedding distances to score the semantic difference between
|
|
212
|
+
* a prediction and reference.
|
|
213
|
+
*/
|
|
214
|
+
export type EmbeddingDistance = EvalConfig & EmbeddingDistanceEvalChainInput & {
|
|
215
|
+
evaluatorType: "embedding_distance";
|
|
246
216
|
};
|
|
217
|
+
export declare function EmbeddingDistance(distanceMetric: EmbeddingDistanceEvalChainInput["distanceMetric"], config?: Pick<Partial<LabeledCriteria>, "formatEvaluatorInputs" | "embedding" | "feedbackKey">): EmbeddingDistance;
|
package/dist/smith/config.js
CHANGED
|
@@ -31,6 +31,7 @@ export function Criteria(criteria, config) {
|
|
|
31
31
|
evaluatorType: "criteria",
|
|
32
32
|
criteria,
|
|
33
33
|
feedbackKey: config?.feedbackKey ?? criteria,
|
|
34
|
+
llm: config?.llm,
|
|
34
35
|
formatEvaluatorInputs,
|
|
35
36
|
};
|
|
36
37
|
}
|
|
@@ -45,6 +46,21 @@ export function LabeledCriteria(criteria, config) {
|
|
|
45
46
|
evaluatorType: "labeled_criteria",
|
|
46
47
|
criteria,
|
|
47
48
|
feedbackKey: config?.feedbackKey ?? criteria,
|
|
49
|
+
llm: config?.llm,
|
|
50
|
+
formatEvaluatorInputs,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
export function EmbeddingDistance(distanceMetric, config) {
|
|
54
|
+
const formatEvaluatorInputs = config?.formatEvaluatorInputs ??
|
|
55
|
+
((payload) => ({
|
|
56
|
+
prediction: getSingleStringifiedValue(payload.rawPrediction),
|
|
57
|
+
reference: getSingleStringifiedValue(payload.rawReferenceOutput),
|
|
58
|
+
}));
|
|
59
|
+
return {
|
|
60
|
+
evaluatorType: "embedding_distance",
|
|
61
|
+
embedding: config?.embedding,
|
|
62
|
+
distanceMetric,
|
|
63
|
+
feedbackKey: config?.feedbackKey ?? "embedding_distance",
|
|
48
64
|
formatEvaluatorInputs,
|
|
49
65
|
};
|
|
50
66
|
}
|
|
@@ -115,6 +115,61 @@ class DynamicRunEvaluator {
|
|
|
115
115
|
function isLLMStringEvaluator(evaluator) {
|
|
116
116
|
return evaluator && typeof evaluator.evaluateStrings === "function";
|
|
117
117
|
}
|
|
118
|
+
/**
|
|
119
|
+
* Internal implementation of RunTree, which uses the
|
|
120
|
+
* provided callback manager instead of the internal LangSmith client.
|
|
121
|
+
*
|
|
122
|
+
* The goal of this class is to ensure seamless interop when integrated
|
|
123
|
+
* with other Runnables.
|
|
124
|
+
*/
|
|
125
|
+
class CallbackManagerRunTree extends langsmith_1.RunTree {
|
|
126
|
+
constructor(config, callbackManager) {
|
|
127
|
+
super(config);
|
|
128
|
+
Object.defineProperty(this, "callbackManager", {
|
|
129
|
+
enumerable: true,
|
|
130
|
+
configurable: true,
|
|
131
|
+
writable: true,
|
|
132
|
+
value: void 0
|
|
133
|
+
});
|
|
134
|
+
Object.defineProperty(this, "activeCallbackManager", {
|
|
135
|
+
enumerable: true,
|
|
136
|
+
configurable: true,
|
|
137
|
+
writable: true,
|
|
138
|
+
value: undefined
|
|
139
|
+
});
|
|
140
|
+
this.callbackManager = callbackManager;
|
|
141
|
+
}
|
|
142
|
+
async createChild(config) {
|
|
143
|
+
const child = new CallbackManagerRunTree({
|
|
144
|
+
...config,
|
|
145
|
+
parent_run: this,
|
|
146
|
+
project_name: this.project_name,
|
|
147
|
+
client: this.client,
|
|
148
|
+
}, this.activeCallbackManager?.getChild() ?? this.callbackManager);
|
|
149
|
+
this.child_runs.push(child);
|
|
150
|
+
return child;
|
|
151
|
+
}
|
|
152
|
+
async postRun() {
|
|
153
|
+
// how it is translated in comparison to basic RunTree?
|
|
154
|
+
this.activeCallbackManager = await this.callbackManager.handleChainStart(typeof this.serialized !== "object" &&
|
|
155
|
+
this.serialized != null &&
|
|
156
|
+
"lc" in this.serialized
|
|
157
|
+
? this.serialized
|
|
158
|
+
: {
|
|
159
|
+
id: ["langchain", "smith", "CallbackManagerRunTree"],
|
|
160
|
+
lc: 1,
|
|
161
|
+
type: "not_implemented",
|
|
162
|
+
}, this.inputs, this.id, this.run_type, undefined, undefined, this.name);
|
|
163
|
+
}
|
|
164
|
+
async patchRun() {
|
|
165
|
+
if (this.error) {
|
|
166
|
+
await this.activeCallbackManager?.handleChainError(this.error, this.id, this.parent_run?.id, undefined, undefined);
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
await this.activeCallbackManager?.handleChainEnd(this.outputs ?? {}, this.id, this.parent_run?.id, undefined, undefined);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
118
173
|
class RunnableTraceable extends runnables_1.Runnable {
|
|
119
174
|
constructor(fields) {
|
|
120
175
|
super(fields);
|
|
@@ -147,12 +202,14 @@ class RunnableTraceable extends runnables_1.Runnable {
|
|
|
147
202
|
const partialConfig = "langsmith:traceable" in this.func
|
|
148
203
|
? this.func["langsmith:traceable"]
|
|
149
204
|
: { name: "<lambda>" };
|
|
150
|
-
|
|
205
|
+
if (!callbackManager)
|
|
206
|
+
throw new Error("CallbackManager not found");
|
|
207
|
+
const runTree = new CallbackManagerRunTree({
|
|
151
208
|
...partialConfig,
|
|
152
209
|
parent_run: callbackManager?._parentRunId
|
|
153
210
|
? new langsmith_1.RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
|
|
154
211
|
: undefined,
|
|
155
|
-
});
|
|
212
|
+
}, callbackManager);
|
|
156
213
|
if (typeof input === "object" &&
|
|
157
214
|
input != null &&
|
|
158
215
|
Object.keys(input).length === 1) {
|
|
@@ -217,14 +274,14 @@ class PreparedRunEvaluator {
|
|
|
217
274
|
const evalConfig = typeof config === "string" ? {} : config;
|
|
218
275
|
const evaluator = await (0, loader_js_1.loadEvaluator)(evaluatorType, evalConfig);
|
|
219
276
|
const feedbackKey = evalConfig?.feedbackKey ?? evaluator?.evaluationName;
|
|
220
|
-
if (!feedbackKey) {
|
|
221
|
-
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
222
|
-
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
223
|
-
}
|
|
224
277
|
if (!isLLMStringEvaluator(evaluator)) {
|
|
225
278
|
throw new Error(`Evaluator of type ${evaluatorType} not yet supported. ` +
|
|
226
279
|
"Please use a string evaluator, or implement your " +
|
|
227
|
-
"evaluation logic as a
|
|
280
|
+
"evaluation logic as a custom evaluator.");
|
|
281
|
+
}
|
|
282
|
+
if (!feedbackKey) {
|
|
283
|
+
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
284
|
+
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
228
285
|
}
|
|
229
286
|
return new PreparedRunEvaluator(evaluator, feedbackKey, evalConfig?.formatEvaluatorInputs);
|
|
230
287
|
}
|
|
@@ -261,7 +318,7 @@ class PreparedRunEvaluator {
|
|
|
261
318
|
}
|
|
262
319
|
throw new Error("Evaluator not yet supported. " +
|
|
263
320
|
"Please use a string evaluator, or implement your " +
|
|
264
|
-
"evaluation logic as a
|
|
321
|
+
"evaluation logic as a custom evaluator.");
|
|
265
322
|
}
|
|
266
323
|
}
|
|
267
324
|
class LoadedEvalConfig {
|
|
@@ -302,6 +359,10 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
302
359
|
return () => modelOrFactory;
|
|
303
360
|
}
|
|
304
361
|
if (typeof modelOrFactory === "function") {
|
|
362
|
+
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
363
|
+
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
364
|
+
return () => wrappedModel;
|
|
365
|
+
}
|
|
305
366
|
try {
|
|
306
367
|
// If it works with no arguments, assume it's a factory
|
|
307
368
|
let res = modelOrFactory();
|
|
@@ -313,11 +374,7 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
313
374
|
}
|
|
314
375
|
catch (err) {
|
|
315
376
|
// Otherwise, it's a custom UDF, and we'll wrap
|
|
316
|
-
// in a lambda
|
|
317
|
-
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
318
|
-
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
319
|
-
return () => wrappedModel;
|
|
320
|
-
}
|
|
377
|
+
// the function in a lambda
|
|
321
378
|
const wrappedModel = new runnables_1.RunnableLambda({ func: modelOrFactory });
|
|
322
379
|
return () => wrappedModel;
|
|
323
380
|
}
|
|
@@ -389,10 +446,65 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
389
446
|
}
|
|
390
447
|
return examples.map(({ inputs }) => inputs);
|
|
391
448
|
};
|
|
449
|
+
/**
|
|
450
|
+
* Evaluates a given model or chain against a specified LangSmith dataset.
|
|
451
|
+
*
|
|
452
|
+
* This function fetches example records from the specified dataset,
|
|
453
|
+
* runs the model or chain against each example, and returns the evaluation
|
|
454
|
+
* results.
|
|
455
|
+
*
|
|
456
|
+
* @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
|
|
457
|
+
* Runnable instance, a factory function that returns a Runnable, or a user-defined
|
|
458
|
+
* function or factory.
|
|
459
|
+
*
|
|
460
|
+
* @param datasetName - The name of the dataset against which the evaluation will be
|
|
461
|
+
* performed. This dataset should already be defined and contain the relevant data
|
|
462
|
+
* for evaluation.
|
|
463
|
+
*
|
|
464
|
+
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
465
|
+
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
|
|
466
|
+
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
|
|
467
|
+
* - `projectName` (string): Name of the project for logging and tracking.
|
|
468
|
+
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
469
|
+
* - `client` (Client): Client instance for LangSmith service interaction.
|
|
470
|
+
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
471
|
+
*
|
|
472
|
+
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
473
|
+
* detailed results of the evaluation, such as execution time, run IDs, and feedback
|
|
474
|
+
* for each entry in the dataset.
|
|
475
|
+
*
|
|
476
|
+
* @example
|
|
477
|
+
* ```typescript
|
|
478
|
+
* // Example usage for evaluating a model on a dataset
|
|
479
|
+
* async function evaluateModel() {
|
|
480
|
+
* const chain = /* ...create your model or chain...*\//
|
|
481
|
+
* const datasetName = 'example-dataset';
|
|
482
|
+
* const client = new Client(/* ...config... *\//);
|
|
483
|
+
*
|
|
484
|
+
* const results = await runOnDataset(chain, datasetName, {
|
|
485
|
+
* evaluators: [/* ...evaluators... *\//],
|
|
486
|
+
* client,
|
|
487
|
+
* });
|
|
488
|
+
*
|
|
489
|
+
* console.log('Evaluation Results:', results);
|
|
490
|
+
* }
|
|
491
|
+
*
|
|
492
|
+
* evaluateModel();
|
|
493
|
+
* ```
|
|
494
|
+
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
495
|
+
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
|
|
496
|
+
* include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith services.
|
|
497
|
+
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
498
|
+
*/
|
|
392
499
|
async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
393
|
-
const {
|
|
394
|
-
|
|
395
|
-
|
|
500
|
+
const { projectName, projectMetadata, client, maxConcurrency, } = options ?? {};
|
|
501
|
+
const evaluationConfig = options?.evaluationConfig ??
|
|
502
|
+
(options?.evaluators != null
|
|
503
|
+
? {
|
|
504
|
+
evaluators: options.evaluators,
|
|
505
|
+
formatEvaluatorInputs: options.formatEvaluatorInputs,
|
|
506
|
+
}
|
|
507
|
+
: undefined);
|
|
396
508
|
const wrappedModel = await createWrappedModel(chainOrFactory);
|
|
397
509
|
const testClient = client ?? new langsmith_1.Client();
|
|
398
510
|
const testProjectName = projectName ?? (0, name_generation_js_1.randomName)();
|
|
@@ -4,13 +4,28 @@ import type { TraceableFunction } from "langsmith/traceable";
|
|
|
4
4
|
import { type RunEvalConfig } from "./config.js";
|
|
5
5
|
export type ChainOrFactory = Runnable | (() => Runnable) | AnyTraceableFunction | ((obj: any) => any) | ((obj: any) => Promise<any>) | (() => (obj: unknown) => unknown) | (() => (obj: unknown) => Promise<unknown>);
|
|
6
6
|
type AnyTraceableFunction = TraceableFunction<(...any: any[]) => any>;
|
|
7
|
-
export
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
export interface RunOnDatasetParams extends Omit<RunEvalConfig, "customEvaluators"> {
|
|
8
|
+
/**
|
|
9
|
+
* Name of the project for logging and tracking.
|
|
10
|
+
*/
|
|
10
11
|
projectName?: string;
|
|
12
|
+
/**
|
|
13
|
+
* Additional metadata for the project.
|
|
14
|
+
*/
|
|
15
|
+
projectMetadata?: Record<string, unknown>;
|
|
16
|
+
/**
|
|
17
|
+
* Client instance for LangSmith service interaction.
|
|
18
|
+
*/
|
|
11
19
|
client?: Client;
|
|
20
|
+
/**
|
|
21
|
+
* Maximum concurrency level for dataset processing.
|
|
22
|
+
*/
|
|
12
23
|
maxConcurrency?: number;
|
|
13
|
-
|
|
24
|
+
/**
|
|
25
|
+
* @deprecated Pass keys directly to the RunOnDatasetParams instead
|
|
26
|
+
*/
|
|
27
|
+
evaluationConfig?: RunEvalConfig;
|
|
28
|
+
}
|
|
14
29
|
export type EvalResults = {
|
|
15
30
|
projectName: string;
|
|
16
31
|
results: {
|
|
@@ -37,11 +52,11 @@ export type EvalResults = {
|
|
|
37
52
|
* for evaluation.
|
|
38
53
|
*
|
|
39
54
|
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
40
|
-
* - `
|
|
41
|
-
*
|
|
55
|
+
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
|
|
56
|
+
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
|
|
42
57
|
* - `projectName` (string): Name of the project for logging and tracking.
|
|
43
58
|
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
44
|
-
* - `client` (Client): Client instance for
|
|
59
|
+
* - `client` (Client): Client instance for LangSmith service interaction.
|
|
45
60
|
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
46
61
|
*
|
|
47
62
|
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
@@ -56,13 +71,8 @@ export type EvalResults = {
|
|
|
56
71
|
* const datasetName = 'example-dataset';
|
|
57
72
|
* const client = new Client(/* ...config... *\//);
|
|
58
73
|
*
|
|
59
|
-
* const evaluationConfig = {
|
|
60
|
-
* evaluators: [/* ...evaluators... *\//],
|
|
61
|
-
* customEvaluators: [/* ...custom evaluators... *\//],
|
|
62
|
-
* };
|
|
63
|
-
*
|
|
64
74
|
* const results = await runOnDataset(chain, datasetName, {
|
|
65
|
-
*
|
|
75
|
+
* evaluators: [/* ...evaluators... *\//],
|
|
66
76
|
* client,
|
|
67
77
|
* });
|
|
68
78
|
*
|
|
@@ -72,10 +82,9 @@ export type EvalResults = {
|
|
|
72
82
|
* evaluateModel();
|
|
73
83
|
* ```
|
|
74
84
|
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
75
|
-
* a dataset named 'example-dataset'. The evaluation process is configured using `
|
|
85
|
+
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
|
|
76
86
|
* include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith services.
|
|
77
87
|
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
78
88
|
*/
|
|
79
|
-
export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string,
|
|
80
|
-
export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, evaluators: RunEvalConfig["evaluators"]): Promise<EvalResults>;
|
|
89
|
+
export declare function runOnDataset(chainOrFactory: ChainOrFactory, datasetName: string, options?: RunOnDatasetParams): Promise<EvalResults>;
|
|
81
90
|
export {};
|
|
@@ -112,6 +112,61 @@ class DynamicRunEvaluator {
|
|
|
112
112
|
function isLLMStringEvaluator(evaluator) {
|
|
113
113
|
return evaluator && typeof evaluator.evaluateStrings === "function";
|
|
114
114
|
}
|
|
115
|
+
/**
|
|
116
|
+
* Internal implementation of RunTree, which uses the
|
|
117
|
+
* provided callback manager instead of the internal LangSmith client.
|
|
118
|
+
*
|
|
119
|
+
* The goal of this class is to ensure seamless interop when integrated
|
|
120
|
+
* with other Runnables.
|
|
121
|
+
*/
|
|
122
|
+
class CallbackManagerRunTree extends RunTree {
|
|
123
|
+
constructor(config, callbackManager) {
|
|
124
|
+
super(config);
|
|
125
|
+
Object.defineProperty(this, "callbackManager", {
|
|
126
|
+
enumerable: true,
|
|
127
|
+
configurable: true,
|
|
128
|
+
writable: true,
|
|
129
|
+
value: void 0
|
|
130
|
+
});
|
|
131
|
+
Object.defineProperty(this, "activeCallbackManager", {
|
|
132
|
+
enumerable: true,
|
|
133
|
+
configurable: true,
|
|
134
|
+
writable: true,
|
|
135
|
+
value: undefined
|
|
136
|
+
});
|
|
137
|
+
this.callbackManager = callbackManager;
|
|
138
|
+
}
|
|
139
|
+
async createChild(config) {
|
|
140
|
+
const child = new CallbackManagerRunTree({
|
|
141
|
+
...config,
|
|
142
|
+
parent_run: this,
|
|
143
|
+
project_name: this.project_name,
|
|
144
|
+
client: this.client,
|
|
145
|
+
}, this.activeCallbackManager?.getChild() ?? this.callbackManager);
|
|
146
|
+
this.child_runs.push(child);
|
|
147
|
+
return child;
|
|
148
|
+
}
|
|
149
|
+
async postRun() {
|
|
150
|
+
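// NOTE(review): how does this map onto the base RunTree's postRun? Also, the `typeof ... !== "object"` guard below looks inverted (likely meant `===`) - confirm.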
|
|
151
|
+
this.activeCallbackManager = await this.callbackManager.handleChainStart(typeof this.serialized !== "object" &&
|
|
152
|
+
this.serialized != null &&
|
|
153
|
+
"lc" in this.serialized
|
|
154
|
+
? this.serialized
|
|
155
|
+
: {
|
|
156
|
+
id: ["langchain", "smith", "CallbackManagerRunTree"],
|
|
157
|
+
lc: 1,
|
|
158
|
+
type: "not_implemented",
|
|
159
|
+
}, this.inputs, this.id, this.run_type, undefined, undefined, this.name);
|
|
160
|
+
}
|
|
161
|
+
async patchRun() {
|
|
162
|
+
if (this.error) {
|
|
163
|
+
await this.activeCallbackManager?.handleChainError(this.error, this.id, this.parent_run?.id, undefined, undefined);
|
|
164
|
+
}
|
|
165
|
+
else {
|
|
166
|
+
await this.activeCallbackManager?.handleChainEnd(this.outputs ?? {}, this.id, this.parent_run?.id, undefined, undefined);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
115
170
|
class RunnableTraceable extends Runnable {
|
|
116
171
|
constructor(fields) {
|
|
117
172
|
super(fields);
|
|
@@ -144,12 +199,14 @@ class RunnableTraceable extends Runnable {
|
|
|
144
199
|
const partialConfig = "langsmith:traceable" in this.func
|
|
145
200
|
? this.func["langsmith:traceable"]
|
|
146
201
|
: { name: "<lambda>" };
|
|
147
|
-
|
|
202
|
+
if (!callbackManager)
|
|
203
|
+
throw new Error("CallbackManager not found");
|
|
204
|
+
const runTree = new CallbackManagerRunTree({
|
|
148
205
|
...partialConfig,
|
|
149
206
|
parent_run: callbackManager?._parentRunId
|
|
150
207
|
? new RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
|
|
151
208
|
: undefined,
|
|
152
|
-
});
|
|
209
|
+
}, callbackManager);
|
|
153
210
|
if (typeof input === "object" &&
|
|
154
211
|
input != null &&
|
|
155
212
|
Object.keys(input).length === 1) {
|
|
@@ -214,14 +271,14 @@ class PreparedRunEvaluator {
|
|
|
214
271
|
const evalConfig = typeof config === "string" ? {} : config;
|
|
215
272
|
const evaluator = await loadEvaluator(evaluatorType, evalConfig);
|
|
216
273
|
const feedbackKey = evalConfig?.feedbackKey ?? evaluator?.evaluationName;
|
|
217
|
-
if (!feedbackKey) {
|
|
218
|
-
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
219
|
-
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
220
|
-
}
|
|
221
274
|
if (!isLLMStringEvaluator(evaluator)) {
|
|
222
275
|
throw new Error(`Evaluator of type ${evaluatorType} not yet supported. ` +
|
|
223
276
|
"Please use a string evaluator, or implement your " +
|
|
224
|
-
"evaluation logic as a
|
|
277
|
+
"evaluation logic as a custom evaluator.");
|
|
278
|
+
}
|
|
279
|
+
if (!feedbackKey) {
|
|
280
|
+
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
|
|
281
|
+
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
|
|
225
282
|
}
|
|
226
283
|
return new PreparedRunEvaluator(evaluator, feedbackKey, evalConfig?.formatEvaluatorInputs);
|
|
227
284
|
}
|
|
@@ -258,7 +315,7 @@ class PreparedRunEvaluator {
|
|
|
258
315
|
}
|
|
259
316
|
throw new Error("Evaluator not yet supported. " +
|
|
260
317
|
"Please use a string evaluator, or implement your " +
|
|
261
|
-
"evaluation logic as a
|
|
318
|
+
"evaluation logic as a custom evaluator.");
|
|
262
319
|
}
|
|
263
320
|
}
|
|
264
321
|
class LoadedEvalConfig {
|
|
@@ -299,6 +356,10 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
299
356
|
return () => modelOrFactory;
|
|
300
357
|
}
|
|
301
358
|
if (typeof modelOrFactory === "function") {
|
|
359
|
+
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
360
|
+
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
361
|
+
return () => wrappedModel;
|
|
362
|
+
}
|
|
302
363
|
try {
|
|
303
364
|
// If it works with no arguments, assume it's a factory
|
|
304
365
|
let res = modelOrFactory();
|
|
@@ -310,11 +371,7 @@ const createWrappedModel = async (modelOrFactory) => {
|
|
|
310
371
|
}
|
|
311
372
|
catch (err) {
|
|
312
373
|
// Otherwise, it's a custom UDF, and we'll wrap
|
|
313
|
-
// in a lambda
|
|
314
|
-
if (isLangsmithTraceableFunction(modelOrFactory)) {
|
|
315
|
-
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
|
|
316
|
-
return () => wrappedModel;
|
|
317
|
-
}
|
|
374
|
+
// the function in a lambda
|
|
318
375
|
const wrappedModel = new RunnableLambda({ func: modelOrFactory });
|
|
319
376
|
return () => wrappedModel;
|
|
320
377
|
}
|
|
@@ -386,10 +443,65 @@ const getExamplesInputs = (examples, chainOrFactory, dataType) => {
|
|
|
386
443
|
}
|
|
387
444
|
return examples.map(({ inputs }) => inputs);
|
|
388
445
|
};
|
|
446
|
+
/**
|
|
447
|
+
* Evaluates a given model or chain against a specified LangSmith dataset.
|
|
448
|
+
*
|
|
449
|
+
* This function fetches example records from the specified dataset,
|
|
450
|
+
* runs the model or chain against each example, and returns the evaluation
|
|
451
|
+
* results.
|
|
452
|
+
*
|
|
453
|
+
* @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
|
|
454
|
+
* Runnable instance, a factory function that returns a Runnable, or a user-defined
|
|
455
|
+
* function or factory.
|
|
456
|
+
*
|
|
457
|
+
* @param datasetName - The name of the dataset against which the evaluation will be
|
|
458
|
+
* performed. This dataset should already be defined and contain the relevant data
|
|
459
|
+
* for evaluation.
|
|
460
|
+
*
|
|
461
|
+
* @param options - (Optional) Additional parameters for the evaluation process:
|
|
462
|
+
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
|
|
463
|
+
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
|
|
464
|
+
* - `projectName` (string): Name of the project for logging and tracking.
|
|
465
|
+
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
|
|
466
|
+
* - `client` (Client): Client instance for LangSmith service interaction.
|
|
467
|
+
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
|
|
468
|
+
*
|
|
469
|
+
* @returns A promise that resolves to an `EvalResults` object. This object includes
|
|
470
|
+
* detailed results of the evaluation, such as execution time, run IDs, and feedback
|
|
471
|
+
* for each entry in the dataset.
|
|
472
|
+
*
|
|
473
|
+
* @example
|
|
474
|
+
* ```typescript
|
|
475
|
+
* // Example usage for evaluating a model on a dataset
|
|
476
|
+
* async function evaluateModel() {
|
|
477
|
+
* const chain = /* ...create your model or chain...*\//
|
|
478
|
+
* const datasetName = 'example-dataset';
|
|
479
|
+
* const client = new Client(/* ...config... *\//);
|
|
480
|
+
*
|
|
481
|
+
* const results = await runOnDataset(chain, datasetName, {
|
|
482
|
+
* evaluators: [/* ...evaluators... *\//],
|
|
483
|
+
* client,
|
|
484
|
+
* });
|
|
485
|
+
*
|
|
486
|
+
* console.log('Evaluation Results:', results);
|
|
487
|
+
* }
|
|
488
|
+
*
|
|
489
|
+
* evaluateModel();
|
|
490
|
+
* ```
|
|
491
|
+
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
|
|
492
|
+
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
|
|
493
|
+
* include both standard and custom evaluators. The `Client` instance is used to interact with LangSmith services.
|
|
494
|
+
* The function returns the evaluation results, which can be logged or further processed as needed.
|
|
495
|
+
*/
|
|
389
496
|
export async function runOnDataset(chainOrFactory, datasetName, options) {
|
|
390
|
-
const {
|
|
391
|
-
|
|
392
|
-
|
|
497
|
+
const { projectName, projectMetadata, client, maxConcurrency, } = options ?? {};
|
|
498
|
+
const evaluationConfig = options?.evaluationConfig ??
|
|
499
|
+
(options?.evaluators != null
|
|
500
|
+
? {
|
|
501
|
+
evaluators: options.evaluators,
|
|
502
|
+
formatEvaluatorInputs: options.formatEvaluatorInputs,
|
|
503
|
+
}
|
|
504
|
+
: undefined);
|
|
393
505
|
const wrappedModel = await createWrappedModel(chainOrFactory);
|
|
394
506
|
const testClient = client ?? new Client();
|
|
395
507
|
const testProjectName = projectName ?? randomName();
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../../dist/document_loaders/web/couchbase.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/couchbase.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/couchbase.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/couchbase.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.1.19-rc.
|
|
3
|
+
"version": "0.1.19-rc.2",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -614,6 +614,10 @@
|
|
|
614
614
|
"document_loaders/web/confluence.js",
|
|
615
615
|
"document_loaders/web/confluence.d.ts",
|
|
616
616
|
"document_loaders/web/confluence.d.cts",
|
|
617
|
+
"document_loaders/web/couchbase.cjs",
|
|
618
|
+
"document_loaders/web/couchbase.js",
|
|
619
|
+
"document_loaders/web/couchbase.d.ts",
|
|
620
|
+
"document_loaders/web/couchbase.d.cts",
|
|
617
621
|
"document_loaders/web/searchapi.cjs",
|
|
618
622
|
"document_loaders/web/searchapi.js",
|
|
619
623
|
"document_loaders/web/searchapi.d.ts",
|
|
@@ -1247,6 +1251,7 @@
|
|
|
1247
1251
|
"cheerio": "^1.0.0-rc.12",
|
|
1248
1252
|
"chromadb": "^1.5.3",
|
|
1249
1253
|
"convex": "^1.3.1",
|
|
1254
|
+
"couchbase": "^4.2.10",
|
|
1250
1255
|
"d3-dsv": "^2.0.0",
|
|
1251
1256
|
"dotenv": "^16.0.3",
|
|
1252
1257
|
"dpdm": "^3.12.0",
|
|
@@ -1317,6 +1322,7 @@
|
|
|
1317
1322
|
"cheerio": "^1.0.0-rc.12",
|
|
1318
1323
|
"chromadb": "*",
|
|
1319
1324
|
"convex": "^1.3.1",
|
|
1325
|
+
"couchbase": "^4.2.10",
|
|
1320
1326
|
"d3-dsv": "^2.0.0",
|
|
1321
1327
|
"epub2": "^3.0.1",
|
|
1322
1328
|
"fast-xml-parser": "^4.2.7",
|
|
@@ -1411,6 +1417,9 @@
|
|
|
1411
1417
|
"convex": {
|
|
1412
1418
|
"optional": true
|
|
1413
1419
|
},
|
|
1420
|
+
"couchbase": {
|
|
1421
|
+
"optional": true
|
|
1422
|
+
},
|
|
1414
1423
|
"d3-dsv": {
|
|
1415
1424
|
"optional": true
|
|
1416
1425
|
},
|
|
@@ -1504,8 +1513,8 @@
|
|
|
1504
1513
|
},
|
|
1505
1514
|
"dependencies": {
|
|
1506
1515
|
"@anthropic-ai/sdk": "^0.9.1",
|
|
1507
|
-
"@langchain/community": "~0.0.
|
|
1508
|
-
"@langchain/core": "~0.1.
|
|
1516
|
+
"@langchain/community": "~0.0.29",
|
|
1517
|
+
"@langchain/core": "~0.1.29",
|
|
1509
1518
|
"@langchain/openai": "~0.0.14",
|
|
1510
1519
|
"binary-extensions": "^2.2.0",
|
|
1511
1520
|
"expr-eval": "^2.0.2",
|
|
@@ -2899,6 +2908,15 @@
|
|
|
2899
2908
|
"import": "./document_loaders/web/confluence.js",
|
|
2900
2909
|
"require": "./document_loaders/web/confluence.cjs"
|
|
2901
2910
|
},
|
|
2911
|
+
"./document_loaders/web/couchbase": {
|
|
2912
|
+
"types": {
|
|
2913
|
+
"import": "./document_loaders/web/couchbase.d.ts",
|
|
2914
|
+
"require": "./document_loaders/web/couchbase.d.cts",
|
|
2915
|
+
"default": "./document_loaders/web/couchbase.d.ts"
|
|
2916
|
+
},
|
|
2917
|
+
"import": "./document_loaders/web/couchbase.js",
|
|
2918
|
+
"require": "./document_loaders/web/couchbase.cjs"
|
|
2919
|
+
},
|
|
2902
2920
|
"./document_loaders/web/searchapi": {
|
|
2903
2921
|
"types": {
|
|
2904
2922
|
"import": "./document_loaders/web/searchapi.d.ts",
|