langchain 0.0.79 → 0.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/document_loaders/index.cjs +1 -3
- package/dist/document_loaders/index.d.ts +0 -1
- package/dist/document_loaders/index.js +0 -1
- package/dist/document_loaders/web/github.cjs +38 -23
- package/dist/document_loaders/web/github.d.ts +5 -2
- package/dist/document_loaders/web/github.js +38 -23
- package/dist/llms/googlevertexai.cjs +97 -0
- package/dist/llms/googlevertexai.d.ts +43 -0
- package/dist/llms/googlevertexai.js +93 -0
- package/dist/text_splitter.cjs +11 -4
- package/dist/text_splitter.d.ts +7 -2
- package/dist/text_splitter.js +11 -4
- package/dist/types/googlevertexai-types.cjs +2 -0
- package/dist/types/googlevertexai-types.d.ts +47 -0
- package/dist/types/googlevertexai-types.js +1 -0
- package/dist/util/googlevertexai-connection.cjs +66 -0
- package/dist/util/googlevertexai-connection.d.ts +13 -0
- package/dist/util/googlevertexai-connection.js +62 -0
- package/dist/vectorstores/chroma.cjs +34 -7
- package/dist/vectorstores/chroma.d.ts +5 -1
- package/dist/vectorstores/chroma.js +34 -7
- package/llms/googlevertexai.cjs +1 -0
- package/llms/googlevertexai.d.ts +1 -0
- package/llms/googlevertexai.js +1 -0
- package/package.json +18 -3
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.UnstructuredLoader = exports.
|
|
3
|
+
exports.UnstructuredLoader = exports.NotionLoader = exports.CSVLoader = exports.JSONLinesLoader = exports.JSONLoader = exports.TextLoader = exports.EPubLoader = exports.DocxLoader = exports.PDFLoader = exports.SRTLoader = exports.UnknownHandling = exports.DirectoryLoader = exports.IMSDBLoader = exports.HNLoader = exports.GitbookLoader = exports.CollegeConfidentialLoader = exports.PuppeteerWebBaseLoader = exports.CheerioWebBaseLoader = exports.BaseDocumentLoader = void 0;
|
|
4
4
|
/* #__PURE__ */ console.error("[WARN] Importing from 'langchain/document_loaders' is deprecated. Import from eg. 'langchain/document_loaders/fs/text' or 'langchain/document_loaders/web/cheerio' instead. See https://js.langchain.com/docs/getting-started/install#updating-from-0052 for upgrade instructions.");
|
|
5
5
|
var base_js_1 = require("./base.cjs");
|
|
6
6
|
Object.defineProperty(exports, "BaseDocumentLoader", { enumerable: true, get: function () { return base_js_1.BaseDocumentLoader; } });
|
|
@@ -36,7 +36,5 @@ var csv_js_1 = require("./fs/csv.cjs");
|
|
|
36
36
|
Object.defineProperty(exports, "CSVLoader", { enumerable: true, get: function () { return csv_js_1.CSVLoader; } });
|
|
37
37
|
var notion_js_1 = require("./fs/notion.cjs");
|
|
38
38
|
Object.defineProperty(exports, "NotionLoader", { enumerable: true, get: function () { return notion_js_1.NotionLoader; } });
|
|
39
|
-
var github_js_1 = require("./web/github.cjs");
|
|
40
|
-
Object.defineProperty(exports, "GithubRepoLoader", { enumerable: true, get: function () { return github_js_1.GithubRepoLoader; } });
|
|
41
39
|
var unstructured_js_1 = require("./fs/unstructured.cjs");
|
|
42
40
|
Object.defineProperty(exports, "UnstructuredLoader", { enumerable: true, get: function () { return unstructured_js_1.UnstructuredLoader; } });
|
|
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
|
|
|
15
15
|
export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
|
|
16
16
|
export { CSVLoader } from "./fs/csv.js";
|
|
17
17
|
export { NotionLoader } from "./fs/notion.js";
|
|
18
|
-
export { GithubRepoLoader, GithubRepoLoaderParams } from "./web/github.js";
|
|
19
18
|
export { UnstructuredLoader } from "./fs/unstructured.js";
|
|
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
|
|
|
15
15
|
export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
|
|
16
16
|
export { CSVLoader } from "./fs/csv.js";
|
|
17
17
|
export { NotionLoader } from "./fs/notion.js";
|
|
18
|
-
export { GithubRepoLoader } from "./web/github.js";
|
|
19
18
|
export { UnstructuredLoader } from "./fs/unstructured.js";
|
|
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.GithubRepoLoader = void 0;
|
|
7
|
+
const ignore_1 = __importDefault(require("ignore"));
|
|
7
8
|
const binary_extensions_1 = __importDefault(require("binary-extensions"));
|
|
8
9
|
const document_js_1 = require("../../document.cjs");
|
|
9
10
|
const base_js_1 = require("../base.cjs");
|
|
@@ -17,7 +18,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
17
18
|
constructor(githubUrl, { accessToken = typeof process !== "undefined"
|
|
18
19
|
? // eslint-disable-next-line no-process-env
|
|
19
20
|
process.env?.GITHUB_ACCESS_TOKEN
|
|
20
|
-
: undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], } = {}) {
|
|
21
|
+
: undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
|
|
21
22
|
super();
|
|
22
23
|
Object.defineProperty(this, "owner", {
|
|
23
24
|
enumerable: true,
|
|
@@ -73,6 +74,12 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
73
74
|
writable: true,
|
|
74
75
|
value: void 0
|
|
75
76
|
});
|
|
77
|
+
Object.defineProperty(this, "ignore", {
|
|
78
|
+
enumerable: true,
|
|
79
|
+
configurable: true,
|
|
80
|
+
writable: true,
|
|
81
|
+
value: void 0
|
|
82
|
+
});
|
|
76
83
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
77
84
|
this.owner = owner;
|
|
78
85
|
this.repo = repo;
|
|
@@ -82,6 +89,9 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
82
89
|
this.unknown = unknown;
|
|
83
90
|
this.accessToken = accessToken;
|
|
84
91
|
this.ignoreFiles = ignoreFiles;
|
|
92
|
+
if (ignorePaths) {
|
|
93
|
+
this.ignore = ignore_1.default.default().add(ignorePaths);
|
|
94
|
+
}
|
|
85
95
|
if (this.accessToken) {
|
|
86
96
|
this.headers = {
|
|
87
97
|
Authorization: `Bearer ${this.accessToken}`,
|
|
@@ -100,38 +110,43 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
100
110
|
await this.processDirectory(this.initialPath, documents);
|
|
101
111
|
return documents;
|
|
102
112
|
}
|
|
103
|
-
shouldIgnore(path) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
async shouldIgnore(path, fileType) {
|
|
114
|
+
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
115
|
+
return true;
|
|
116
|
+
}
|
|
117
|
+
if (this.ignore !== undefined) {
|
|
118
|
+
return this.ignore.ignores(path);
|
|
119
|
+
}
|
|
120
|
+
return (fileType !== "dir" &&
|
|
121
|
+
this.ignoreFiles.some((pattern) => {
|
|
122
|
+
if (typeof pattern === "string") {
|
|
123
|
+
return path === pattern;
|
|
124
|
+
}
|
|
125
|
+
try {
|
|
126
|
+
return pattern.test(path);
|
|
127
|
+
}
|
|
128
|
+
catch {
|
|
129
|
+
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
130
|
+
}
|
|
131
|
+
}));
|
|
115
132
|
}
|
|
116
133
|
async processDirectory(path, documents) {
|
|
117
134
|
try {
|
|
118
135
|
const files = await this.fetchRepoFiles(path);
|
|
119
136
|
for (const file of files) {
|
|
120
|
-
if (file.type
|
|
121
|
-
if (
|
|
122
|
-
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
else {
|
|
126
|
-
try {
|
|
127
|
-
if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
|
|
137
|
+
if (!(await this.shouldIgnore(file.path, file.type))) {
|
|
138
|
+
if (file.type !== "dir") {
|
|
139
|
+
try {
|
|
128
140
|
const fileContent = await this.fetchFileContent(file);
|
|
129
141
|
const metadata = { source: file.path };
|
|
130
142
|
documents.push(new document_js_1.Document({ pageContent: fileContent, metadata }));
|
|
131
143
|
}
|
|
144
|
+
catch (e) {
|
|
145
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
146
|
+
}
|
|
132
147
|
}
|
|
133
|
-
|
|
134
|
-
this.
|
|
148
|
+
else if (this.recursive) {
|
|
149
|
+
await this.processDirectory(file.path, documents);
|
|
135
150
|
}
|
|
136
151
|
}
|
|
137
152
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Ignore } from "ignore";
|
|
1
2
|
import { Document } from "../../document.js";
|
|
2
3
|
import { BaseDocumentLoader } from "../base.js";
|
|
3
4
|
import { UnknownHandling } from "../fs/directory.js";
|
|
@@ -7,6 +8,7 @@ export interface GithubRepoLoaderParams {
|
|
|
7
8
|
unknown?: UnknownHandling;
|
|
8
9
|
accessToken?: string;
|
|
9
10
|
ignoreFiles?: (string | RegExp)[];
|
|
11
|
+
ignorePaths?: string[];
|
|
10
12
|
}
|
|
11
13
|
export declare class GithubRepoLoader extends BaseDocumentLoader implements GithubRepoLoaderParams {
|
|
12
14
|
private readonly owner;
|
|
@@ -18,10 +20,11 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
|
|
|
18
20
|
unknown: UnknownHandling;
|
|
19
21
|
accessToken?: string;
|
|
20
22
|
ignoreFiles: (string | RegExp)[];
|
|
21
|
-
|
|
23
|
+
ignore?: Ignore;
|
|
24
|
+
constructor(githubUrl: string, { accessToken, branch, recursive, unknown, ignoreFiles, ignorePaths, }?: GithubRepoLoaderParams);
|
|
22
25
|
private extractOwnerAndRepoAndPath;
|
|
23
26
|
load(): Promise<Document[]>;
|
|
24
|
-
|
|
27
|
+
protected shouldIgnore(path: string, fileType: string): Promise<boolean>;
|
|
25
28
|
private processDirectory;
|
|
26
29
|
private fetchRepoFiles;
|
|
27
30
|
private fetchFileContent;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import ignore from "ignore";
|
|
1
2
|
import binaryExtensions from "binary-extensions";
|
|
2
3
|
import { Document } from "../../document.js";
|
|
3
4
|
import { BaseDocumentLoader } from "../base.js";
|
|
@@ -11,7 +12,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
11
12
|
constructor(githubUrl, { accessToken = typeof process !== "undefined"
|
|
12
13
|
? // eslint-disable-next-line no-process-env
|
|
13
14
|
process.env?.GITHUB_ACCESS_TOKEN
|
|
14
|
-
: undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], } = {}) {
|
|
15
|
+
: undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
|
|
15
16
|
super();
|
|
16
17
|
Object.defineProperty(this, "owner", {
|
|
17
18
|
enumerable: true,
|
|
@@ -67,6 +68,12 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
67
68
|
writable: true,
|
|
68
69
|
value: void 0
|
|
69
70
|
});
|
|
71
|
+
Object.defineProperty(this, "ignore", {
|
|
72
|
+
enumerable: true,
|
|
73
|
+
configurable: true,
|
|
74
|
+
writable: true,
|
|
75
|
+
value: void 0
|
|
76
|
+
});
|
|
70
77
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
71
78
|
this.owner = owner;
|
|
72
79
|
this.repo = repo;
|
|
@@ -76,6 +83,9 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
76
83
|
this.unknown = unknown;
|
|
77
84
|
this.accessToken = accessToken;
|
|
78
85
|
this.ignoreFiles = ignoreFiles;
|
|
86
|
+
if (ignorePaths) {
|
|
87
|
+
this.ignore = ignore.default().add(ignorePaths);
|
|
88
|
+
}
|
|
79
89
|
if (this.accessToken) {
|
|
80
90
|
this.headers = {
|
|
81
91
|
Authorization: `Bearer ${this.accessToken}`,
|
|
@@ -94,38 +104,43 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
94
104
|
await this.processDirectory(this.initialPath, documents);
|
|
95
105
|
return documents;
|
|
96
106
|
}
|
|
97
|
-
shouldIgnore(path) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
async shouldIgnore(path, fileType) {
|
|
108
|
+
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
111
|
+
if (this.ignore !== undefined) {
|
|
112
|
+
return this.ignore.ignores(path);
|
|
113
|
+
}
|
|
114
|
+
return (fileType !== "dir" &&
|
|
115
|
+
this.ignoreFiles.some((pattern) => {
|
|
116
|
+
if (typeof pattern === "string") {
|
|
117
|
+
return path === pattern;
|
|
118
|
+
}
|
|
119
|
+
try {
|
|
120
|
+
return pattern.test(path);
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
124
|
+
}
|
|
125
|
+
}));
|
|
109
126
|
}
|
|
110
127
|
async processDirectory(path, documents) {
|
|
111
128
|
try {
|
|
112
129
|
const files = await this.fetchRepoFiles(path);
|
|
113
130
|
for (const file of files) {
|
|
114
|
-
if (file.type
|
|
115
|
-
if (
|
|
116
|
-
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
else {
|
|
120
|
-
try {
|
|
121
|
-
if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
|
|
131
|
+
if (!(await this.shouldIgnore(file.path, file.type))) {
|
|
132
|
+
if (file.type !== "dir") {
|
|
133
|
+
try {
|
|
122
134
|
const fileContent = await this.fetchFileContent(file);
|
|
123
135
|
const metadata = { source: file.path };
|
|
124
136
|
documents.push(new Document({ pageContent: fileContent, metadata }));
|
|
125
137
|
}
|
|
138
|
+
catch (e) {
|
|
139
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
140
|
+
}
|
|
126
141
|
}
|
|
127
|
-
|
|
128
|
-
this.
|
|
142
|
+
else if (this.recursive) {
|
|
143
|
+
await this.processDirectory(file.path, documents);
|
|
129
144
|
}
|
|
130
145
|
}
|
|
131
146
|
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GoogleVertexAI = void 0;
|
|
4
|
+
const base_js_1 = require("./base.cjs");
|
|
5
|
+
const googlevertexai_connection_js_1 = require("../util/googlevertexai-connection.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
8
|
+
* Large Language Models.
|
|
9
|
+
*
|
|
10
|
+
* To use, you will need to have one of the following authentication
|
|
11
|
+
* methods in place:
|
|
12
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
13
|
+
* using Vertex AI.
|
|
14
|
+
* - You are running this on a machine using a service account permitted to
|
|
15
|
+
* the Google Cloud project using Vertex AI.
|
|
16
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
17
|
+
* path of a credentials file for a service account permitted to the
|
|
18
|
+
* Google Cloud project using Vertex AI.
|
|
19
|
+
*/
|
|
20
|
+
class GoogleVertexAI extends base_js_1.BaseLLM {
|
|
21
|
+
constructor(fields) {
|
|
22
|
+
super(fields ?? {});
|
|
23
|
+
Object.defineProperty(this, "model", {
|
|
24
|
+
enumerable: true,
|
|
25
|
+
configurable: true,
|
|
26
|
+
writable: true,
|
|
27
|
+
value: "text-bison"
|
|
28
|
+
});
|
|
29
|
+
Object.defineProperty(this, "temperature", {
|
|
30
|
+
enumerable: true,
|
|
31
|
+
configurable: true,
|
|
32
|
+
writable: true,
|
|
33
|
+
value: 0.7
|
|
34
|
+
});
|
|
35
|
+
Object.defineProperty(this, "maxOutputTokens", {
|
|
36
|
+
enumerable: true,
|
|
37
|
+
configurable: true,
|
|
38
|
+
writable: true,
|
|
39
|
+
value: 1024
|
|
40
|
+
});
|
|
41
|
+
Object.defineProperty(this, "topP", {
|
|
42
|
+
enumerable: true,
|
|
43
|
+
configurable: true,
|
|
44
|
+
writable: true,
|
|
45
|
+
value: 0.8
|
|
46
|
+
});
|
|
47
|
+
Object.defineProperty(this, "topK", {
|
|
48
|
+
enumerable: true,
|
|
49
|
+
configurable: true,
|
|
50
|
+
writable: true,
|
|
51
|
+
value: 40
|
|
52
|
+
});
|
|
53
|
+
Object.defineProperty(this, "connection", {
|
|
54
|
+
enumerable: true,
|
|
55
|
+
configurable: true,
|
|
56
|
+
writable: true,
|
|
57
|
+
value: void 0
|
|
58
|
+
});
|
|
59
|
+
this.model = fields?.model ?? this.model;
|
|
60
|
+
this.temperature = fields?.temperature ?? this.temperature;
|
|
61
|
+
this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
|
|
62
|
+
this.topP = fields?.topP ?? this.topP;
|
|
63
|
+
this.topK = fields?.topK ?? this.topK;
|
|
64
|
+
this.connection = new googlevertexai_connection_js_1.GoogleVertexAIConnection({ ...fields, ...this }, this.caller);
|
|
65
|
+
}
|
|
66
|
+
_llmType() {
|
|
67
|
+
return "googlevertexai";
|
|
68
|
+
}
|
|
69
|
+
async _generate(prompts, options) {
|
|
70
|
+
const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
|
|
71
|
+
return { generations };
|
|
72
|
+
}
|
|
73
|
+
async _generatePrompt(prompt, options) {
|
|
74
|
+
const instance = this.formatInstance(prompt);
|
|
75
|
+
const parameters = {
|
|
76
|
+
temperature: this.temperature,
|
|
77
|
+
topK: this.topK,
|
|
78
|
+
topP: this.topP,
|
|
79
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
80
|
+
};
|
|
81
|
+
const result = await this.connection.request([instance], parameters, options);
|
|
82
|
+
const prediction = this.extractPredictionFromResponse(result);
|
|
83
|
+
return [
|
|
84
|
+
{
|
|
85
|
+
text: prediction.content,
|
|
86
|
+
generationInfo: prediction,
|
|
87
|
+
},
|
|
88
|
+
];
|
|
89
|
+
}
|
|
90
|
+
formatInstance(prompt) {
|
|
91
|
+
return { content: prompt };
|
|
92
|
+
}
|
|
93
|
+
extractPredictionFromResponse(result) {
|
|
94
|
+
return result?.data?.predictions[0];
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
exports.GoogleVertexAI = GoogleVertexAI;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { BaseLLM } from "./base.js";
|
|
2
|
+
import { Generation, LLMResult } from "../schema/index.js";
|
|
3
|
+
import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, GoogleVertexAILLMResponse } from "../types/googlevertexai-types.js";
|
|
4
|
+
export interface GoogleVertexAITextInput extends GoogleVertexAIBaseLLMInput {
|
|
5
|
+
}
|
|
6
|
+
interface GoogleVertexAILLMTextInstance {
|
|
7
|
+
content: string;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Models the data returned from the API call
|
|
11
|
+
*/
|
|
12
|
+
interface TextPrediction extends GoogleVertexAIBasePrediction {
|
|
13
|
+
content: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
17
|
+
* Large Language Models.
|
|
18
|
+
*
|
|
19
|
+
* To use, you will need to have one of the following authentication
|
|
20
|
+
* methods in place:
|
|
21
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
22
|
+
* using Vertex AI.
|
|
23
|
+
* - You are running this on a machine using a service account permitted to
|
|
24
|
+
* the Google Cloud project using Vertex AI.
|
|
25
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
26
|
+
* path of a credentials file for a service account permitted to the
|
|
27
|
+
* Google Cloud project using Vertex AI.
|
|
28
|
+
*/
|
|
29
|
+
export declare class GoogleVertexAI extends BaseLLM implements GoogleVertexAITextInput {
|
|
30
|
+
model: string;
|
|
31
|
+
temperature: number;
|
|
32
|
+
maxOutputTokens: number;
|
|
33
|
+
topP: number;
|
|
34
|
+
topK: number;
|
|
35
|
+
private connection;
|
|
36
|
+
constructor(fields?: GoogleVertexAITextInput);
|
|
37
|
+
_llmType(): string;
|
|
38
|
+
_generate(prompts: string[], options: this["ParsedCallOptions"]): Promise<LLMResult>;
|
|
39
|
+
_generatePrompt(prompt: string, options: this["ParsedCallOptions"]): Promise<Generation[]>;
|
|
40
|
+
formatInstance(prompt: string): GoogleVertexAILLMTextInstance;
|
|
41
|
+
extractPredictionFromResponse(result: GoogleVertexAILLMResponse<TextPrediction>): TextPrediction;
|
|
42
|
+
}
|
|
43
|
+
export {};
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { BaseLLM } from "./base.js";
|
|
2
|
+
import { GoogleVertexAIConnection } from "../util/googlevertexai-connection.js";
|
|
3
|
+
/**
|
|
4
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
5
|
+
* Large Language Models.
|
|
6
|
+
*
|
|
7
|
+
* To use, you will need to have one of the following authentication
|
|
8
|
+
* methods in place:
|
|
9
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
10
|
+
* using Vertex AI.
|
|
11
|
+
* - You are running this on a machine using a service account permitted to
|
|
12
|
+
* the Google Cloud project using Vertex AI.
|
|
13
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
14
|
+
* path of a credentials file for a service account permitted to the
|
|
15
|
+
* Google Cloud project using Vertex AI.
|
|
16
|
+
*/
|
|
17
|
+
export class GoogleVertexAI extends BaseLLM {
|
|
18
|
+
constructor(fields) {
|
|
19
|
+
super(fields ?? {});
|
|
20
|
+
Object.defineProperty(this, "model", {
|
|
21
|
+
enumerable: true,
|
|
22
|
+
configurable: true,
|
|
23
|
+
writable: true,
|
|
24
|
+
value: "text-bison"
|
|
25
|
+
});
|
|
26
|
+
Object.defineProperty(this, "temperature", {
|
|
27
|
+
enumerable: true,
|
|
28
|
+
configurable: true,
|
|
29
|
+
writable: true,
|
|
30
|
+
value: 0.7
|
|
31
|
+
});
|
|
32
|
+
Object.defineProperty(this, "maxOutputTokens", {
|
|
33
|
+
enumerable: true,
|
|
34
|
+
configurable: true,
|
|
35
|
+
writable: true,
|
|
36
|
+
value: 1024
|
|
37
|
+
});
|
|
38
|
+
Object.defineProperty(this, "topP", {
|
|
39
|
+
enumerable: true,
|
|
40
|
+
configurable: true,
|
|
41
|
+
writable: true,
|
|
42
|
+
value: 0.8
|
|
43
|
+
});
|
|
44
|
+
Object.defineProperty(this, "topK", {
|
|
45
|
+
enumerable: true,
|
|
46
|
+
configurable: true,
|
|
47
|
+
writable: true,
|
|
48
|
+
value: 40
|
|
49
|
+
});
|
|
50
|
+
Object.defineProperty(this, "connection", {
|
|
51
|
+
enumerable: true,
|
|
52
|
+
configurable: true,
|
|
53
|
+
writable: true,
|
|
54
|
+
value: void 0
|
|
55
|
+
});
|
|
56
|
+
this.model = fields?.model ?? this.model;
|
|
57
|
+
this.temperature = fields?.temperature ?? this.temperature;
|
|
58
|
+
this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
|
|
59
|
+
this.topP = fields?.topP ?? this.topP;
|
|
60
|
+
this.topK = fields?.topK ?? this.topK;
|
|
61
|
+
this.connection = new GoogleVertexAIConnection({ ...fields, ...this }, this.caller);
|
|
62
|
+
}
|
|
63
|
+
_llmType() {
|
|
64
|
+
return "googlevertexai";
|
|
65
|
+
}
|
|
66
|
+
async _generate(prompts, options) {
|
|
67
|
+
const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
|
|
68
|
+
return { generations };
|
|
69
|
+
}
|
|
70
|
+
async _generatePrompt(prompt, options) {
|
|
71
|
+
const instance = this.formatInstance(prompt);
|
|
72
|
+
const parameters = {
|
|
73
|
+
temperature: this.temperature,
|
|
74
|
+
topK: this.topK,
|
|
75
|
+
topP: this.topP,
|
|
76
|
+
maxOutputTokens: this.maxOutputTokens,
|
|
77
|
+
};
|
|
78
|
+
const result = await this.connection.request([instance], parameters, options);
|
|
79
|
+
const prediction = this.extractPredictionFromResponse(result);
|
|
80
|
+
return [
|
|
81
|
+
{
|
|
82
|
+
text: prediction.content,
|
|
83
|
+
generationInfo: prediction,
|
|
84
|
+
},
|
|
85
|
+
];
|
|
86
|
+
}
|
|
87
|
+
formatInstance(prompt) {
|
|
88
|
+
return { content: prompt };
|
|
89
|
+
}
|
|
90
|
+
extractPredictionFromResponse(result) {
|
|
91
|
+
return result?.data?.predictions[0];
|
|
92
|
+
}
|
|
93
|
+
}
|
package/dist/text_splitter.cjs
CHANGED
|
@@ -25,14 +25,17 @@ class TextSplitter {
|
|
|
25
25
|
}
|
|
26
26
|
async createDocuments(texts,
|
|
27
27
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
28
|
-
metadatas = []) {
|
|
28
|
+
metadatas = [], chunkHeaderOptions = {}) {
|
|
29
|
+
// if no metadata is provided, we create an empty one for each text
|
|
29
30
|
const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({});
|
|
31
|
+
const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
|
|
30
32
|
const documents = new Array();
|
|
31
33
|
for (let i = 0; i < texts.length; i += 1) {
|
|
32
34
|
const text = texts[i];
|
|
33
35
|
let lineCounterIndex = 1;
|
|
34
36
|
let prevChunk = null;
|
|
35
37
|
for (const chunk of await this.splitText(text)) {
|
|
38
|
+
let pageContent = chunkHeader;
|
|
36
39
|
// we need to count the \n that are in the text before getting removed by the splitting
|
|
37
40
|
let numberOfIntermediateNewLines = 0;
|
|
38
41
|
if (prevChunk) {
|
|
@@ -40,6 +43,9 @@ class TextSplitter {
|
|
|
40
43
|
const indexEndPrevChunk = text.indexOf(prevChunk) + prevChunk.length;
|
|
41
44
|
const removedNewlinesFromSplittingText = text.slice(indexEndPrevChunk, indexChunk);
|
|
42
45
|
numberOfIntermediateNewLines = (removedNewlinesFromSplittingText.match(/\n/g) || []).length;
|
|
46
|
+
if (appendChunkOverlapHeader) {
|
|
47
|
+
pageContent += chunkOverlapHeader;
|
|
48
|
+
}
|
|
43
49
|
}
|
|
44
50
|
lineCounterIndex += numberOfIntermediateNewLines;
|
|
45
51
|
const newLinesCount = (chunk.match(/\n/g) || []).length;
|
|
@@ -54,8 +60,9 @@ class TextSplitter {
|
|
|
54
60
|
..._metadatas[i],
|
|
55
61
|
loc,
|
|
56
62
|
};
|
|
63
|
+
pageContent += chunk;
|
|
57
64
|
documents.push(new document_js_1.Document({
|
|
58
|
-
pageContent
|
|
65
|
+
pageContent,
|
|
59
66
|
metadata: metadataWithLinesNumber,
|
|
60
67
|
}));
|
|
61
68
|
lineCounterIndex += newLinesCount;
|
|
@@ -64,11 +71,11 @@ class TextSplitter {
|
|
|
64
71
|
}
|
|
65
72
|
return documents;
|
|
66
73
|
}
|
|
67
|
-
async splitDocuments(documents) {
|
|
74
|
+
async splitDocuments(documents, chunkHeaderOptions = {}) {
|
|
68
75
|
const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
|
|
69
76
|
const texts = selectedDocuments.map((doc) => doc.pageContent);
|
|
70
77
|
const metadatas = selectedDocuments.map((doc) => doc.metadata);
|
|
71
|
-
return this.createDocuments(texts, metadatas);
|
|
78
|
+
return this.createDocuments(texts, metadatas, chunkHeaderOptions);
|
|
72
79
|
}
|
|
73
80
|
joinDocs(docs, separator) {
|
|
74
81
|
const text = docs.join(separator).trim();
|
package/dist/text_splitter.d.ts
CHANGED
|
@@ -4,13 +4,18 @@ export interface TextSplitterParams {
|
|
|
4
4
|
chunkSize: number;
|
|
5
5
|
chunkOverlap: number;
|
|
6
6
|
}
|
|
7
|
+
export type TextSplitterChunkHeaderOptions = {
|
|
8
|
+
chunkHeader?: string;
|
|
9
|
+
chunkOverlapHeader?: string;
|
|
10
|
+
appendChunkOverlapHeader?: boolean;
|
|
11
|
+
};
|
|
7
12
|
export declare abstract class TextSplitter implements TextSplitterParams {
|
|
8
13
|
chunkSize: number;
|
|
9
14
|
chunkOverlap: number;
|
|
10
15
|
constructor(fields?: Partial<TextSplitterParams>);
|
|
11
16
|
abstract splitText(text: string): Promise<string[]>;
|
|
12
|
-
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Promise<Document[]>;
|
|
13
|
-
splitDocuments(documents: Document[]): Promise<Document[]>;
|
|
17
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
|
|
18
|
+
splitDocuments(documents: Document[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
|
|
14
19
|
private joinDocs;
|
|
15
20
|
mergeSplits(splits: string[], separator: string): string[];
|
|
16
21
|
}
|
package/dist/text_splitter.js
CHANGED
|
@@ -22,14 +22,17 @@ export class TextSplitter {
|
|
|
22
22
|
}
|
|
23
23
|
async createDocuments(texts,
|
|
24
24
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
25
|
-
metadatas = []) {
|
|
25
|
+
metadatas = [], chunkHeaderOptions = {}) {
|
|
26
|
+
// if no metadata is provided, we create an empty one for each text
|
|
26
27
|
const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({});
|
|
28
|
+
const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
|
|
27
29
|
const documents = new Array();
|
|
28
30
|
for (let i = 0; i < texts.length; i += 1) {
|
|
29
31
|
const text = texts[i];
|
|
30
32
|
let lineCounterIndex = 1;
|
|
31
33
|
let prevChunk = null;
|
|
32
34
|
for (const chunk of await this.splitText(text)) {
|
|
35
|
+
let pageContent = chunkHeader;
|
|
33
36
|
// we need to count the \n that are in the text before getting removed by the splitting
|
|
34
37
|
let numberOfIntermediateNewLines = 0;
|
|
35
38
|
if (prevChunk) {
|
|
@@ -37,6 +40,9 @@ export class TextSplitter {
|
|
|
37
40
|
const indexEndPrevChunk = text.indexOf(prevChunk) + prevChunk.length;
|
|
38
41
|
const removedNewlinesFromSplittingText = text.slice(indexEndPrevChunk, indexChunk);
|
|
39
42
|
numberOfIntermediateNewLines = (removedNewlinesFromSplittingText.match(/\n/g) || []).length;
|
|
43
|
+
if (appendChunkOverlapHeader) {
|
|
44
|
+
pageContent += chunkOverlapHeader;
|
|
45
|
+
}
|
|
40
46
|
}
|
|
41
47
|
lineCounterIndex += numberOfIntermediateNewLines;
|
|
42
48
|
const newLinesCount = (chunk.match(/\n/g) || []).length;
|
|
@@ -51,8 +57,9 @@ export class TextSplitter {
|
|
|
51
57
|
..._metadatas[i],
|
|
52
58
|
loc,
|
|
53
59
|
};
|
|
60
|
+
pageContent += chunk;
|
|
54
61
|
documents.push(new Document({
|
|
55
|
-
pageContent
|
|
62
|
+
pageContent,
|
|
56
63
|
metadata: metadataWithLinesNumber,
|
|
57
64
|
}));
|
|
58
65
|
lineCounterIndex += newLinesCount;
|
|
@@ -61,11 +68,11 @@ export class TextSplitter {
|
|
|
61
68
|
}
|
|
62
69
|
return documents;
|
|
63
70
|
}
|
|
64
|
-
async splitDocuments(documents) {
|
|
71
|
+
async splitDocuments(documents, chunkHeaderOptions = {}) {
|
|
65
72
|
const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
|
|
66
73
|
const texts = selectedDocuments.map((doc) => doc.pageContent);
|
|
67
74
|
const metadatas = selectedDocuments.map((doc) => doc.metadata);
|
|
68
|
-
return this.createDocuments(texts, metadatas);
|
|
75
|
+
return this.createDocuments(texts, metadatas, chunkHeaderOptions);
|
|
69
76
|
}
|
|
70
77
|
joinDocs(docs, separator) {
|
|
71
78
|
const text = docs.join(separator).trim();
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { BaseLLMParams } from "../llms/index.js";
|
|
2
|
+
export interface GoogleVertexAIConnectionParams {
|
|
3
|
+
/** Hostname for the API call */
|
|
4
|
+
endpoint?: string;
|
|
5
|
+
/** Region where the LLM is stored */
|
|
6
|
+
location?: string;
|
|
7
|
+
/** Model to use */
|
|
8
|
+
model?: string;
|
|
9
|
+
}
|
|
10
|
+
export interface GoogleVertexAIModelParams {
|
|
11
|
+
/** Sampling temperature to use */
|
|
12
|
+
temperature?: number;
|
|
13
|
+
/**
|
|
14
|
+
* Maximum number of tokens to generate in the completion.
|
|
15
|
+
*/
|
|
16
|
+
maxOutputTokens?: number;
|
|
17
|
+
/**
|
|
18
|
+
* Top-p changes how the model selects tokens for output.
|
|
19
|
+
*
|
|
20
|
+
* Tokens are selected from most probable to least until the sum
|
|
21
|
+
* of their probabilities equals the top-p value.
|
|
22
|
+
*
|
|
23
|
+
* For example, if tokens A, B, and C have a probability of
|
|
24
|
+
* .3, .2, and .1 and the top-p value is .5, then the model will
|
|
25
|
+
* select either A or B as the next token (using temperature).
|
|
26
|
+
*/
|
|
27
|
+
topP?: number;
|
|
28
|
+
/**
|
|
29
|
+
* Top-k changes how the model selects tokens for output.
|
|
30
|
+
*
|
|
31
|
+
* A top-k of 1 means the selected token is the most probable among
|
|
32
|
+
* all tokens in the model’s vocabulary (also called greedy decoding),
|
|
33
|
+
* while a top-k of 3 means that the next token is selected from
|
|
34
|
+
* among the 3 most probable tokens (using temperature).
|
|
35
|
+
*/
|
|
36
|
+
topK?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface GoogleVertexAIBaseLLMInput extends BaseLLMParams, GoogleVertexAIConnectionParams, GoogleVertexAIModelParams {
|
|
39
|
+
}
|
|
40
|
+
export interface GoogleVertexAIBasePrediction {
|
|
41
|
+
safetyAttributes?: any;
|
|
42
|
+
}
|
|
43
|
+
export interface GoogleVertexAILLMResponse<PredictionType extends GoogleVertexAIBasePrediction> {
|
|
44
|
+
data: {
|
|
45
|
+
predictions: PredictionType[];
|
|
46
|
+
};
|
|
47
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GoogleVertexAIConnection = void 0;
|
|
4
|
+
const google_auth_library_1 = require("google-auth-library");
|
|
5
|
+
class GoogleVertexAIConnection {
|
|
6
|
+
constructor(fields, caller) {
|
|
7
|
+
Object.defineProperty(this, "caller", {
|
|
8
|
+
enumerable: true,
|
|
9
|
+
configurable: true,
|
|
10
|
+
writable: true,
|
|
11
|
+
value: void 0
|
|
12
|
+
});
|
|
13
|
+
Object.defineProperty(this, "endpoint", {
|
|
14
|
+
enumerable: true,
|
|
15
|
+
configurable: true,
|
|
16
|
+
writable: true,
|
|
17
|
+
value: "us-central1-aiplatform.googleapis.com"
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(this, "location", {
|
|
20
|
+
enumerable: true,
|
|
21
|
+
configurable: true,
|
|
22
|
+
writable: true,
|
|
23
|
+
value: "us-central1"
|
|
24
|
+
});
|
|
25
|
+
Object.defineProperty(this, "model", {
|
|
26
|
+
enumerable: true,
|
|
27
|
+
configurable: true,
|
|
28
|
+
writable: true,
|
|
29
|
+
value: void 0
|
|
30
|
+
});
|
|
31
|
+
Object.defineProperty(this, "auth", {
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true,
|
|
35
|
+
value: void 0
|
|
36
|
+
});
|
|
37
|
+
this.caller = caller;
|
|
38
|
+
this.endpoint = fields?.endpoint ?? this.endpoint;
|
|
39
|
+
this.location = fields?.location ?? this.location;
|
|
40
|
+
this.model = fields?.model ?? this.model;
|
|
41
|
+
this.auth = new google_auth_library_1.GoogleAuth({
|
|
42
|
+
scopes: "https://www.googleapis.com/auth/cloud-platform",
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
async request(instances, parameters, options) {
|
|
46
|
+
const client = await this.auth.getClient();
|
|
47
|
+
const projectId = await this.auth.getProjectId();
|
|
48
|
+
const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:predict`;
|
|
49
|
+
const method = "POST";
|
|
50
|
+
const data = {
|
|
51
|
+
instances,
|
|
52
|
+
parameters,
|
|
53
|
+
};
|
|
54
|
+
const opts = {
|
|
55
|
+
url,
|
|
56
|
+
method,
|
|
57
|
+
data,
|
|
58
|
+
};
|
|
59
|
+
async function _request() {
|
|
60
|
+
return client.request(opts);
|
|
61
|
+
}
|
|
62
|
+
const response = await this.caller.callWithOptions({ signal: options.signal }, _request.bind(client));
|
|
63
|
+
return response;
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
exports.GoogleVertexAIConnection = GoogleVertexAIConnection;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
import { BaseLanguageModelCallOptions } from "../base_language/index.js";
|
|
3
|
+
import { AsyncCaller } from "./async_caller.js";
|
|
4
|
+
import { GoogleVertexAIBasePrediction, GoogleVertexAIConnectionParams, GoogleVertexAILLMResponse, GoogleVertexAIModelParams } from "../types/googlevertexai-types.js";
|
|
5
|
+
export declare class GoogleVertexAIConnection<CallOptions extends BaseLanguageModelCallOptions, InstanceType, PredictionType extends GoogleVertexAIBasePrediction> implements GoogleVertexAIConnectionParams {
|
|
6
|
+
caller: AsyncCaller;
|
|
7
|
+
endpoint: string;
|
|
8
|
+
location: string;
|
|
9
|
+
model: string;
|
|
10
|
+
auth: GoogleAuth;
|
|
11
|
+
constructor(fields: GoogleVertexAIConnectionParams | undefined, caller: AsyncCaller);
|
|
12
|
+
request(instances: [InstanceType], parameters: GoogleVertexAIModelParams, options: CallOptions): Promise<GoogleVertexAILLMResponse<PredictionType>>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
export class GoogleVertexAIConnection {
|
|
3
|
+
constructor(fields, caller) {
|
|
4
|
+
Object.defineProperty(this, "caller", {
|
|
5
|
+
enumerable: true,
|
|
6
|
+
configurable: true,
|
|
7
|
+
writable: true,
|
|
8
|
+
value: void 0
|
|
9
|
+
});
|
|
10
|
+
Object.defineProperty(this, "endpoint", {
|
|
11
|
+
enumerable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
writable: true,
|
|
14
|
+
value: "us-central1-aiplatform.googleapis.com"
|
|
15
|
+
});
|
|
16
|
+
Object.defineProperty(this, "location", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: "us-central1"
|
|
21
|
+
});
|
|
22
|
+
Object.defineProperty(this, "model", {
|
|
23
|
+
enumerable: true,
|
|
24
|
+
configurable: true,
|
|
25
|
+
writable: true,
|
|
26
|
+
value: void 0
|
|
27
|
+
});
|
|
28
|
+
Object.defineProperty(this, "auth", {
|
|
29
|
+
enumerable: true,
|
|
30
|
+
configurable: true,
|
|
31
|
+
writable: true,
|
|
32
|
+
value: void 0
|
|
33
|
+
});
|
|
34
|
+
this.caller = caller;
|
|
35
|
+
this.endpoint = fields?.endpoint ?? this.endpoint;
|
|
36
|
+
this.location = fields?.location ?? this.location;
|
|
37
|
+
this.model = fields?.model ?? this.model;
|
|
38
|
+
this.auth = new GoogleAuth({
|
|
39
|
+
scopes: "https://www.googleapis.com/auth/cloud-platform",
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
async request(instances, parameters, options) {
|
|
43
|
+
const client = await this.auth.getClient();
|
|
44
|
+
const projectId = await this.auth.getProjectId();
|
|
45
|
+
const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:predict`;
|
|
46
|
+
const method = "POST";
|
|
47
|
+
const data = {
|
|
48
|
+
instances,
|
|
49
|
+
parameters,
|
|
50
|
+
};
|
|
51
|
+
const opts = {
|
|
52
|
+
url,
|
|
53
|
+
method,
|
|
54
|
+
data,
|
|
55
|
+
};
|
|
56
|
+
async function _request() {
|
|
57
|
+
return client.request(opts);
|
|
58
|
+
}
|
|
59
|
+
const response = await this.caller.callWithOptions({ signal: options.signal }, _request.bind(client));
|
|
60
|
+
return response;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
@@ -60,6 +60,12 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
60
60
|
writable: true,
|
|
61
61
|
value: void 0
|
|
62
62
|
});
|
|
63
|
+
Object.defineProperty(this, "filter", {
|
|
64
|
+
enumerable: true,
|
|
65
|
+
configurable: true,
|
|
66
|
+
writable: true,
|
|
67
|
+
value: void 0
|
|
68
|
+
});
|
|
63
69
|
this.numDimensions = args.numDimensions;
|
|
64
70
|
this.embeddings = embeddings;
|
|
65
71
|
this.collectionName = ensureCollectionName(args.collectionName);
|
|
@@ -69,6 +75,7 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
69
75
|
else if ("url" in args) {
|
|
70
76
|
this.url = args.url || "http://localhost:8000";
|
|
71
77
|
}
|
|
78
|
+
this.filter = args.filter;
|
|
72
79
|
}
|
|
73
80
|
async addDocuments(documents) {
|
|
74
81
|
const texts = documents.map(({ pageContent }) => pageContent);
|
|
@@ -78,9 +85,16 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
78
85
|
if (!this.collection) {
|
|
79
86
|
if (!this.index) {
|
|
80
87
|
const { ChromaClient } = await Chroma.imports();
|
|
81
|
-
this.index = new ChromaClient(this.url);
|
|
88
|
+
this.index = new ChromaClient({ path: this.url });
|
|
89
|
+
}
|
|
90
|
+
try {
|
|
91
|
+
this.collection = await this.index.getOrCreateCollection({
|
|
92
|
+
name: this.collectionName,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
catch (err) {
|
|
96
|
+
throw new Error(`Chroma getOrCreateCollection error: ${err}`);
|
|
82
97
|
}
|
|
83
|
-
this.collection = await this.index.getOrCreateCollection(this.collectionName);
|
|
84
98
|
}
|
|
85
99
|
return this.collection;
|
|
86
100
|
}
|
|
@@ -99,13 +113,26 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
99
113
|
}
|
|
100
114
|
const collection = await this.ensureCollection();
|
|
101
115
|
const docstoreSize = await collection.count();
|
|
102
|
-
await collection.add(
|
|
116
|
+
await collection.add({
|
|
117
|
+
ids: Array.from({ length: vectors.length }, (_, i) => (docstoreSize + i).toString()),
|
|
118
|
+
embeddings: vectors,
|
|
119
|
+
metadatas: documents.map(({ metadata }) => metadata),
|
|
120
|
+
documents: documents.map(({ pageContent }) => pageContent),
|
|
121
|
+
});
|
|
103
122
|
}
|
|
104
|
-
async similaritySearchVectorWithScore(query, k) {
|
|
123
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
124
|
+
if (filter && this.filter) {
|
|
125
|
+
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
126
|
+
}
|
|
127
|
+
const _filter = filter ?? this.filter;
|
|
105
128
|
const collection = await this.ensureCollection();
|
|
106
129
|
// similaritySearchVectorWithScore supports one query vector at a time
|
|
107
130
|
// chroma supports multiple query vectors at a time
|
|
108
|
-
const result = await collection.query(
|
|
131
|
+
const result = await collection.query({
|
|
132
|
+
query_embeddings: query,
|
|
133
|
+
n_results: k,
|
|
134
|
+
where: { ..._filter },
|
|
135
|
+
});
|
|
109
136
|
const { ids, distances, documents, metadatas } = result;
|
|
110
137
|
if (!ids || !distances || !documents || !metadatas) {
|
|
111
138
|
return [];
|
|
@@ -119,8 +146,8 @@ class Chroma extends base_js_1.VectorStore {
|
|
|
119
146
|
for (let i = 0; i < firstIds.length; i += 1) {
|
|
120
147
|
results.push([
|
|
121
148
|
new document_js_1.Document({
|
|
122
|
-
pageContent: firstDocuments[i],
|
|
123
|
-
metadata: firstMetadatas[i],
|
|
149
|
+
pageContent: firstDocuments?.[i] ?? "",
|
|
150
|
+
metadata: firstMetadatas?.[i] ?? {},
|
|
124
151
|
}),
|
|
125
152
|
firstDistances[i],
|
|
126
153
|
]);
|
|
@@ -6,22 +6,26 @@ export type ChromaLibArgs = {
|
|
|
6
6
|
url?: string;
|
|
7
7
|
numDimensions?: number;
|
|
8
8
|
collectionName?: string;
|
|
9
|
+
filter?: object;
|
|
9
10
|
} | {
|
|
10
11
|
index?: ChromaClientT;
|
|
11
12
|
numDimensions?: number;
|
|
12
13
|
collectionName?: string;
|
|
14
|
+
filter?: object;
|
|
13
15
|
};
|
|
14
16
|
export declare class Chroma extends VectorStore {
|
|
17
|
+
FilterType: object;
|
|
15
18
|
index?: ChromaClientT;
|
|
16
19
|
collection?: Collection;
|
|
17
20
|
collectionName: string;
|
|
18
21
|
numDimensions?: number;
|
|
19
22
|
url: string;
|
|
23
|
+
filter?: object;
|
|
20
24
|
constructor(embeddings: Embeddings, args: ChromaLibArgs);
|
|
21
25
|
addDocuments(documents: Document[]): Promise<void>;
|
|
22
26
|
ensureCollection(): Promise<Collection>;
|
|
23
27
|
addVectors(vectors: number[][], documents: Document[]): Promise<void>;
|
|
24
|
-
similaritySearchVectorWithScore(query: number[], k: number): Promise<[Document<Record<string, any>>, number][]>;
|
|
28
|
+
similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document<Record<string, any>>, number][]>;
|
|
25
29
|
static fromTexts(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig: {
|
|
26
30
|
collectionName?: string;
|
|
27
31
|
url?: string;
|
|
@@ -34,6 +34,12 @@ export class Chroma extends VectorStore {
|
|
|
34
34
|
writable: true,
|
|
35
35
|
value: void 0
|
|
36
36
|
});
|
|
37
|
+
Object.defineProperty(this, "filter", {
|
|
38
|
+
enumerable: true,
|
|
39
|
+
configurable: true,
|
|
40
|
+
writable: true,
|
|
41
|
+
value: void 0
|
|
42
|
+
});
|
|
37
43
|
this.numDimensions = args.numDimensions;
|
|
38
44
|
this.embeddings = embeddings;
|
|
39
45
|
this.collectionName = ensureCollectionName(args.collectionName);
|
|
@@ -43,6 +49,7 @@ export class Chroma extends VectorStore {
|
|
|
43
49
|
else if ("url" in args) {
|
|
44
50
|
this.url = args.url || "http://localhost:8000";
|
|
45
51
|
}
|
|
52
|
+
this.filter = args.filter;
|
|
46
53
|
}
|
|
47
54
|
async addDocuments(documents) {
|
|
48
55
|
const texts = documents.map(({ pageContent }) => pageContent);
|
|
@@ -52,9 +59,16 @@ export class Chroma extends VectorStore {
|
|
|
52
59
|
if (!this.collection) {
|
|
53
60
|
if (!this.index) {
|
|
54
61
|
const { ChromaClient } = await Chroma.imports();
|
|
55
|
-
this.index = new ChromaClient(this.url);
|
|
62
|
+
this.index = new ChromaClient({ path: this.url });
|
|
63
|
+
}
|
|
64
|
+
try {
|
|
65
|
+
this.collection = await this.index.getOrCreateCollection({
|
|
66
|
+
name: this.collectionName,
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
catch (err) {
|
|
70
|
+
throw new Error(`Chroma getOrCreateCollection error: ${err}`);
|
|
56
71
|
}
|
|
57
|
-
this.collection = await this.index.getOrCreateCollection(this.collectionName);
|
|
58
72
|
}
|
|
59
73
|
return this.collection;
|
|
60
74
|
}
|
|
@@ -73,13 +87,26 @@ export class Chroma extends VectorStore {
|
|
|
73
87
|
}
|
|
74
88
|
const collection = await this.ensureCollection();
|
|
75
89
|
const docstoreSize = await collection.count();
|
|
76
|
-
await collection.add(
|
|
90
|
+
await collection.add({
|
|
91
|
+
ids: Array.from({ length: vectors.length }, (_, i) => (docstoreSize + i).toString()),
|
|
92
|
+
embeddings: vectors,
|
|
93
|
+
metadatas: documents.map(({ metadata }) => metadata),
|
|
94
|
+
documents: documents.map(({ pageContent }) => pageContent),
|
|
95
|
+
});
|
|
77
96
|
}
|
|
78
|
-
async similaritySearchVectorWithScore(query, k) {
|
|
97
|
+
async similaritySearchVectorWithScore(query, k, filter) {
|
|
98
|
+
if (filter && this.filter) {
|
|
99
|
+
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
100
|
+
}
|
|
101
|
+
const _filter = filter ?? this.filter;
|
|
79
102
|
const collection = await this.ensureCollection();
|
|
80
103
|
// similaritySearchVectorWithScore supports one query vector at a time
|
|
81
104
|
// chroma supports multiple query vectors at a time
|
|
82
|
-
const result = await collection.query(
|
|
105
|
+
const result = await collection.query({
|
|
106
|
+
query_embeddings: query,
|
|
107
|
+
n_results: k,
|
|
108
|
+
where: { ..._filter },
|
|
109
|
+
});
|
|
83
110
|
const { ids, distances, documents, metadatas } = result;
|
|
84
111
|
if (!ids || !distances || !documents || !metadatas) {
|
|
85
112
|
return [];
|
|
@@ -93,8 +120,8 @@ export class Chroma extends VectorStore {
|
|
|
93
120
|
for (let i = 0; i < firstIds.length; i += 1) {
|
|
94
121
|
results.push([
|
|
95
122
|
new Document({
|
|
96
|
-
pageContent: firstDocuments[i],
|
|
97
|
-
metadata: firstMetadatas[i],
|
|
123
|
+
pageContent: firstDocuments?.[i] ?? "",
|
|
124
|
+
metadata: firstMetadatas?.[i] ?? {},
|
|
98
125
|
}),
|
|
99
126
|
firstDistances[i],
|
|
100
127
|
]);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// CommonJS entrypoint: re-export the compiled Vertex AI LLM implementation.
module.exports = require('../dist/llms/googlevertexai.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Type declarations re-exported from the compiled Vertex AI LLM module.
export * from '../dist/llms/googlevertexai.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// ESM entrypoint: re-export the compiled Vertex AI LLM implementation.
export * from '../dist/llms/googlevertexai.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.80",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -85,6 +85,9 @@
|
|
|
85
85
|
"llms/replicate.cjs",
|
|
86
86
|
"llms/replicate.js",
|
|
87
87
|
"llms/replicate.d.ts",
|
|
88
|
+
"llms/googlevertexai.cjs",
|
|
89
|
+
"llms/googlevertexai.js",
|
|
90
|
+
"llms/googlevertexai.d.ts",
|
|
88
91
|
"llms/sagemaker_endpoint.cjs",
|
|
89
92
|
"llms/sagemaker_endpoint.js",
|
|
90
93
|
"llms/sagemaker_endpoint.d.ts",
|
|
@@ -377,7 +380,7 @@
|
|
|
377
380
|
"apify-client": "^2.7.1",
|
|
378
381
|
"axios": "^0.26.0",
|
|
379
382
|
"cheerio": "^1.0.0-rc.12",
|
|
380
|
-
"chromadb": "^1.4.
|
|
383
|
+
"chromadb": "^1.4.2",
|
|
381
384
|
"cohere-ai": "^5.0.2",
|
|
382
385
|
"d3-dsv": "^2.0.0",
|
|
383
386
|
"dotenv": "^16.0.3",
|
|
@@ -390,9 +393,11 @@
|
|
|
390
393
|
"eslint-plugin-no-instanceof": "^1.0.1",
|
|
391
394
|
"eslint-plugin-prettier": "^4.2.1",
|
|
392
395
|
"faiss-node": "^0.1.1",
|
|
396
|
+
"google-auth-library": "^8.8.0",
|
|
393
397
|
"graphql": "^16.6.0",
|
|
394
398
|
"hnswlib-node": "^1.4.2",
|
|
395
399
|
"html-to-text": "^9.0.5",
|
|
400
|
+
"ignore": "^5.2.0",
|
|
396
401
|
"jest": "^29.5.0",
|
|
397
402
|
"mammoth": "^1.5.1",
|
|
398
403
|
"meriyah": "^4.3.7",
|
|
@@ -431,13 +436,15 @@
|
|
|
431
436
|
"apify-client": "^2.7.1",
|
|
432
437
|
"axios": "*",
|
|
433
438
|
"cheerio": "^1.0.0-rc.12",
|
|
434
|
-
"chromadb": "^1.4.
|
|
439
|
+
"chromadb": "^1.4.2",
|
|
435
440
|
"cohere-ai": "^5.0.2",
|
|
436
441
|
"d3-dsv": "^2.0.0",
|
|
437
442
|
"epub2": "^3.0.1",
|
|
438
443
|
"faiss-node": "^0.1.1",
|
|
444
|
+
"google-auth-library": "^8.8.0",
|
|
439
445
|
"hnswlib-node": "^1.4.2",
|
|
440
446
|
"html-to-text": "^9.0.5",
|
|
447
|
+
"ignore": "^5.2.0",
|
|
441
448
|
"mammoth": "*",
|
|
442
449
|
"meriyah": "*",
|
|
443
450
|
"mongodb": "^5.2.0",
|
|
@@ -524,6 +531,9 @@
|
|
|
524
531
|
"html-to-text": {
|
|
525
532
|
"optional": true
|
|
526
533
|
},
|
|
534
|
+
"ignore": {
|
|
535
|
+
"optional": true
|
|
536
|
+
},
|
|
527
537
|
"mammoth": {
|
|
528
538
|
"optional": true
|
|
529
539
|
},
|
|
@@ -731,6 +741,11 @@
|
|
|
731
741
|
"import": "./llms/replicate.js",
|
|
732
742
|
"require": "./llms/replicate.cjs"
|
|
733
743
|
},
|
|
744
|
+
"./llms/googlevertexai": {
|
|
745
|
+
"types": "./llms/googlevertexai.d.ts",
|
|
746
|
+
"import": "./llms/googlevertexai.js",
|
|
747
|
+
"require": "./llms/googlevertexai.cjs"
|
|
748
|
+
},
|
|
734
749
|
"./llms/sagemaker_endpoint": {
|
|
735
750
|
"types": "./llms/sagemaker_endpoint.d.ts",
|
|
736
751
|
"import": "./llms/sagemaker_endpoint.js",
|