langchain 0.0.78 → 0.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chains/query_constructor/prompt.cjs +5 -5
- package/dist/chains/query_constructor/prompt.d.ts +2 -2
- package/dist/chains/query_constructor/prompt.js +5 -5
- package/dist/chains/sql_db/sql_db_chain.cjs +0 -3
- package/dist/chains/sql_db/sql_db_chain.js +0 -3
- package/dist/chains/vector_db_qa.cjs +1 -1
- package/dist/chains/vector_db_qa.js +1 -1
- package/dist/client/langchainplus.cjs +143 -52
- package/dist/client/langchainplus.d.ts +72 -15
- package/dist/client/langchainplus.js +144 -53
- package/dist/document_loaders/index.cjs +1 -3
- package/dist/document_loaders/index.d.ts +0 -1
- package/dist/document_loaders/index.js +0 -1
- package/dist/document_loaders/web/github.cjs +38 -23
- package/dist/document_loaders/web/github.d.ts +5 -2
- package/dist/document_loaders/web/github.js +38 -23
- package/dist/llms/googlevertexai.cjs +97 -0
- package/dist/llms/googlevertexai.d.ts +43 -0
- package/dist/llms/googlevertexai.js +93 -0
- package/dist/prompts/selectors/conditional.cjs +4 -0
- package/dist/prompts/selectors/conditional.d.ts +5 -0
- package/dist/prompts/selectors/conditional.js +4 -0
- package/dist/retrievers/metal.d.ts +2 -1
- package/dist/stores/message/redis.cjs +1 -10
- package/dist/stores/message/redis.js +1 -10
- package/dist/text_splitter.cjs +11 -4
- package/dist/text_splitter.d.ts +7 -2
- package/dist/text_splitter.js +11 -4
- package/dist/types/googlevertexai-types.cjs +2 -0
- package/dist/types/googlevertexai-types.d.ts +47 -0
- package/dist/types/googlevertexai-types.js +1 -0
- package/dist/util/googlevertexai-connection.cjs +66 -0
- package/dist/util/googlevertexai-connection.d.ts +13 -0
- package/dist/util/googlevertexai-connection.js +62 -0
- package/dist/vectorstores/chroma.cjs +34 -7
- package/dist/vectorstores/chroma.d.ts +5 -1
- package/dist/vectorstores/chroma.js +34 -7
- package/dist/vectorstores/milvus.cjs +9 -30
- package/dist/vectorstores/milvus.d.ts +0 -3
- package/dist/vectorstores/milvus.js +9 -30
- package/llms/googlevertexai.cjs +1 -0
- package/llms/googlevertexai.d.ts +1 -0
- package/llms/googlevertexai.js +1 -0
- package/package.json +21 -5
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { LangChainTracer } from "../callbacks/handlers/tracer_langchain.js";
|
|
1
|
+
import { LangChainTracer, } from "../callbacks/handlers/tracer_langchain.js";
|
|
2
2
|
import { mapStoredMessagesToChatMessages } from "../stores/message/utils.js";
|
|
3
3
|
import { AsyncCaller } from "../util/async_caller.js";
|
|
4
4
|
// utility functions
|
|
@@ -7,7 +7,7 @@ const isLocalhost = (url) => {
|
|
|
7
7
|
const hostname = strippedUrl.split("/")[0].split(":")[0];
|
|
8
8
|
return (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1");
|
|
9
9
|
};
|
|
10
|
-
const getSeededTenantId = async (apiUrl, apiKey, callerOptions = undefined) => {
|
|
10
|
+
const getSeededTenantId = async (apiUrl, { apiKey, callerOptions, }) => {
|
|
11
11
|
// Get the tenant ID from the seeded tenant
|
|
12
12
|
const caller = new AsyncCaller(callerOptions ?? {});
|
|
13
13
|
const url = `${apiUrl}/tenants`;
|
|
@@ -15,7 +15,7 @@ const getSeededTenantId = async (apiUrl, apiKey, callerOptions = undefined) => {
|
|
|
15
15
|
try {
|
|
16
16
|
response = await caller.call(fetch, url, {
|
|
17
17
|
method: "GET",
|
|
18
|
-
headers: apiKey ? {
|
|
18
|
+
headers: apiKey ? { "x-api-key": apiKey } : undefined,
|
|
19
19
|
});
|
|
20
20
|
}
|
|
21
21
|
catch (err) {
|
|
@@ -76,18 +76,24 @@ async function getModelOrFactoryType(llm) {
|
|
|
76
76
|
throw new Error("Unknown model or factory type");
|
|
77
77
|
}
|
|
78
78
|
export class LangChainPlusClient {
|
|
79
|
-
constructor(
|
|
79
|
+
constructor(config) {
|
|
80
80
|
Object.defineProperty(this, "apiKey", {
|
|
81
81
|
enumerable: true,
|
|
82
82
|
configurable: true,
|
|
83
83
|
writable: true,
|
|
84
|
-
value:
|
|
84
|
+
value: typeof process !== "undefined"
|
|
85
|
+
? // eslint-disable-next-line no-process-env
|
|
86
|
+
process.env?.LANGCHAIN_API_KEY
|
|
87
|
+
: undefined
|
|
85
88
|
});
|
|
86
89
|
Object.defineProperty(this, "apiUrl", {
|
|
87
90
|
enumerable: true,
|
|
88
91
|
configurable: true,
|
|
89
92
|
writable: true,
|
|
90
|
-
value:
|
|
93
|
+
value: (typeof process !== "undefined"
|
|
94
|
+
? // eslint-disable-next-line no-process-env
|
|
95
|
+
process.env?.LANGCHAIN_ENDPOINT
|
|
96
|
+
: undefined) || "http://localhost:8000"
|
|
91
97
|
});
|
|
92
98
|
Object.defineProperty(this, "tenantId", {
|
|
93
99
|
enumerable: true,
|
|
@@ -101,15 +107,45 @@ export class LangChainPlusClient {
|
|
|
101
107
|
writable: true,
|
|
102
108
|
value: void 0
|
|
103
109
|
});
|
|
104
|
-
this.apiUrl = apiUrl;
|
|
105
|
-
this.apiKey = apiKey;
|
|
106
|
-
|
|
110
|
+
this.apiUrl = config.apiUrl ?? this.apiUrl;
|
|
111
|
+
this.apiKey = config.apiKey;
|
|
112
|
+
const tenantId = config.tenantId ??
|
|
113
|
+
(typeof process !== "undefined"
|
|
114
|
+
? // eslint-disable-next-line no-process-env
|
|
115
|
+
process.env?.LANGCHAIN_TENANT_ID
|
|
116
|
+
: undefined);
|
|
117
|
+
if (tenantId === undefined) {
|
|
118
|
+
throw new Error("No tenant ID provided and no LANGCHAIN_TENANT_ID env var");
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
this.tenantId = tenantId;
|
|
122
|
+
}
|
|
107
123
|
this.validateApiKeyIfHosted();
|
|
108
|
-
this.caller = new AsyncCaller(callerOptions ?? {});
|
|
124
|
+
this.caller = new AsyncCaller(config.callerOptions ?? {});
|
|
109
125
|
}
|
|
110
|
-
static async create(
|
|
111
|
-
const
|
|
112
|
-
|
|
126
|
+
static async create(config = {}) {
|
|
127
|
+
const apiUrl_ = config.apiUrl ??
|
|
128
|
+
((typeof process !== "undefined"
|
|
129
|
+
? // eslint-disable-next-line no-process-env
|
|
130
|
+
process.env?.LANGCHAIN_ENDPOINT
|
|
131
|
+
: undefined) ||
|
|
132
|
+
"http://localhost:8000");
|
|
133
|
+
const apiKey_ = config.apiKey ??
|
|
134
|
+
(typeof process !== "undefined"
|
|
135
|
+
? // eslint-disable-next-line no-process-env
|
|
136
|
+
process.env?.LANGCHAIN_API_KEY
|
|
137
|
+
: undefined);
|
|
138
|
+
const tenantId_ = config.tenantId ??
|
|
139
|
+
((typeof process !== "undefined"
|
|
140
|
+
? // eslint-disable-next-line no-process-env
|
|
141
|
+
process.env?.LANGCHAIN_TENANT_ID
|
|
142
|
+
: undefined) ||
|
|
143
|
+
(await getSeededTenantId(apiUrl_, { apiKey: apiKey_ })));
|
|
144
|
+
return new LangChainPlusClient({
|
|
145
|
+
tenantId: tenantId_,
|
|
146
|
+
apiKey: apiKey_,
|
|
147
|
+
apiUrl: apiUrl_,
|
|
148
|
+
});
|
|
113
149
|
}
|
|
114
150
|
validateApiKeyIfHosted() {
|
|
115
151
|
const isLocal = isLocalhost(this.apiUrl);
|
|
@@ -120,24 +156,21 @@ export class LangChainPlusClient {
|
|
|
120
156
|
get headers() {
|
|
121
157
|
const headers = {};
|
|
122
158
|
if (this.apiKey) {
|
|
123
|
-
headers
|
|
159
|
+
headers["x-api-key"] = `${this.apiKey}`;
|
|
124
160
|
}
|
|
125
161
|
return headers;
|
|
126
162
|
}
|
|
127
163
|
get queryParams() {
|
|
128
|
-
return { tenant_id: this.tenantId };
|
|
129
|
-
}
|
|
130
|
-
async _get(path, queryParams
|
|
131
|
-
const params =
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
? `${queryString}&${encodeURIComponent(key)}=${encodeURIComponent(params[key])}`
|
|
137
|
-
: `${encodeURIComponent(key)}=${encodeURIComponent(params[key])}`;
|
|
138
|
-
}
|
|
164
|
+
return new URLSearchParams({ tenant_id: this.tenantId });
|
|
165
|
+
}
|
|
166
|
+
async _get(path, queryParams) {
|
|
167
|
+
const params = this.queryParams;
|
|
168
|
+
if (queryParams) {
|
|
169
|
+
queryParams.forEach((value, key) => {
|
|
170
|
+
params.append(key, value);
|
|
171
|
+
});
|
|
139
172
|
}
|
|
140
|
-
const url = `${this.apiUrl}${path}
|
|
173
|
+
const url = `${this.apiUrl}${path}?${params.toString()}`;
|
|
141
174
|
const response = await this.caller.call(fetch, url, {
|
|
142
175
|
method: "GET",
|
|
143
176
|
headers: this.headers,
|
|
@@ -147,14 +180,73 @@ export class LangChainPlusClient {
|
|
|
147
180
|
}
|
|
148
181
|
return response.json();
|
|
149
182
|
}
|
|
150
|
-
async
|
|
183
|
+
async readRun(runId) {
|
|
184
|
+
return await this._get(`/runs/${runId}`);
|
|
185
|
+
}
|
|
186
|
+
async listRuns({ sessionId, sessionName, executionOrder = 1, runType, error, }) {
|
|
187
|
+
const queryParams = new URLSearchParams();
|
|
188
|
+
let sessionId_ = sessionId;
|
|
189
|
+
if (sessionName) {
|
|
190
|
+
if (sessionId) {
|
|
191
|
+
throw new Error("Only one of session_id or session_name may be given");
|
|
192
|
+
}
|
|
193
|
+
sessionId_ = (await this.readSession({ sessionName })).id;
|
|
194
|
+
}
|
|
195
|
+
if (sessionId_) {
|
|
196
|
+
queryParams.append("session", sessionId_);
|
|
197
|
+
}
|
|
198
|
+
if (executionOrder) {
|
|
199
|
+
queryParams.append("execution_order", executionOrder.toString());
|
|
200
|
+
}
|
|
201
|
+
if (runType) {
|
|
202
|
+
queryParams.append("run_type", runType);
|
|
203
|
+
}
|
|
204
|
+
if (error !== undefined) {
|
|
205
|
+
queryParams.append("error", error.toString());
|
|
206
|
+
}
|
|
207
|
+
return this._get("/runs", queryParams);
|
|
208
|
+
}
|
|
209
|
+
async readSession({ sessionId, sessionName, }) {
|
|
210
|
+
let path = "/sessions";
|
|
211
|
+
const params = new URLSearchParams();
|
|
212
|
+
if (sessionId !== undefined && sessionName !== undefined) {
|
|
213
|
+
throw new Error("Must provide either sessionName or sessionId, not both");
|
|
214
|
+
}
|
|
215
|
+
else if (sessionId !== undefined) {
|
|
216
|
+
path += `/${sessionId}`;
|
|
217
|
+
}
|
|
218
|
+
else if (sessionName !== undefined) {
|
|
219
|
+
params.append("name", sessionName);
|
|
220
|
+
}
|
|
221
|
+
else {
|
|
222
|
+
throw new Error("Must provide sessionName or sessionId");
|
|
223
|
+
}
|
|
224
|
+
const response = await this._get(path, params);
|
|
225
|
+
let result;
|
|
226
|
+
if (Array.isArray(response)) {
|
|
227
|
+
if (response.length === 0) {
|
|
228
|
+
throw new Error(`Session[id=${sessionId}, name=${sessionName}] not found`);
|
|
229
|
+
}
|
|
230
|
+
result = response[0];
|
|
231
|
+
}
|
|
232
|
+
else {
|
|
233
|
+
result = response;
|
|
234
|
+
}
|
|
235
|
+
return result;
|
|
236
|
+
}
|
|
237
|
+
async listSessions() {
|
|
238
|
+
return this._get("/sessions");
|
|
239
|
+
}
|
|
240
|
+
async uploadCsv({ csvFile, fileName, inputKeys, outputKeys, description, }) {
|
|
151
241
|
const url = `${this.apiUrl}/datasets/upload`;
|
|
152
242
|
const formData = new FormData();
|
|
153
243
|
formData.append("file", csvFile, fileName);
|
|
154
244
|
formData.append("input_keys", inputKeys.join(","));
|
|
155
245
|
formData.append("output_keys", outputKeys.join(","));
|
|
156
|
-
formData.append("description", description);
|
|
157
246
|
formData.append("tenant_id", this.tenantId);
|
|
247
|
+
if (description) {
|
|
248
|
+
formData.append("description", description);
|
|
249
|
+
}
|
|
158
250
|
const response = await this.caller.call(fetch, url, {
|
|
159
251
|
method: "POST",
|
|
160
252
|
headers: this.headers,
|
|
@@ -170,7 +262,7 @@ export class LangChainPlusClient {
|
|
|
170
262
|
const result = await response.json();
|
|
171
263
|
return result;
|
|
172
264
|
}
|
|
173
|
-
async createDataset(name, description) {
|
|
265
|
+
async createDataset(name, { description }) {
|
|
174
266
|
const response = await this.caller.call(fetch, `${this.apiUrl}/datasets`, {
|
|
175
267
|
method: "POST",
|
|
176
268
|
headers: { ...this.headers, "Content-Type": "application/json" },
|
|
@@ -190,10 +282,10 @@ export class LangChainPlusClient {
|
|
|
190
282
|
const result = await response.json();
|
|
191
283
|
return result;
|
|
192
284
|
}
|
|
193
|
-
async readDataset(datasetId, datasetName) {
|
|
285
|
+
async readDataset({ datasetId, datasetName, }) {
|
|
194
286
|
let path = "/datasets";
|
|
195
|
-
//
|
|
196
|
-
const params = { limit: 1 };
|
|
287
|
+
// limit to 1 result
|
|
288
|
+
const params = new URLSearchParams({ limit: "1" });
|
|
197
289
|
if (datasetId !== undefined && datasetName !== undefined) {
|
|
198
290
|
throw new Error("Must provide either datasetName or datasetId, not both");
|
|
199
291
|
}
|
|
@@ -201,7 +293,7 @@ export class LangChainPlusClient {
|
|
|
201
293
|
path += `/${datasetId}`;
|
|
202
294
|
}
|
|
203
295
|
else if (datasetName !== undefined) {
|
|
204
|
-
params.name
|
|
296
|
+
params.append("name", datasetName);
|
|
205
297
|
}
|
|
206
298
|
else {
|
|
207
299
|
throw new Error("Must provide datasetName or datasetId");
|
|
@@ -219,24 +311,23 @@ export class LangChainPlusClient {
|
|
|
219
311
|
}
|
|
220
312
|
return result;
|
|
221
313
|
}
|
|
222
|
-
async listDatasets(limit = 100) {
|
|
314
|
+
async listDatasets({ limit = 100, } = {}) {
|
|
223
315
|
const path = "/datasets";
|
|
224
|
-
|
|
225
|
-
const params = { limit };
|
|
316
|
+
const params = new URLSearchParams({ limit: limit.toString() });
|
|
226
317
|
const response = await this._get(path, params);
|
|
227
318
|
if (!Array.isArray(response)) {
|
|
228
319
|
throw new Error(`Expected ${path} to return an array, but got ${response}`);
|
|
229
320
|
}
|
|
230
321
|
return response;
|
|
231
322
|
}
|
|
232
|
-
async deleteDataset(datasetId, datasetName) {
|
|
323
|
+
async deleteDataset({ datasetId, datasetName, }) {
|
|
233
324
|
let path = "/datasets";
|
|
234
325
|
let datasetId_ = datasetId;
|
|
235
326
|
if (datasetId !== undefined && datasetName !== undefined) {
|
|
236
327
|
throw new Error("Must provide either datasetName or datasetId, not both");
|
|
237
328
|
}
|
|
238
329
|
else if (datasetName !== undefined) {
|
|
239
|
-
const dataset = await this.readDataset(
|
|
330
|
+
const dataset = await this.readDataset({ datasetName });
|
|
240
331
|
datasetId_ = dataset.id;
|
|
241
332
|
}
|
|
242
333
|
if (datasetId_ !== undefined) {
|
|
@@ -255,7 +346,7 @@ export class LangChainPlusClient {
|
|
|
255
346
|
const results = await response.json();
|
|
256
347
|
return results;
|
|
257
348
|
}
|
|
258
|
-
async createExample(inputs, outputs
|
|
349
|
+
async createExample(inputs, outputs, { datasetId, datasetName, createdAt, }) {
|
|
259
350
|
let datasetId_ = datasetId;
|
|
260
351
|
if (datasetId_ === undefined && datasetName === undefined) {
|
|
261
352
|
throw new Error("Must provide either datasetName or datasetId");
|
|
@@ -264,7 +355,7 @@ export class LangChainPlusClient {
|
|
|
264
355
|
throw new Error("Must provide either datasetName or datasetId, not both");
|
|
265
356
|
}
|
|
266
357
|
else if (datasetId_ === undefined) {
|
|
267
|
-
const dataset = await this.readDataset(
|
|
358
|
+
const dataset = await this.readDataset({ datasetName });
|
|
268
359
|
datasetId_ = dataset.id;
|
|
269
360
|
}
|
|
270
361
|
const createdAt_ = createdAt || new Date();
|
|
@@ -289,7 +380,7 @@ export class LangChainPlusClient {
|
|
|
289
380
|
const path = `/examples/${exampleId}`;
|
|
290
381
|
return await this._get(path);
|
|
291
382
|
}
|
|
292
|
-
async listExamples(datasetId
|
|
383
|
+
async listExamples({ datasetId, datasetName, } = {}) {
|
|
293
384
|
let datasetId_;
|
|
294
385
|
if (datasetId !== undefined && datasetName !== undefined) {
|
|
295
386
|
throw new Error("Must provide either datasetName or datasetId, not both");
|
|
@@ -298,15 +389,13 @@ export class LangChainPlusClient {
|
|
|
298
389
|
datasetId_ = datasetId;
|
|
299
390
|
}
|
|
300
391
|
else if (datasetName !== undefined) {
|
|
301
|
-
const dataset = await this.readDataset(
|
|
392
|
+
const dataset = await this.readDataset({ datasetName });
|
|
302
393
|
datasetId_ = dataset.id;
|
|
303
394
|
}
|
|
304
395
|
else {
|
|
305
396
|
throw new Error("Must provide a datasetName or datasetId");
|
|
306
397
|
}
|
|
307
|
-
const response = await this._get("/examples", {
|
|
308
|
-
dataset: datasetId_,
|
|
309
|
-
});
|
|
398
|
+
const response = await this._get("/examples", new URLSearchParams({ dataset: datasetId_ }));
|
|
310
399
|
if (!Array.isArray(response)) {
|
|
311
400
|
throw new Error(`Expected /examples to return an array, but got ${response}`);
|
|
312
401
|
}
|
|
@@ -324,7 +413,7 @@ export class LangChainPlusClient {
|
|
|
324
413
|
const result = await response.json();
|
|
325
414
|
return result;
|
|
326
415
|
}
|
|
327
|
-
async runLLM(example, tracer, llm, numRepetitions = 1) {
|
|
416
|
+
async runLLM(example, tracer, llm, { numRepetitions = 1 }) {
|
|
328
417
|
const results = await Promise.all(Array.from({ length: numRepetitions }).map(async () => {
|
|
329
418
|
try {
|
|
330
419
|
const prompt = example.inputs.prompt;
|
|
@@ -337,7 +426,7 @@ export class LangChainPlusClient {
|
|
|
337
426
|
}));
|
|
338
427
|
return results;
|
|
339
428
|
}
|
|
340
|
-
async runChain(example, tracer, chainFactory, numRepetitions = 1) {
|
|
429
|
+
async runChain(example, tracer, chainFactory, { numRepetitions = 1, }) {
|
|
341
430
|
const results = await Promise.all(Array.from({ length: numRepetitions }).map(async () => {
|
|
342
431
|
try {
|
|
343
432
|
const chain = await chainFactory();
|
|
@@ -350,7 +439,7 @@ export class LangChainPlusClient {
|
|
|
350
439
|
}));
|
|
351
440
|
return results;
|
|
352
441
|
}
|
|
353
|
-
async runChatModel(example, tracer, chatModel, numRepetitions = 1) {
|
|
442
|
+
async runChatModel(example, tracer, chatModel, { numRepetitions = 1, }) {
|
|
354
443
|
const results = await Promise.all(Array.from({ length: numRepetitions }).map(async () => {
|
|
355
444
|
try {
|
|
356
445
|
const messages = example.inputs.messages;
|
|
@@ -363,8 +452,8 @@ export class LangChainPlusClient {
|
|
|
363
452
|
}));
|
|
364
453
|
return results;
|
|
365
454
|
}
|
|
366
|
-
async runOnDataset(datasetName, llmOrChainFactory, numRepetitions = 1, sessionName =
|
|
367
|
-
const examples = await this.listExamples(
|
|
455
|
+
async runOnDataset(datasetName, llmOrChainFactory, { numRepetitions = 1, sessionName, } = {}) {
|
|
456
|
+
const examples = await this.listExamples({ datasetName });
|
|
368
457
|
let sessionName_;
|
|
369
458
|
if (sessionName === undefined) {
|
|
370
459
|
const currentTime = new Date().toISOString();
|
|
@@ -382,17 +471,19 @@ export class LangChainPlusClient {
|
|
|
382
471
|
});
|
|
383
472
|
if (modelOrFactoryType === "llm") {
|
|
384
473
|
const llm = llmOrChainFactory;
|
|
385
|
-
const llmResult = await this.runLLM(example, tracer, llm,
|
|
474
|
+
const llmResult = await this.runLLM(example, tracer, llm, {
|
|
475
|
+
numRepetitions,
|
|
476
|
+
});
|
|
386
477
|
results[example.id] = llmResult;
|
|
387
478
|
}
|
|
388
479
|
else if (modelOrFactoryType === "chainFactory") {
|
|
389
480
|
const chainFactory = llmOrChainFactory;
|
|
390
|
-
const chainResult = await this.runChain(example, tracer, chainFactory, numRepetitions);
|
|
481
|
+
const chainResult = await this.runChain(example, tracer, chainFactory, { numRepetitions });
|
|
391
482
|
results[example.id] = chainResult;
|
|
392
483
|
}
|
|
393
484
|
else if (modelOrFactoryType === "chatModel") {
|
|
394
485
|
const chatModel = llmOrChainFactory;
|
|
395
|
-
const chatModelResult = await this.runChatModel(example, tracer, chatModel, numRepetitions);
|
|
486
|
+
const chatModelResult = await this.runChatModel(example, tracer, chatModel, { numRepetitions });
|
|
396
487
|
results[example.id] = chatModelResult;
|
|
397
488
|
}
|
|
398
489
|
else {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
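exports.UnstructuredLoader = exports.GithubRepoLoader = exports.NotionLoader = exports.CSVLoader = exports.JSONLinesLoader = exports.JSONLoader = exports.TextLoader = exports.EPubLoader = exports.DocxLoader = exports.PDFLoader = exports.SRTLoader = exports.UnknownHandling = exports.DirectoryLoader = exports.IMSDBLoader = exports.HNLoader = exports.GitbookLoader = exports.CollegeConfidentialLoader = exports.PuppeteerWebBaseLoader = exports.CheerioWebBaseLoader = exports.BaseDocumentLoader = void 0;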
|
|
3
|
+
exports.UnstructuredLoader = exports.NotionLoader = exports.CSVLoader = exports.JSONLinesLoader = exports.JSONLoader = exports.TextLoader = exports.EPubLoader = exports.DocxLoader = exports.PDFLoader = exports.SRTLoader = exports.UnknownHandling = exports.DirectoryLoader = exports.IMSDBLoader = exports.HNLoader = exports.GitbookLoader = exports.CollegeConfidentialLoader = exports.PuppeteerWebBaseLoader = exports.CheerioWebBaseLoader = exports.BaseDocumentLoader = void 0;
|
|
4
4
|
/* #__PURE__ */ console.error("[WARN] Importing from 'langchain/document_loaders' is deprecated. Import from eg. 'langchain/document_loaders/fs/text' or 'langchain/document_loaders/web/cheerio' instead. See https://js.langchain.com/docs/getting-started/install#updating-from-0052 for upgrade instructions.");
|
|
5
5
|
var base_js_1 = require("./base.cjs");
|
|
6
6
|
Object.defineProperty(exports, "BaseDocumentLoader", { enumerable: true, get: function () { return base_js_1.BaseDocumentLoader; } });
|
|
@@ -36,7 +36,5 @@ var csv_js_1 = require("./fs/csv.cjs");
|
|
|
36
36
|
Object.defineProperty(exports, "CSVLoader", { enumerable: true, get: function () { return csv_js_1.CSVLoader; } });
|
|
37
37
|
var notion_js_1 = require("./fs/notion.cjs");
|
|
38
38
|
Object.defineProperty(exports, "NotionLoader", { enumerable: true, get: function () { return notion_js_1.NotionLoader; } });
|
|
39
|
-
var github_js_1 = require("./web/github.cjs");
|
|
40
|
-
Object.defineProperty(exports, "GithubRepoLoader", { enumerable: true, get: function () { return github_js_1.GithubRepoLoader; } });
|
|
41
39
|
var unstructured_js_1 = require("./fs/unstructured.cjs");
|
|
42
40
|
Object.defineProperty(exports, "UnstructuredLoader", { enumerable: true, get: function () { return unstructured_js_1.UnstructuredLoader; } });
|
|
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
|
|
|
15
15
|
export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
|
|
16
16
|
export { CSVLoader } from "./fs/csv.js";
|
|
17
17
|
export { NotionLoader } from "./fs/notion.js";
|
|
18
|
-
export { GithubRepoLoader, GithubRepoLoaderParams } from "./web/github.js";
|
|
19
18
|
export { UnstructuredLoader } from "./fs/unstructured.js";
|
|
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
|
|
|
15
15
|
export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
|
|
16
16
|
export { CSVLoader } from "./fs/csv.js";
|
|
17
17
|
export { NotionLoader } from "./fs/notion.js";
|
|
18
|
-
export { GithubRepoLoader } from "./web/github.js";
|
|
19
18
|
export { UnstructuredLoader } from "./fs/unstructured.js";
|
|
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.GithubRepoLoader = void 0;
|
|
7
|
+
const ignore_1 = __importDefault(require("ignore"));
|
|
7
8
|
const binary_extensions_1 = __importDefault(require("binary-extensions"));
|
|
8
9
|
const document_js_1 = require("../../document.cjs");
|
|
9
10
|
const base_js_1 = require("../base.cjs");
|
|
@@ -17,7 +18,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
17
18
|
constructor(githubUrl, { accessToken = typeof process !== "undefined"
|
|
18
19
|
? // eslint-disable-next-line no-process-env
|
|
19
20
|
process.env?.GITHUB_ACCESS_TOKEN
|
|
20
|
-
: undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], } = {}) {
|
|
21
|
+
: undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
|
|
21
22
|
super();
|
|
22
23
|
Object.defineProperty(this, "owner", {
|
|
23
24
|
enumerable: true,
|
|
@@ -73,6 +74,12 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
73
74
|
writable: true,
|
|
74
75
|
value: void 0
|
|
75
76
|
});
|
|
77
|
+
Object.defineProperty(this, "ignore", {
|
|
78
|
+
enumerable: true,
|
|
79
|
+
configurable: true,
|
|
80
|
+
writable: true,
|
|
81
|
+
value: void 0
|
|
82
|
+
});
|
|
76
83
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
77
84
|
this.owner = owner;
|
|
78
85
|
this.repo = repo;
|
|
@@ -82,6 +89,9 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
82
89
|
this.unknown = unknown;
|
|
83
90
|
this.accessToken = accessToken;
|
|
84
91
|
this.ignoreFiles = ignoreFiles;
|
|
92
|
+
if (ignorePaths) {
|
|
93
|
+
this.ignore = ignore_1.default.default().add(ignorePaths);
|
|
94
|
+
}
|
|
85
95
|
if (this.accessToken) {
|
|
86
96
|
this.headers = {
|
|
87
97
|
Authorization: `Bearer ${this.accessToken}`,
|
|
@@ -100,38 +110,43 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
|
100
110
|
await this.processDirectory(this.initialPath, documents);
|
|
101
111
|
return documents;
|
|
102
112
|
}
|
|
103
|
-
shouldIgnore(path) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
async shouldIgnore(path, fileType) {
|
|
114
|
+
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
115
|
+
return true;
|
|
116
|
+
}
|
|
117
|
+
if (this.ignore !== undefined) {
|
|
118
|
+
return this.ignore.ignores(path);
|
|
119
|
+
}
|
|
120
|
+
return (fileType !== "dir" &&
|
|
121
|
+
this.ignoreFiles.some((pattern) => {
|
|
122
|
+
if (typeof pattern === "string") {
|
|
123
|
+
return path === pattern;
|
|
124
|
+
}
|
|
125
|
+
try {
|
|
126
|
+
return pattern.test(path);
|
|
127
|
+
}
|
|
128
|
+
catch {
|
|
129
|
+
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
130
|
+
}
|
|
131
|
+
}));
|
|
115
132
|
}
|
|
116
133
|
async processDirectory(path, documents) {
|
|
117
134
|
try {
|
|
118
135
|
const files = await this.fetchRepoFiles(path);
|
|
119
136
|
for (const file of files) {
|
|
120
|
-
if (file.type
|
|
121
|
-
if (
|
|
122
|
-
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
else {
|
|
126
|
-
try {
|
|
127
|
-
if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
|
|
137
|
+
if (!(await this.shouldIgnore(file.path, file.type))) {
|
|
138
|
+
if (file.type !== "dir") {
|
|
139
|
+
try {
|
|
128
140
|
const fileContent = await this.fetchFileContent(file);
|
|
129
141
|
const metadata = { source: file.path };
|
|
130
142
|
documents.push(new document_js_1.Document({ pageContent: fileContent, metadata }));
|
|
131
143
|
}
|
|
144
|
+
catch (e) {
|
|
145
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
146
|
+
}
|
|
132
147
|
}
|
|
133
|
-
|
|
134
|
-
this.
|
|
148
|
+
else if (this.recursive) {
|
|
149
|
+
await this.processDirectory(file.path, documents);
|
|
135
150
|
}
|
|
136
151
|
}
|
|
137
152
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Ignore } from "ignore";
|
|
1
2
|
import { Document } from "../../document.js";
|
|
2
3
|
import { BaseDocumentLoader } from "../base.js";
|
|
3
4
|
import { UnknownHandling } from "../fs/directory.js";
|
|
@@ -7,6 +8,7 @@ export interface GithubRepoLoaderParams {
|
|
|
7
8
|
unknown?: UnknownHandling;
|
|
8
9
|
accessToken?: string;
|
|
9
10
|
ignoreFiles?: (string | RegExp)[];
|
|
11
|
+
ignorePaths?: string[];
|
|
10
12
|
}
|
|
11
13
|
export declare class GithubRepoLoader extends BaseDocumentLoader implements GithubRepoLoaderParams {
|
|
12
14
|
private readonly owner;
|
|
@@ -18,10 +20,11 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
|
|
|
18
20
|
unknown: UnknownHandling;
|
|
19
21
|
accessToken?: string;
|
|
20
22
|
ignoreFiles: (string | RegExp)[];
|
|
21
|
-
|
|
23
|
+
ignore?: Ignore;
|
|
24
|
+
constructor(githubUrl: string, { accessToken, branch, recursive, unknown, ignoreFiles, ignorePaths, }?: GithubRepoLoaderParams);
|
|
22
25
|
private extractOwnerAndRepoAndPath;
|
|
23
26
|
load(): Promise<Document[]>;
|
|
24
|
-
|
|
27
|
+
protected shouldIgnore(path: string, fileType: string): Promise<boolean>;
|
|
25
28
|
private processDirectory;
|
|
26
29
|
private fetchRepoFiles;
|
|
27
30
|
private fetchFileContent;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import ignore from "ignore";
|
|
1
2
|
import binaryExtensions from "binary-extensions";
|
|
2
3
|
import { Document } from "../../document.js";
|
|
3
4
|
import { BaseDocumentLoader } from "../base.js";
|
|
@@ -11,7 +12,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
11
12
|
constructor(githubUrl, { accessToken = typeof process !== "undefined"
|
|
12
13
|
? // eslint-disable-next-line no-process-env
|
|
13
14
|
process.env?.GITHUB_ACCESS_TOKEN
|
|
14
|
-
: undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], } = {}) {
|
|
15
|
+
: undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
|
|
15
16
|
super();
|
|
16
17
|
Object.defineProperty(this, "owner", {
|
|
17
18
|
enumerable: true,
|
|
@@ -67,6 +68,12 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
67
68
|
writable: true,
|
|
68
69
|
value: void 0
|
|
69
70
|
});
|
|
71
|
+
Object.defineProperty(this, "ignore", {
|
|
72
|
+
enumerable: true,
|
|
73
|
+
configurable: true,
|
|
74
|
+
writable: true,
|
|
75
|
+
value: void 0
|
|
76
|
+
});
|
|
70
77
|
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
71
78
|
this.owner = owner;
|
|
72
79
|
this.repo = repo;
|
|
@@ -76,6 +83,9 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
76
83
|
this.unknown = unknown;
|
|
77
84
|
this.accessToken = accessToken;
|
|
78
85
|
this.ignoreFiles = ignoreFiles;
|
|
86
|
+
if (ignorePaths) {
|
|
87
|
+
this.ignore = ignore.default().add(ignorePaths);
|
|
88
|
+
}
|
|
79
89
|
if (this.accessToken) {
|
|
80
90
|
this.headers = {
|
|
81
91
|
Authorization: `Bearer ${this.accessToken}`,
|
|
@@ -94,38 +104,43 @@ export class GithubRepoLoader extends BaseDocumentLoader {
|
|
|
94
104
|
await this.processDirectory(this.initialPath, documents);
|
|
95
105
|
return documents;
|
|
96
106
|
}
|
|
97
|
-
shouldIgnore(path) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
async shouldIgnore(path, fileType) {
|
|
108
|
+
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
111
|
+
if (this.ignore !== undefined) {
|
|
112
|
+
return this.ignore.ignores(path);
|
|
113
|
+
}
|
|
114
|
+
return (fileType !== "dir" &&
|
|
115
|
+
this.ignoreFiles.some((pattern) => {
|
|
116
|
+
if (typeof pattern === "string") {
|
|
117
|
+
return path === pattern;
|
|
118
|
+
}
|
|
119
|
+
try {
|
|
120
|
+
return pattern.test(path);
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
124
|
+
}
|
|
125
|
+
}));
|
|
109
126
|
}
|
|
110
127
|
async processDirectory(path, documents) {
|
|
111
128
|
try {
|
|
112
129
|
const files = await this.fetchRepoFiles(path);
|
|
113
130
|
for (const file of files) {
|
|
114
|
-
if (file.type
|
|
115
|
-
if (
|
|
116
|
-
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
else {
|
|
120
|
-
try {
|
|
121
|
-
if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
|
|
131
|
+
if (!(await this.shouldIgnore(file.path, file.type))) {
|
|
132
|
+
if (file.type !== "dir") {
|
|
133
|
+
try {
|
|
122
134
|
const fileContent = await this.fetchFileContent(file);
|
|
123
135
|
const metadata = { source: file.path };
|
|
124
136
|
documents.push(new Document({ pageContent: fileContent, metadata }));
|
|
125
137
|
}
|
|
138
|
+
catch (e) {
|
|
139
|
+
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
140
|
+
}
|
|
126
141
|
}
|
|
127
|
-
|
|
128
|
-
this.
|
|
142
|
+
else if (this.recursive) {
|
|
143
|
+
await this.processDirectory(file.path, documents);
|
|
129
144
|
}
|
|
130
145
|
}
|
|
131
146
|
}
|