langchain 0.0.78 → 0.0.80

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/chains/query_constructor/prompt.cjs +5 -5
  2. package/dist/chains/query_constructor/prompt.d.ts +2 -2
  3. package/dist/chains/query_constructor/prompt.js +5 -5
  4. package/dist/chains/sql_db/sql_db_chain.cjs +0 -3
  5. package/dist/chains/sql_db/sql_db_chain.js +0 -3
  6. package/dist/chains/vector_db_qa.cjs +1 -1
  7. package/dist/chains/vector_db_qa.js +1 -1
  8. package/dist/client/langchainplus.cjs +143 -52
  9. package/dist/client/langchainplus.d.ts +72 -15
  10. package/dist/client/langchainplus.js +144 -53
  11. package/dist/document_loaders/index.cjs +1 -3
  12. package/dist/document_loaders/index.d.ts +0 -1
  13. package/dist/document_loaders/index.js +0 -1
  14. package/dist/document_loaders/web/github.cjs +38 -23
  15. package/dist/document_loaders/web/github.d.ts +5 -2
  16. package/dist/document_loaders/web/github.js +38 -23
  17. package/dist/llms/googlevertexai.cjs +97 -0
  18. package/dist/llms/googlevertexai.d.ts +43 -0
  19. package/dist/llms/googlevertexai.js +93 -0
  20. package/dist/prompts/selectors/conditional.cjs +4 -0
  21. package/dist/prompts/selectors/conditional.d.ts +5 -0
  22. package/dist/prompts/selectors/conditional.js +4 -0
  23. package/dist/retrievers/metal.d.ts +2 -1
  24. package/dist/stores/message/redis.cjs +1 -10
  25. package/dist/stores/message/redis.js +1 -10
  26. package/dist/text_splitter.cjs +11 -4
  27. package/dist/text_splitter.d.ts +7 -2
  28. package/dist/text_splitter.js +11 -4
  29. package/dist/types/googlevertexai-types.cjs +2 -0
  30. package/dist/types/googlevertexai-types.d.ts +47 -0
  31. package/dist/types/googlevertexai-types.js +1 -0
  32. package/dist/util/googlevertexai-connection.cjs +66 -0
  33. package/dist/util/googlevertexai-connection.d.ts +13 -0
  34. package/dist/util/googlevertexai-connection.js +62 -0
  35. package/dist/vectorstores/chroma.cjs +34 -7
  36. package/dist/vectorstores/chroma.d.ts +5 -1
  37. package/dist/vectorstores/chroma.js +34 -7
  38. package/dist/vectorstores/milvus.cjs +9 -30
  39. package/dist/vectorstores/milvus.d.ts +0 -3
  40. package/dist/vectorstores/milvus.js +9 -30
  41. package/llms/googlevertexai.cjs +1 -0
  42. package/llms/googlevertexai.d.ts +1 -0
  43. package/llms/googlevertexai.js +1 -0
  44. package/package.json +21 -5
@@ -1,4 +1,4 @@
1
- import { LangChainTracer } from "../callbacks/handlers/tracer_langchain.js";
1
+ import { LangChainTracer, } from "../callbacks/handlers/tracer_langchain.js";
2
2
  import { mapStoredMessagesToChatMessages } from "../stores/message/utils.js";
3
3
  import { AsyncCaller } from "../util/async_caller.js";
4
4
  // utility functions
@@ -7,7 +7,7 @@ const isLocalhost = (url) => {
7
7
  const hostname = strippedUrl.split("/")[0].split(":")[0];
8
8
  return (hostname === "localhost" || hostname === "127.0.0.1" || hostname === "::1");
9
9
  };
10
- const getSeededTenantId = async (apiUrl, apiKey, callerOptions = undefined) => {
10
+ const getSeededTenantId = async (apiUrl, { apiKey, callerOptions, }) => {
11
11
  // Get the tenant ID from the seeded tenant
12
12
  const caller = new AsyncCaller(callerOptions ?? {});
13
13
  const url = `${apiUrl}/tenants`;
@@ -15,7 +15,7 @@ const getSeededTenantId = async (apiUrl, apiKey, callerOptions = undefined) => {
15
15
  try {
16
16
  response = await caller.call(fetch, url, {
17
17
  method: "GET",
18
- headers: apiKey ? { authorization: `Bearer ${apiKey}` } : undefined,
18
+ headers: apiKey ? { "x-api-key": apiKey } : undefined,
19
19
  });
20
20
  }
21
21
  catch (err) {
@@ -76,18 +76,24 @@ async function getModelOrFactoryType(llm) {
76
76
  throw new Error("Unknown model or factory type");
77
77
  }
78
78
  export class LangChainPlusClient {
79
- constructor(apiUrl, tenantId, apiKey, callerOptions) {
79
+ constructor(config) {
80
80
  Object.defineProperty(this, "apiKey", {
81
81
  enumerable: true,
82
82
  configurable: true,
83
83
  writable: true,
84
- value: void 0
84
+ value: typeof process !== "undefined"
85
+ ? // eslint-disable-next-line no-process-env
86
+ process.env?.LANGCHAIN_API_KEY
87
+ : undefined
85
88
  });
86
89
  Object.defineProperty(this, "apiUrl", {
87
90
  enumerable: true,
88
91
  configurable: true,
89
92
  writable: true,
90
- value: void 0
93
+ value: (typeof process !== "undefined"
94
+ ? // eslint-disable-next-line no-process-env
95
+ process.env?.LANGCHAIN_ENDPOINT
96
+ : undefined) || "http://localhost:8000"
91
97
  });
92
98
  Object.defineProperty(this, "tenantId", {
93
99
  enumerable: true,
@@ -101,15 +107,45 @@ export class LangChainPlusClient {
101
107
  writable: true,
102
108
  value: void 0
103
109
  });
104
- this.apiUrl = apiUrl;
105
- this.apiKey = apiKey;
106
- this.tenantId = tenantId;
110
+ this.apiUrl = config.apiUrl ?? this.apiUrl;
111
+ this.apiKey = config.apiKey;
112
+ const tenantId = config.tenantId ??
113
+ (typeof process !== "undefined"
114
+ ? // eslint-disable-next-line no-process-env
115
+ process.env?.LANGCHAIN_TENANT_ID
116
+ : undefined);
117
+ if (tenantId === undefined) {
118
+ throw new Error("No tenant ID provided and no LANGCHAIN_TENANT_ID env var");
119
+ }
120
+ else {
121
+ this.tenantId = tenantId;
122
+ }
107
123
  this.validateApiKeyIfHosted();
108
- this.caller = new AsyncCaller(callerOptions ?? {});
124
+ this.caller = new AsyncCaller(config.callerOptions ?? {});
109
125
  }
110
- static async create(apiUrl, apiKey = undefined) {
111
- const tenantId = await getSeededTenantId(apiUrl, apiKey);
112
- return new LangChainPlusClient(apiUrl, tenantId, apiKey);
126
+ static async create(config = {}) {
127
+ const apiUrl_ = config.apiUrl ??
128
+ ((typeof process !== "undefined"
129
+ ? // eslint-disable-next-line no-process-env
130
+ process.env?.LANGCHAIN_ENDPOINT
131
+ : undefined) ||
132
+ "http://localhost:8000");
133
+ const apiKey_ = config.apiKey ??
134
+ (typeof process !== "undefined"
135
+ ? // eslint-disable-next-line no-process-env
136
+ process.env?.LANGCHAIN_API_KEY
137
+ : undefined);
138
+ const tenantId_ = config.tenantId ??
139
+ ((typeof process !== "undefined"
140
+ ? // eslint-disable-next-line no-process-env
141
+ process.env?.LANGCHAIN_TENANT_ID
142
+ : undefined) ||
143
+ (await getSeededTenantId(apiUrl_, { apiKey: apiKey_ })));
144
+ return new LangChainPlusClient({
145
+ tenantId: tenantId_,
146
+ apiKey: apiKey_,
147
+ apiUrl: apiUrl_,
148
+ });
113
149
  }
114
150
  validateApiKeyIfHosted() {
115
151
  const isLocal = isLocalhost(this.apiUrl);
@@ -120,24 +156,21 @@ export class LangChainPlusClient {
120
156
  get headers() {
121
157
  const headers = {};
122
158
  if (this.apiKey) {
123
- headers.authorization = `Bearer ${this.apiKey}`;
159
+ headers["x-api-key"] = `${this.apiKey}`;
124
160
  }
125
161
  return headers;
126
162
  }
127
163
  get queryParams() {
128
- return { tenant_id: this.tenantId };
129
- }
130
- async _get(path, queryParams = {}) {
131
- const params = { ...this.queryParams, ...queryParams };
132
- let queryString = "";
133
- for (const key in params) {
134
- if (Object.prototype.hasOwnProperty.call(params, key)) {
135
- queryString = queryString
136
- ? `${queryString}&${encodeURIComponent(key)}=${encodeURIComponent(params[key])}`
137
- : `${encodeURIComponent(key)}=${encodeURIComponent(params[key])}`;
138
- }
164
+ return new URLSearchParams({ tenant_id: this.tenantId });
165
+ }
166
+ async _get(path, queryParams) {
167
+ const params = this.queryParams;
168
+ if (queryParams) {
169
+ queryParams.forEach((value, key) => {
170
+ params.append(key, value);
171
+ });
139
172
  }
140
- const url = `${this.apiUrl}${path}${queryString ? `?${queryString}` : ""}`;
173
+ const url = `${this.apiUrl}${path}?${params.toString()}`;
141
174
  const response = await this.caller.call(fetch, url, {
142
175
  method: "GET",
143
176
  headers: this.headers,
@@ -147,14 +180,73 @@ export class LangChainPlusClient {
147
180
  }
148
181
  return response.json();
149
182
  }
150
- async uploadCsv(csvFile, fileName, description, inputKeys, outputKeys) {
183
+ async readRun(runId) {
184
+ return await this._get(`/runs/${runId}`);
185
+ }
186
+ async listRuns({ sessionId, sessionName, executionOrder = 1, runType, error, }) {
187
+ const queryParams = new URLSearchParams();
188
+ let sessionId_ = sessionId;
189
+ if (sessionName) {
190
+ if (sessionId) {
191
+ throw new Error("Only one of session_id or session_name may be given");
192
+ }
193
+ sessionId_ = (await this.readSession({ sessionName })).id;
194
+ }
195
+ if (sessionId_) {
196
+ queryParams.append("session", sessionId_);
197
+ }
198
+ if (executionOrder) {
199
+ queryParams.append("execution_order", executionOrder.toString());
200
+ }
201
+ if (runType) {
202
+ queryParams.append("run_type", runType);
203
+ }
204
+ if (error !== undefined) {
205
+ queryParams.append("error", error.toString());
206
+ }
207
+ return this._get("/runs", queryParams);
208
+ }
209
+ async readSession({ sessionId, sessionName, }) {
210
+ let path = "/sessions";
211
+ const params = new URLSearchParams();
212
+ if (sessionId !== undefined && sessionName !== undefined) {
213
+ throw new Error("Must provide either sessionName or sessionId, not both");
214
+ }
215
+ else if (sessionId !== undefined) {
216
+ path += `/${sessionId}`;
217
+ }
218
+ else if (sessionName !== undefined) {
219
+ params.append("name", sessionName);
220
+ }
221
+ else {
222
+ throw new Error("Must provide sessionName or sessionId");
223
+ }
224
+ const response = await this._get(path, params);
225
+ let result;
226
+ if (Array.isArray(response)) {
227
+ if (response.length === 0) {
228
+ throw new Error(`Session[id=${sessionId}, name=${sessionName}] not found`);
229
+ }
230
+ result = response[0];
231
+ }
232
+ else {
233
+ result = response;
234
+ }
235
+ return result;
236
+ }
237
+ async listSessions() {
238
+ return this._get("/sessions");
239
+ }
240
+ async uploadCsv({ csvFile, fileName, inputKeys, outputKeys, description, }) {
151
241
  const url = `${this.apiUrl}/datasets/upload`;
152
242
  const formData = new FormData();
153
243
  formData.append("file", csvFile, fileName);
154
244
  formData.append("input_keys", inputKeys.join(","));
155
245
  formData.append("output_keys", outputKeys.join(","));
156
- formData.append("description", description);
157
246
  formData.append("tenant_id", this.tenantId);
247
+ if (description) {
248
+ formData.append("description", description);
249
+ }
158
250
  const response = await this.caller.call(fetch, url, {
159
251
  method: "POST",
160
252
  headers: this.headers,
@@ -170,7 +262,7 @@ export class LangChainPlusClient {
170
262
  const result = await response.json();
171
263
  return result;
172
264
  }
173
- async createDataset(name, description) {
265
+ async createDataset(name, { description }) {
174
266
  const response = await this.caller.call(fetch, `${this.apiUrl}/datasets`, {
175
267
  method: "POST",
176
268
  headers: { ...this.headers, "Content-Type": "application/json" },
@@ -190,10 +282,10 @@ export class LangChainPlusClient {
190
282
  const result = await response.json();
191
283
  return result;
192
284
  }
193
- async readDataset(datasetId, datasetName) {
285
+ async readDataset({ datasetId, datasetName, }) {
194
286
  let path = "/datasets";
195
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
196
- const params = { limit: 1 };
287
+ // limit to 1 result
288
+ const params = new URLSearchParams({ limit: "1" });
197
289
  if (datasetId !== undefined && datasetName !== undefined) {
198
290
  throw new Error("Must provide either datasetName or datasetId, not both");
199
291
  }
@@ -201,7 +293,7 @@ export class LangChainPlusClient {
201
293
  path += `/${datasetId}`;
202
294
  }
203
295
  else if (datasetName !== undefined) {
204
- params.name = datasetName;
296
+ params.append("name", datasetName);
205
297
  }
206
298
  else {
207
299
  throw new Error("Must provide datasetName or datasetId");
@@ -219,24 +311,23 @@ export class LangChainPlusClient {
219
311
  }
220
312
  return result;
221
313
  }
222
- async listDatasets(limit = 100) {
314
+ async listDatasets({ limit = 100, } = {}) {
223
315
  const path = "/datasets";
224
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
225
- const params = { limit };
316
+ const params = new URLSearchParams({ limit: limit.toString() });
226
317
  const response = await this._get(path, params);
227
318
  if (!Array.isArray(response)) {
228
319
  throw new Error(`Expected ${path} to return an array, but got ${response}`);
229
320
  }
230
321
  return response;
231
322
  }
232
- async deleteDataset(datasetId, datasetName) {
323
+ async deleteDataset({ datasetId, datasetName, }) {
233
324
  let path = "/datasets";
234
325
  let datasetId_ = datasetId;
235
326
  if (datasetId !== undefined && datasetName !== undefined) {
236
327
  throw new Error("Must provide either datasetName or datasetId, not both");
237
328
  }
238
329
  else if (datasetName !== undefined) {
239
- const dataset = await this.readDataset(undefined, datasetName);
330
+ const dataset = await this.readDataset({ datasetName });
240
331
  datasetId_ = dataset.id;
241
332
  }
242
333
  if (datasetId_ !== undefined) {
@@ -255,7 +346,7 @@ export class LangChainPlusClient {
255
346
  const results = await response.json();
256
347
  return results;
257
348
  }
258
- async createExample(inputs, outputs = {}, datasetId = undefined, datasetName = undefined, createdAt = undefined) {
349
+ async createExample(inputs, outputs, { datasetId, datasetName, createdAt, }) {
259
350
  let datasetId_ = datasetId;
260
351
  if (datasetId_ === undefined && datasetName === undefined) {
261
352
  throw new Error("Must provide either datasetName or datasetId");
@@ -264,7 +355,7 @@ export class LangChainPlusClient {
264
355
  throw new Error("Must provide either datasetName or datasetId, not both");
265
356
  }
266
357
  else if (datasetId_ === undefined) {
267
- const dataset = await this.readDataset(undefined, datasetName);
358
+ const dataset = await this.readDataset({ datasetName });
268
359
  datasetId_ = dataset.id;
269
360
  }
270
361
  const createdAt_ = createdAt || new Date();
@@ -289,7 +380,7 @@ export class LangChainPlusClient {
289
380
  const path = `/examples/${exampleId}`;
290
381
  return await this._get(path);
291
382
  }
292
- async listExamples(datasetId = undefined, datasetName = undefined) {
383
+ async listExamples({ datasetId, datasetName, } = {}) {
293
384
  let datasetId_;
294
385
  if (datasetId !== undefined && datasetName !== undefined) {
295
386
  throw new Error("Must provide either datasetName or datasetId, not both");
@@ -298,15 +389,13 @@ export class LangChainPlusClient {
298
389
  datasetId_ = datasetId;
299
390
  }
300
391
  else if (datasetName !== undefined) {
301
- const dataset = await this.readDataset(undefined, datasetName);
392
+ const dataset = await this.readDataset({ datasetName });
302
393
  datasetId_ = dataset.id;
303
394
  }
304
395
  else {
305
396
  throw new Error("Must provide a datasetName or datasetId");
306
397
  }
307
- const response = await this._get("/examples", {
308
- dataset: datasetId_,
309
- });
398
+ const response = await this._get("/examples", new URLSearchParams({ dataset: datasetId_ }));
310
399
  if (!Array.isArray(response)) {
311
400
  throw new Error(`Expected /examples to return an array, but got ${response}`);
312
401
  }
@@ -324,7 +413,7 @@ export class LangChainPlusClient {
324
413
  const result = await response.json();
325
414
  return result;
326
415
  }
327
- async runLLM(example, tracer, llm, numRepetitions = 1) {
416
+ async runLLM(example, tracer, llm, { numRepetitions = 1 }) {
328
417
  const results = await Promise.all(Array.from({ length: numRepetitions }).map(async () => {
329
418
  try {
330
419
  const prompt = example.inputs.prompt;
@@ -337,7 +426,7 @@ export class LangChainPlusClient {
337
426
  }));
338
427
  return results;
339
428
  }
340
- async runChain(example, tracer, chainFactory, numRepetitions = 1) {
429
+ async runChain(example, tracer, chainFactory, { numRepetitions = 1, }) {
341
430
  const results = await Promise.all(Array.from({ length: numRepetitions }).map(async () => {
342
431
  try {
343
432
  const chain = await chainFactory();
@@ -350,7 +439,7 @@ export class LangChainPlusClient {
350
439
  }));
351
440
  return results;
352
441
  }
353
- async runChatModel(example, tracer, chatModel, numRepetitions = 1) {
442
+ async runChatModel(example, tracer, chatModel, { numRepetitions = 1, }) {
354
443
  const results = await Promise.all(Array.from({ length: numRepetitions }).map(async () => {
355
444
  try {
356
445
  const messages = example.inputs.messages;
@@ -363,8 +452,8 @@ export class LangChainPlusClient {
363
452
  }));
364
453
  return results;
365
454
  }
366
- async runOnDataset(datasetName, llmOrChainFactory, numRepetitions = 1, sessionName = undefined) {
367
- const examples = await this.listExamples(undefined, datasetName);
455
+ async runOnDataset(datasetName, llmOrChainFactory, { numRepetitions = 1, sessionName, } = {}) {
456
+ const examples = await this.listExamples({ datasetName });
368
457
  let sessionName_;
369
458
  if (sessionName === undefined) {
370
459
  const currentTime = new Date().toISOString();
@@ -382,17 +471,19 @@ export class LangChainPlusClient {
382
471
  });
383
472
  if (modelOrFactoryType === "llm") {
384
473
  const llm = llmOrChainFactory;
385
- const llmResult = await this.runLLM(example, tracer, llm, numRepetitions);
474
+ const llmResult = await this.runLLM(example, tracer, llm, {
475
+ numRepetitions,
476
+ });
386
477
  results[example.id] = llmResult;
387
478
  }
388
479
  else if (modelOrFactoryType === "chainFactory") {
389
480
  const chainFactory = llmOrChainFactory;
390
- const chainResult = await this.runChain(example, tracer, chainFactory, numRepetitions);
481
+ const chainResult = await this.runChain(example, tracer, chainFactory, { numRepetitions });
391
482
  results[example.id] = chainResult;
392
483
  }
393
484
  else if (modelOrFactoryType === "chatModel") {
394
485
  const chatModel = llmOrChainFactory;
395
- const chatModelResult = await this.runChatModel(example, tracer, chatModel, numRepetitions);
486
+ const chatModelResult = await this.runChatModel(example, tracer, chatModel, { numRepetitions });
396
487
  results[example.id] = chatModelResult;
397
488
  }
398
489
  else {
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.UnstructuredLoader = exports.GithubRepoLoader = exports.NotionLoader = exports.CSVLoader = exports.JSONLinesLoader = exports.JSONLoader = exports.TextLoader = exports.EPubLoader = exports.DocxLoader = exports.PDFLoader = exports.SRTLoader = exports.UnknownHandling = exports.DirectoryLoader = exports.IMSDBLoader = exports.HNLoader = exports.GitbookLoader = exports.CollegeConfidentialLoader = exports.PuppeteerWebBaseLoader = exports.CheerioWebBaseLoader = exports.BaseDocumentLoader = void 0;
3
+ exports.UnstructuredLoader = exports.NotionLoader = exports.CSVLoader = exports.JSONLinesLoader = exports.JSONLoader = exports.TextLoader = exports.EPubLoader = exports.DocxLoader = exports.PDFLoader = exports.SRTLoader = exports.UnknownHandling = exports.DirectoryLoader = exports.IMSDBLoader = exports.HNLoader = exports.GitbookLoader = exports.CollegeConfidentialLoader = exports.PuppeteerWebBaseLoader = exports.CheerioWebBaseLoader = exports.BaseDocumentLoader = void 0;
4
4
  /* #__PURE__ */ console.error("[WARN] Importing from 'langchain/document_loaders' is deprecated. Import from eg. 'langchain/document_loaders/fs/text' or 'langchain/document_loaders/web/cheerio' instead. See https://js.langchain.com/docs/getting-started/install#updating-from-0052 for upgrade instructions.");
5
5
  var base_js_1 = require("./base.cjs");
6
6
  Object.defineProperty(exports, "BaseDocumentLoader", { enumerable: true, get: function () { return base_js_1.BaseDocumentLoader; } });
@@ -36,7 +36,5 @@ var csv_js_1 = require("./fs/csv.cjs");
36
36
  Object.defineProperty(exports, "CSVLoader", { enumerable: true, get: function () { return csv_js_1.CSVLoader; } });
37
37
  var notion_js_1 = require("./fs/notion.cjs");
38
38
  Object.defineProperty(exports, "NotionLoader", { enumerable: true, get: function () { return notion_js_1.NotionLoader; } });
39
- var github_js_1 = require("./web/github.cjs");
40
- Object.defineProperty(exports, "GithubRepoLoader", { enumerable: true, get: function () { return github_js_1.GithubRepoLoader; } });
41
39
  var unstructured_js_1 = require("./fs/unstructured.cjs");
42
40
  Object.defineProperty(exports, "UnstructuredLoader", { enumerable: true, get: function () { return unstructured_js_1.UnstructuredLoader; } });
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
15
15
  export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
16
16
  export { CSVLoader } from "./fs/csv.js";
17
17
  export { NotionLoader } from "./fs/notion.js";
18
- export { GithubRepoLoader, GithubRepoLoaderParams } from "./web/github.js";
19
18
  export { UnstructuredLoader } from "./fs/unstructured.js";
@@ -15,5 +15,4 @@ export { TextLoader } from "./fs/text.js";
15
15
  export { JSONLoader, JSONLinesLoader } from "./fs/json.js";
16
16
  export { CSVLoader } from "./fs/csv.js";
17
17
  export { NotionLoader } from "./fs/notion.js";
18
- export { GithubRepoLoader } from "./web/github.js";
19
18
  export { UnstructuredLoader } from "./fs/unstructured.js";
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.GithubRepoLoader = void 0;
7
+ const ignore_1 = __importDefault(require("ignore"));
7
8
  const binary_extensions_1 = __importDefault(require("binary-extensions"));
8
9
  const document_js_1 = require("../../document.cjs");
9
10
  const base_js_1 = require("../base.cjs");
@@ -17,7 +18,7 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
17
18
  constructor(githubUrl, { accessToken = typeof process !== "undefined"
18
19
  ? // eslint-disable-next-line no-process-env
19
20
  process.env?.GITHUB_ACCESS_TOKEN
20
- : undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], } = {}) {
21
+ : undefined, branch = "main", recursive = true, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
21
22
  super();
22
23
  Object.defineProperty(this, "owner", {
23
24
  enumerable: true,
@@ -73,6 +74,12 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
73
74
  writable: true,
74
75
  value: void 0
75
76
  });
77
+ Object.defineProperty(this, "ignore", {
78
+ enumerable: true,
79
+ configurable: true,
80
+ writable: true,
81
+ value: void 0
82
+ });
76
83
  const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
77
84
  this.owner = owner;
78
85
  this.repo = repo;
@@ -82,6 +89,9 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
82
89
  this.unknown = unknown;
83
90
  this.accessToken = accessToken;
84
91
  this.ignoreFiles = ignoreFiles;
92
+ if (ignorePaths) {
93
+ this.ignore = ignore_1.default.default().add(ignorePaths);
94
+ }
85
95
  if (this.accessToken) {
86
96
  this.headers = {
87
97
  Authorization: `Bearer ${this.accessToken}`,
@@ -100,38 +110,43 @@ class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
100
110
  await this.processDirectory(this.initialPath, documents);
101
111
  return documents;
102
112
  }
103
- shouldIgnore(path) {
104
- return this.ignoreFiles.some((pattern) => {
105
- if (typeof pattern === "string") {
106
- return path === pattern;
107
- }
108
- try {
109
- return pattern.test(path);
110
- }
111
- catch {
112
- throw new Error(`Unknown ignore file pattern: ${pattern}`);
113
- }
114
- });
113
+ async shouldIgnore(path, fileType) {
114
+ if (fileType !== "dir" && isBinaryPath(path)) {
115
+ return true;
116
+ }
117
+ if (this.ignore !== undefined) {
118
+ return this.ignore.ignores(path);
119
+ }
120
+ return (fileType !== "dir" &&
121
+ this.ignoreFiles.some((pattern) => {
122
+ if (typeof pattern === "string") {
123
+ return path === pattern;
124
+ }
125
+ try {
126
+ return pattern.test(path);
127
+ }
128
+ catch {
129
+ throw new Error(`Unknown ignore file pattern: ${pattern}`);
130
+ }
131
+ }));
115
132
  }
116
133
  async processDirectory(path, documents) {
117
134
  try {
118
135
  const files = await this.fetchRepoFiles(path);
119
136
  for (const file of files) {
120
- if (file.type === "dir") {
121
- if (this.recursive) {
122
- await this.processDirectory(file.path, documents);
123
- }
124
- }
125
- else {
126
- try {
127
- if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
137
+ if (!(await this.shouldIgnore(file.path, file.type))) {
138
+ if (file.type !== "dir") {
139
+ try {
128
140
  const fileContent = await this.fetchFileContent(file);
129
141
  const metadata = { source: file.path };
130
142
  documents.push(new document_js_1.Document({ pageContent: fileContent, metadata }));
131
143
  }
144
+ catch (e) {
145
+ this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
146
+ }
132
147
  }
133
- catch (e) {
134
- this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
148
+ else if (this.recursive) {
149
+ await this.processDirectory(file.path, documents);
135
150
  }
136
151
  }
137
152
  }
@@ -1,3 +1,4 @@
1
+ import { Ignore } from "ignore";
1
2
  import { Document } from "../../document.js";
2
3
  import { BaseDocumentLoader } from "../base.js";
3
4
  import { UnknownHandling } from "../fs/directory.js";
@@ -7,6 +8,7 @@ export interface GithubRepoLoaderParams {
7
8
  unknown?: UnknownHandling;
8
9
  accessToken?: string;
9
10
  ignoreFiles?: (string | RegExp)[];
11
+ ignorePaths?: string[];
10
12
  }
11
13
  export declare class GithubRepoLoader extends BaseDocumentLoader implements GithubRepoLoaderParams {
12
14
  private readonly owner;
@@ -18,10 +20,11 @@ export declare class GithubRepoLoader extends BaseDocumentLoader implements Gith
18
20
  unknown: UnknownHandling;
19
21
  accessToken?: string;
20
22
  ignoreFiles: (string | RegExp)[];
21
- constructor(githubUrl: string, { accessToken, branch, recursive, unknown, ignoreFiles, }?: GithubRepoLoaderParams);
23
+ ignore?: Ignore;
24
+ constructor(githubUrl: string, { accessToken, branch, recursive, unknown, ignoreFiles, ignorePaths, }?: GithubRepoLoaderParams);
22
25
  private extractOwnerAndRepoAndPath;
23
26
  load(): Promise<Document[]>;
24
- private shouldIgnore;
27
+ protected shouldIgnore(path: string, fileType: string): Promise<boolean>;
25
28
  private processDirectory;
26
29
  private fetchRepoFiles;
27
30
  private fetchFileContent;
@@ -1,3 +1,4 @@
1
+ import ignore from "ignore";
1
2
  import binaryExtensions from "binary-extensions";
2
3
  import { Document } from "../../document.js";
3
4
  import { BaseDocumentLoader } from "../base.js";
@@ -11,7 +12,7 @@ export class GithubRepoLoader extends BaseDocumentLoader {
11
12
  constructor(githubUrl, { accessToken = typeof process !== "undefined"
12
13
  ? // eslint-disable-next-line no-process-env
13
14
  process.env?.GITHUB_ACCESS_TOKEN
14
- : undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], } = {}) {
15
+ : undefined, branch = "main", recursive = true, unknown = UnknownHandling.Warn, ignoreFiles = [], ignorePaths, } = {}) {
15
16
  super();
16
17
  Object.defineProperty(this, "owner", {
17
18
  enumerable: true,
@@ -67,6 +68,12 @@ export class GithubRepoLoader extends BaseDocumentLoader {
67
68
  writable: true,
68
69
  value: void 0
69
70
  });
71
+ Object.defineProperty(this, "ignore", {
72
+ enumerable: true,
73
+ configurable: true,
74
+ writable: true,
75
+ value: void 0
76
+ });
70
77
  const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
71
78
  this.owner = owner;
72
79
  this.repo = repo;
@@ -76,6 +83,9 @@ export class GithubRepoLoader extends BaseDocumentLoader {
76
83
  this.unknown = unknown;
77
84
  this.accessToken = accessToken;
78
85
  this.ignoreFiles = ignoreFiles;
86
+ if (ignorePaths) {
87
+ this.ignore = ignore.default().add(ignorePaths);
88
+ }
79
89
  if (this.accessToken) {
80
90
  this.headers = {
81
91
  Authorization: `Bearer ${this.accessToken}`,
@@ -94,38 +104,43 @@ export class GithubRepoLoader extends BaseDocumentLoader {
94
104
  await this.processDirectory(this.initialPath, documents);
95
105
  return documents;
96
106
  }
97
- shouldIgnore(path) {
98
- return this.ignoreFiles.some((pattern) => {
99
- if (typeof pattern === "string") {
100
- return path === pattern;
101
- }
102
- try {
103
- return pattern.test(path);
104
- }
105
- catch {
106
- throw new Error(`Unknown ignore file pattern: ${pattern}`);
107
- }
108
- });
107
+ async shouldIgnore(path, fileType) {
108
+ if (fileType !== "dir" && isBinaryPath(path)) {
109
+ return true;
110
+ }
111
+ if (this.ignore !== undefined) {
112
+ return this.ignore.ignores(path);
113
+ }
114
+ return (fileType !== "dir" &&
115
+ this.ignoreFiles.some((pattern) => {
116
+ if (typeof pattern === "string") {
117
+ return path === pattern;
118
+ }
119
+ try {
120
+ return pattern.test(path);
121
+ }
122
+ catch {
123
+ throw new Error(`Unknown ignore file pattern: ${pattern}`);
124
+ }
125
+ }));
109
126
  }
110
127
  async processDirectory(path, documents) {
111
128
  try {
112
129
  const files = await this.fetchRepoFiles(path);
113
130
  for (const file of files) {
114
- if (file.type === "dir") {
115
- if (this.recursive) {
116
- await this.processDirectory(file.path, documents);
117
- }
118
- }
119
- else {
120
- try {
121
- if (!isBinaryPath(file.name) && !this.shouldIgnore(file.path)) {
131
+ if (!(await this.shouldIgnore(file.path, file.type))) {
132
+ if (file.type !== "dir") {
133
+ try {
122
134
  const fileContent = await this.fetchFileContent(file);
123
135
  const metadata = { source: file.path };
124
136
  documents.push(new Document({ pageContent: fileContent, metadata }));
125
137
  }
138
+ catch (e) {
139
+ this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
140
+ }
126
141
  }
127
- catch (e) {
128
- this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
142
+ else if (this.recursive) {
143
+ await this.processDirectory(file.path, documents);
129
144
  }
130
145
  }
131
146
  }