langchain 0.2.17 → 0.3.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/executor.cjs +11 -0
- package/dist/agents/executor.d.ts +2 -0
- package/dist/agents/executor.js +11 -0
- package/dist/agents/openai_functions/index.cjs +2 -2
- package/dist/agents/openai_functions/index.js +2 -2
- package/dist/agents/react/output_parser.cjs +3 -2
- package/dist/agents/react/output_parser.js +3 -2
- package/dist/chains/combine_documents/stuff.cjs +2 -2
- package/dist/chains/combine_documents/stuff.js +2 -2
- package/dist/chains/conversational_retrieval_chain.cjs +1 -1
- package/dist/chains/conversational_retrieval_chain.d.ts +1 -1
- package/dist/chains/conversational_retrieval_chain.js +1 -1
- package/dist/chains/llm_chain.cjs +1 -1
- package/dist/chains/llm_chain.d.ts +1 -1
- package/dist/chains/llm_chain.js +1 -1
- package/dist/chains/openai_functions/openapi.cjs +3 -1
- package/dist/chains/openai_functions/openapi.js +3 -1
- package/dist/chains/retrieval_qa.cjs +1 -1
- package/dist/chains/retrieval_qa.d.ts +1 -1
- package/dist/chains/retrieval_qa.js +1 -1
- package/dist/load/import_constants.cjs +2 -39
- package/dist/load/import_constants.js +2 -39
- package/dist/load/import_map.cjs +2 -3
- package/dist/load/import_map.d.ts +0 -1
- package/dist/load/import_map.js +0 -1
- package/dist/smith/config.d.ts +1 -5
- package/package.json +35 -858
- package/dist/document_loaders/fs/chatgpt.cjs +0 -90
- package/dist/document_loaders/fs/chatgpt.d.ts +0 -8
- package/dist/document_loaders/fs/chatgpt.js +0 -86
- package/dist/document_loaders/fs/csv.cjs +0 -73
- package/dist/document_loaders/fs/csv.d.ts +0 -65
- package/dist/document_loaders/fs/csv.js +0 -69
- package/dist/document_loaders/fs/docx.cjs +0 -58
- package/dist/document_loaders/fs/docx.d.ts +0 -25
- package/dist/document_loaders/fs/docx.js +0 -54
- package/dist/document_loaders/fs/epub.cjs +0 -103
- package/dist/document_loaders/fs/epub.d.ts +0 -33
- package/dist/document_loaders/fs/epub.js +0 -99
- package/dist/document_loaders/fs/notion.cjs +0 -26
- package/dist/document_loaders/fs/notion.d.ts +0 -12
- package/dist/document_loaders/fs/notion.js +0 -22
- package/dist/document_loaders/fs/obsidian.cjs +0 -247
- package/dist/document_loaders/fs/obsidian.d.ts +0 -28
- package/dist/document_loaders/fs/obsidian.js +0 -240
- package/dist/document_loaders/fs/openai_whisper_audio.cjs +0 -49
- package/dist/document_loaders/fs/openai_whisper_audio.d.ts +0 -23
- package/dist/document_loaders/fs/openai_whisper_audio.js +0 -45
- package/dist/document_loaders/fs/pdf.cjs +0 -148
- package/dist/document_loaders/fs/pdf.d.ts +0 -49
- package/dist/document_loaders/fs/pdf.js +0 -144
- package/dist/document_loaders/fs/pptx.cjs +0 -46
- package/dist/document_loaders/fs/pptx.d.ts +0 -25
- package/dist/document_loaders/fs/pptx.js +0 -42
- package/dist/document_loaders/fs/srt.cjs +0 -57
- package/dist/document_loaders/fs/srt.d.ts +0 -32
- package/dist/document_loaders/fs/srt.js +0 -50
- package/dist/document_loaders/fs/unstructured.cjs +0 -338
- package/dist/document_loaders/fs/unstructured.d.ts +0 -125
- package/dist/document_loaders/fs/unstructured.js +0 -333
- package/dist/document_loaders/web/apify_dataset.cjs +0 -130
- package/dist/document_loaders/web/apify_dataset.d.ts +0 -85
- package/dist/document_loaders/web/apify_dataset.js +0 -126
- package/dist/document_loaders/web/assemblyai.cjs +0 -200
- package/dist/document_loaders/web/assemblyai.d.ts +0 -95
- package/dist/document_loaders/web/assemblyai.js +0 -193
- package/dist/document_loaders/web/azure_blob_storage_container.cjs +0 -73
- package/dist/document_loaders/web/azure_blob_storage_container.d.ts +0 -46
- package/dist/document_loaders/web/azure_blob_storage_container.js +0 -69
- package/dist/document_loaders/web/azure_blob_storage_file.cjs +0 -124
- package/dist/document_loaders/web/azure_blob_storage_file.d.ts +0 -53
- package/dist/document_loaders/web/azure_blob_storage_file.js +0 -97
- package/dist/document_loaders/web/browserbase.cjs +0 -93
- package/dist/document_loaders/web/browserbase.d.ts +0 -48
- package/dist/document_loaders/web/browserbase.js +0 -86
- package/dist/document_loaders/web/cheerio.cjs +0 -118
- package/dist/document_loaders/web/cheerio.d.ts +0 -77
- package/dist/document_loaders/web/cheerio.js +0 -114
- package/dist/document_loaders/web/college_confidential.cjs +0 -41
- package/dist/document_loaders/web/college_confidential.d.ts +0 -25
- package/dist/document_loaders/web/college_confidential.js +0 -37
- package/dist/document_loaders/web/confluence.cjs +0 -190
- package/dist/document_loaders/web/confluence.d.ts +0 -114
- package/dist/document_loaders/web/confluence.js +0 -186
- package/dist/document_loaders/web/couchbase.cjs +0 -95
- package/dist/document_loaders/web/couchbase.d.ts +0 -32
- package/dist/document_loaders/web/couchbase.js +0 -91
- package/dist/document_loaders/web/figma.cjs +0 -102
- package/dist/document_loaders/web/figma.d.ts +0 -82
- package/dist/document_loaders/web/figma.js +0 -98
- package/dist/document_loaders/web/firecrawl.cjs +0 -95
- package/dist/document_loaders/web/firecrawl.d.ts +0 -50
- package/dist/document_loaders/web/firecrawl.js +0 -88
- package/dist/document_loaders/web/gitbook.cjs +0 -110
- package/dist/document_loaders/web/gitbook.d.ts +0 -55
- package/dist/document_loaders/web/gitbook.js +0 -106
- package/dist/document_loaders/web/github.cjs +0 -615
- package/dist/document_loaders/web/github.d.ts +0 -203
- package/dist/document_loaders/web/github.js +0 -608
- package/dist/document_loaders/web/hn.cjs +0 -90
- package/dist/document_loaders/web/hn.d.ts +0 -42
- package/dist/document_loaders/web/hn.js +0 -86
- package/dist/document_loaders/web/imsdb.cjs +0 -44
- package/dist/document_loaders/web/imsdb.d.ts +0 -23
- package/dist/document_loaders/web/imsdb.js +0 -40
- package/dist/document_loaders/web/notionapi.cjs +0 -404
- package/dist/document_loaders/web/notionapi.d.ts +0 -133
- package/dist/document_loaders/web/notionapi.js +0 -392
- package/dist/document_loaders/web/notiondb.cjs +0 -199
- package/dist/document_loaders/web/notiondb.d.ts +0 -56
- package/dist/document_loaders/web/notiondb.js +0 -195
- package/dist/document_loaders/web/pdf.cjs +0 -140
- package/dist/document_loaders/web/pdf.d.ts +0 -35
- package/dist/document_loaders/web/pdf.js +0 -136
- package/dist/document_loaders/web/playwright.cjs +0 -89
- package/dist/document_loaders/web/playwright.d.ts +0 -58
- package/dist/document_loaders/web/playwright.js +0 -85
- package/dist/document_loaders/web/puppeteer.cjs +0 -139
- package/dist/document_loaders/web/puppeteer.d.ts +0 -82
- package/dist/document_loaders/web/puppeteer.js +0 -135
- package/dist/document_loaders/web/recursive_url.cjs +0 -198
- package/dist/document_loaders/web/recursive_url.d.ts +0 -33
- package/dist/document_loaders/web/recursive_url.js +0 -194
- package/dist/document_loaders/web/s3.cjs +0 -164
- package/dist/document_loaders/web/s3.d.ts +0 -78
- package/dist/document_loaders/web/s3.js +0 -137
- package/dist/document_loaders/web/searchapi.cjs +0 -150
- package/dist/document_loaders/web/searchapi.d.ts +0 -76
- package/dist/document_loaders/web/searchapi.js +0 -146
- package/dist/document_loaders/web/serpapi.cjs +0 -127
- package/dist/document_loaders/web/serpapi.d.ts +0 -62
- package/dist/document_loaders/web/serpapi.js +0 -123
- package/dist/document_loaders/web/sitemap.cjs +0 -118
- package/dist/document_loaders/web/sitemap.d.ts +0 -41
- package/dist/document_loaders/web/sitemap.js +0 -114
- package/dist/document_loaders/web/sonix_audio.cjs +0 -68
- package/dist/document_loaders/web/sonix_audio.d.ts +0 -36
- package/dist/document_loaders/web/sonix_audio.js +0 -64
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +0 -157
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +0 -78
- package/dist/document_loaders/web/sort_xyz_blockchain.js +0 -153
- package/dist/document_loaders/web/youtube.cjs +0 -116
- package/dist/document_loaders/web/youtube.d.ts +0 -55
- package/dist/document_loaders/web/youtube.js +0 -112
- package/dist/experimental/chrome_ai/app/dist/bundle.cjs +0 -1250
- package/dist/experimental/chrome_ai/app/dist/bundle.d.ts +0 -1
- package/dist/experimental/chrome_ai/app/dist/bundle.js +0 -1249
- package/dist/experimental/tools/pyinterpreter.cjs +0 -248
- package/dist/experimental/tools/pyinterpreter.d.ts +0 -18
- package/dist/experimental/tools/pyinterpreter.js +0 -244
- package/dist/retrievers/self_query/chroma.cjs +0 -48
- package/dist/retrievers/self_query/chroma.d.ts +0 -26
- package/dist/retrievers/self_query/chroma.js +0 -44
- package/dist/retrievers/self_query/pinecone.cjs +0 -47
- package/dist/retrievers/self_query/pinecone.d.ts +0 -26
- package/dist/retrievers/self_query/pinecone.js +0 -43
- package/dist/retrievers/self_query/supabase.cjs +0 -278
- package/dist/retrievers/self_query/supabase.d.ts +0 -109
- package/dist/retrievers/self_query/supabase.js +0 -274
- package/dist/retrievers/self_query/supabase_utils.cjs +0 -264
- package/dist/retrievers/self_query/supabase_utils.d.ts +0 -101
- package/dist/retrievers/self_query/supabase_utils.js +0 -259
- package/dist/retrievers/self_query/vectara.cjs +0 -143
- package/dist/retrievers/self_query/vectara.d.ts +0 -42
- package/dist/retrievers/self_query/vectara.js +0 -139
- package/dist/retrievers/self_query/weaviate.cjs +0 -201
- package/dist/retrievers/self_query/weaviate.d.ts +0 -99
- package/dist/retrievers/self_query/weaviate.js +0 -197
- package/dist/types/assemblyai-types.cjs +0 -2
- package/dist/types/assemblyai-types.d.ts +0 -4
- package/dist/types/assemblyai-types.js +0 -1
- package/document_loaders/fs/chatgpt.cjs +0 -1
- package/document_loaders/fs/chatgpt.d.cts +0 -1
- package/document_loaders/fs/chatgpt.d.ts +0 -1
- package/document_loaders/fs/chatgpt.js +0 -1
- package/document_loaders/fs/csv.cjs +0 -1
- package/document_loaders/fs/csv.d.cts +0 -1
- package/document_loaders/fs/csv.d.ts +0 -1
- package/document_loaders/fs/csv.js +0 -1
- package/document_loaders/fs/docx.cjs +0 -1
- package/document_loaders/fs/docx.d.cts +0 -1
- package/document_loaders/fs/docx.d.ts +0 -1
- package/document_loaders/fs/docx.js +0 -1
- package/document_loaders/fs/epub.cjs +0 -1
- package/document_loaders/fs/epub.d.cts +0 -1
- package/document_loaders/fs/epub.d.ts +0 -1
- package/document_loaders/fs/epub.js +0 -1
- package/document_loaders/fs/notion.cjs +0 -1
- package/document_loaders/fs/notion.d.cts +0 -1
- package/document_loaders/fs/notion.d.ts +0 -1
- package/document_loaders/fs/notion.js +0 -1
- package/document_loaders/fs/obsidian.cjs +0 -1
- package/document_loaders/fs/obsidian.d.cts +0 -1
- package/document_loaders/fs/obsidian.d.ts +0 -1
- package/document_loaders/fs/obsidian.js +0 -1
- package/document_loaders/fs/openai_whisper_audio.cjs +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.cts +0 -1
- package/document_loaders/fs/openai_whisper_audio.d.ts +0 -1
- package/document_loaders/fs/openai_whisper_audio.js +0 -1
- package/document_loaders/fs/pdf.cjs +0 -1
- package/document_loaders/fs/pdf.d.cts +0 -1
- package/document_loaders/fs/pdf.d.ts +0 -1
- package/document_loaders/fs/pdf.js +0 -1
- package/document_loaders/fs/pptx.cjs +0 -1
- package/document_loaders/fs/pptx.d.cts +0 -1
- package/document_loaders/fs/pptx.d.ts +0 -1
- package/document_loaders/fs/pptx.js +0 -1
- package/document_loaders/fs/srt.cjs +0 -1
- package/document_loaders/fs/srt.d.cts +0 -1
- package/document_loaders/fs/srt.d.ts +0 -1
- package/document_loaders/fs/srt.js +0 -1
- package/document_loaders/fs/unstructured.cjs +0 -1
- package/document_loaders/fs/unstructured.d.cts +0 -1
- package/document_loaders/fs/unstructured.d.ts +0 -1
- package/document_loaders/fs/unstructured.js +0 -1
- package/document_loaders/web/apify_dataset.cjs +0 -1
- package/document_loaders/web/apify_dataset.d.cts +0 -1
- package/document_loaders/web/apify_dataset.d.ts +0 -1
- package/document_loaders/web/apify_dataset.js +0 -1
- package/document_loaders/web/assemblyai.cjs +0 -1
- package/document_loaders/web/assemblyai.d.cts +0 -1
- package/document_loaders/web/assemblyai.d.ts +0 -1
- package/document_loaders/web/assemblyai.js +0 -1
- package/document_loaders/web/azure_blob_storage_container.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_container.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_container.js +0 -1
- package/document_loaders/web/azure_blob_storage_file.cjs +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.cts +0 -1
- package/document_loaders/web/azure_blob_storage_file.d.ts +0 -1
- package/document_loaders/web/azure_blob_storage_file.js +0 -1
- package/document_loaders/web/browserbase.cjs +0 -1
- package/document_loaders/web/browserbase.d.cts +0 -1
- package/document_loaders/web/browserbase.d.ts +0 -1
- package/document_loaders/web/browserbase.js +0 -1
- package/document_loaders/web/cheerio.cjs +0 -1
- package/document_loaders/web/cheerio.d.cts +0 -1
- package/document_loaders/web/cheerio.d.ts +0 -1
- package/document_loaders/web/cheerio.js +0 -1
- package/document_loaders/web/college_confidential.cjs +0 -1
- package/document_loaders/web/college_confidential.d.cts +0 -1
- package/document_loaders/web/college_confidential.d.ts +0 -1
- package/document_loaders/web/college_confidential.js +0 -1
- package/document_loaders/web/confluence.cjs +0 -1
- package/document_loaders/web/confluence.d.cts +0 -1
- package/document_loaders/web/confluence.d.ts +0 -1
- package/document_loaders/web/confluence.js +0 -1
- package/document_loaders/web/couchbase.cjs +0 -1
- package/document_loaders/web/couchbase.d.cts +0 -1
- package/document_loaders/web/couchbase.d.ts +0 -1
- package/document_loaders/web/couchbase.js +0 -1
- package/document_loaders/web/figma.cjs +0 -1
- package/document_loaders/web/figma.d.cts +0 -1
- package/document_loaders/web/figma.d.ts +0 -1
- package/document_loaders/web/figma.js +0 -1
- package/document_loaders/web/firecrawl.cjs +0 -1
- package/document_loaders/web/firecrawl.d.cts +0 -1
- package/document_loaders/web/firecrawl.d.ts +0 -1
- package/document_loaders/web/firecrawl.js +0 -1
- package/document_loaders/web/gitbook.cjs +0 -1
- package/document_loaders/web/gitbook.d.cts +0 -1
- package/document_loaders/web/gitbook.d.ts +0 -1
- package/document_loaders/web/gitbook.js +0 -1
- package/document_loaders/web/github.cjs +0 -1
- package/document_loaders/web/github.d.cts +0 -1
- package/document_loaders/web/github.d.ts +0 -1
- package/document_loaders/web/github.js +0 -1
- package/document_loaders/web/hn.cjs +0 -1
- package/document_loaders/web/hn.d.cts +0 -1
- package/document_loaders/web/hn.d.ts +0 -1
- package/document_loaders/web/hn.js +0 -1
- package/document_loaders/web/imsdb.cjs +0 -1
- package/document_loaders/web/imsdb.d.cts +0 -1
- package/document_loaders/web/imsdb.d.ts +0 -1
- package/document_loaders/web/imsdb.js +0 -1
- package/document_loaders/web/notionapi.cjs +0 -1
- package/document_loaders/web/notionapi.d.cts +0 -1
- package/document_loaders/web/notionapi.d.ts +0 -1
- package/document_loaders/web/notionapi.js +0 -1
- package/document_loaders/web/notiondb.cjs +0 -1
- package/document_loaders/web/notiondb.d.cts +0 -1
- package/document_loaders/web/notiondb.d.ts +0 -1
- package/document_loaders/web/notiondb.js +0 -1
- package/document_loaders/web/pdf.cjs +0 -1
- package/document_loaders/web/pdf.d.cts +0 -1
- package/document_loaders/web/pdf.d.ts +0 -1
- package/document_loaders/web/pdf.js +0 -1
- package/document_loaders/web/playwright.cjs +0 -1
- package/document_loaders/web/playwright.d.cts +0 -1
- package/document_loaders/web/playwright.d.ts +0 -1
- package/document_loaders/web/playwright.js +0 -1
- package/document_loaders/web/puppeteer.cjs +0 -1
- package/document_loaders/web/puppeteer.d.cts +0 -1
- package/document_loaders/web/puppeteer.d.ts +0 -1
- package/document_loaders/web/puppeteer.js +0 -1
- package/document_loaders/web/recursive_url.cjs +0 -1
- package/document_loaders/web/recursive_url.d.cts +0 -1
- package/document_loaders/web/recursive_url.d.ts +0 -1
- package/document_loaders/web/recursive_url.js +0 -1
- package/document_loaders/web/s3.cjs +0 -1
- package/document_loaders/web/s3.d.cts +0 -1
- package/document_loaders/web/s3.d.ts +0 -1
- package/document_loaders/web/s3.js +0 -1
- package/document_loaders/web/searchapi.cjs +0 -1
- package/document_loaders/web/searchapi.d.cts +0 -1
- package/document_loaders/web/searchapi.d.ts +0 -1
- package/document_loaders/web/searchapi.js +0 -1
- package/document_loaders/web/serpapi.cjs +0 -1
- package/document_loaders/web/serpapi.d.cts +0 -1
- package/document_loaders/web/serpapi.d.ts +0 -1
- package/document_loaders/web/serpapi.js +0 -1
- package/document_loaders/web/sitemap.cjs +0 -1
- package/document_loaders/web/sitemap.d.cts +0 -1
- package/document_loaders/web/sitemap.d.ts +0 -1
- package/document_loaders/web/sitemap.js +0 -1
- package/document_loaders/web/sonix_audio.cjs +0 -1
- package/document_loaders/web/sonix_audio.d.cts +0 -1
- package/document_loaders/web/sonix_audio.d.ts +0 -1
- package/document_loaders/web/sonix_audio.js +0 -1
- package/document_loaders/web/sort_xyz_blockchain.cjs +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.cts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.d.ts +0 -1
- package/document_loaders/web/sort_xyz_blockchain.js +0 -1
- package/document_loaders/web/youtube.cjs +0 -1
- package/document_loaders/web/youtube.d.cts +0 -1
- package/document_loaders/web/youtube.d.ts +0 -1
- package/document_loaders/web/youtube.js +0 -1
- package/experimental/tools/pyinterpreter.cjs +0 -1
- package/experimental/tools/pyinterpreter.d.cts +0 -1
- package/experimental/tools/pyinterpreter.d.ts +0 -1
- package/experimental/tools/pyinterpreter.js +0 -1
- package/memory/index.cjs +0 -1
- package/memory/index.d.cts +0 -1
- package/memory/index.d.ts +0 -1
- package/memory/index.js +0 -1
- package/retrievers/self_query/chroma.cjs +0 -1
- package/retrievers/self_query/chroma.d.cts +0 -1
- package/retrievers/self_query/chroma.d.ts +0 -1
- package/retrievers/self_query/chroma.js +0 -1
- package/retrievers/self_query/pinecone.cjs +0 -1
- package/retrievers/self_query/pinecone.d.cts +0 -1
- package/retrievers/self_query/pinecone.d.ts +0 -1
- package/retrievers/self_query/pinecone.js +0 -1
- package/retrievers/self_query/supabase.cjs +0 -1
- package/retrievers/self_query/supabase.d.cts +0 -1
- package/retrievers/self_query/supabase.d.ts +0 -1
- package/retrievers/self_query/supabase.js +0 -1
- package/retrievers/self_query/vectara.cjs +0 -1
- package/retrievers/self_query/vectara.d.cts +0 -1
- package/retrievers/self_query/vectara.d.ts +0 -1
- package/retrievers/self_query/vectara.js +0 -1
- package/retrievers/self_query/weaviate.cjs +0 -1
- package/retrievers/self_query/weaviate.d.cts +0 -1
- package/retrievers/self_query/weaviate.d.ts +0 -1
- package/retrievers/self_query/weaviate.js +0 -1
|
@@ -1,615 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// CommonJS/ES-module interop helper: reuses an already-installed
// `this.__importDefault` when one exists, otherwise falls back to the local
// implementation below.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules are handed back untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so that callers can always read `.default`.
    return { "default": mod };
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.GithubRepoLoader = void 0;
|
|
7
|
-
const ignore_1 = __importDefault(require("ignore"));
|
|
8
|
-
const binary_extensions_1 = __importDefault(require("binary-extensions"));
|
|
9
|
-
const documents_1 = require("@langchain/core/documents");
|
|
10
|
-
const env_1 = require("@langchain/core/utils/env");
|
|
11
|
-
const async_caller_1 = require("@langchain/core/utils/async_caller");
|
|
12
|
-
const base_js_1 = require("../base.cjs");
|
|
13
|
-
const directory_js_1 = require("../fs/directory.cjs");
|
|
14
|
-
const extname_js_1 = require("../../util/extname.cjs");
|
|
15
|
-
const entrypoint_deprecation_js_1 = require("../../util/entrypoint_deprecation.cjs");
|
|
16
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion020MigrationWarning)({
|
|
17
|
-
oldEntrypointName: "document_loaders/web/github",
|
|
18
|
-
newPackageName: "@langchain/community",
|
|
19
|
-
});
|
|
20
|
-
const extensions = /* #__PURE__ */ new Set(binary_extensions_1.default);
|
|
21
|
-
/**
 * Decides whether a file path points at a binary file, judging only by its
 * extension.
 * @param name The file path to inspect.
 * @returns `true` when the lowercased extension (without the leading dot) is
 * contained in the module-level `extensions` set, `false` otherwise.
 */
function isBinaryPath(name) {
    const dottedExtension = (0, extname_js_1.extname)(name);
    // Drop the leading "." and compare case-insensitively.
    const bareExtension = dottedExtension.slice(1).toLowerCase();
    return extensions.has(bareExtension);
}
|
|
30
|
-
/**
|
|
31
|
-
* @deprecated - Import from "@langchain/community/document_loaders/web/github" instead. This entrypoint will be removed in 0.3.0.
|
|
32
|
-
*
|
|
33
|
-
* A class that extends the BaseDocumentLoader and implements the
|
|
34
|
-
* GithubRepoLoaderParams interface. It represents a document loader for
|
|
35
|
-
* loading files from a GitHub repository.
|
|
36
|
-
*/
|
|
37
|
-
class GithubRepoLoader extends base_js_1.BaseDocumentLoader {
|
|
38
|
-
constructor(githubUrl, { accessToken = (0, env_1.getEnvironmentVariable)("GITHUB_ACCESS_TOKEN"), baseUrl = "https://github.com", apiUrl = "https://api.github.com", branch = "main", recursive = true, processSubmodules = false, unknown = directory_js_1.UnknownHandling.Warn, ignoreFiles = [], ignorePaths, verbose = false, maxConcurrency = 2, maxRetries = 2, ...rest } = {}) {
|
|
39
|
-
super();
|
|
40
|
-
Object.defineProperty(this, "baseUrl", {
|
|
41
|
-
enumerable: true,
|
|
42
|
-
configurable: true,
|
|
43
|
-
writable: true,
|
|
44
|
-
value: void 0
|
|
45
|
-
});
|
|
46
|
-
Object.defineProperty(this, "apiUrl", {
|
|
47
|
-
enumerable: true,
|
|
48
|
-
configurable: true,
|
|
49
|
-
writable: true,
|
|
50
|
-
value: void 0
|
|
51
|
-
});
|
|
52
|
-
Object.defineProperty(this, "owner", {
|
|
53
|
-
enumerable: true,
|
|
54
|
-
configurable: true,
|
|
55
|
-
writable: true,
|
|
56
|
-
value: void 0
|
|
57
|
-
});
|
|
58
|
-
Object.defineProperty(this, "repo", {
|
|
59
|
-
enumerable: true,
|
|
60
|
-
configurable: true,
|
|
61
|
-
writable: true,
|
|
62
|
-
value: void 0
|
|
63
|
-
});
|
|
64
|
-
Object.defineProperty(this, "initialPath", {
|
|
65
|
-
enumerable: true,
|
|
66
|
-
configurable: true,
|
|
67
|
-
writable: true,
|
|
68
|
-
value: void 0
|
|
69
|
-
});
|
|
70
|
-
Object.defineProperty(this, "headers", {
|
|
71
|
-
enumerable: true,
|
|
72
|
-
configurable: true,
|
|
73
|
-
writable: true,
|
|
74
|
-
value: {}
|
|
75
|
-
});
|
|
76
|
-
Object.defineProperty(this, "branch", {
|
|
77
|
-
enumerable: true,
|
|
78
|
-
configurable: true,
|
|
79
|
-
writable: true,
|
|
80
|
-
value: void 0
|
|
81
|
-
});
|
|
82
|
-
Object.defineProperty(this, "recursive", {
|
|
83
|
-
enumerable: true,
|
|
84
|
-
configurable: true,
|
|
85
|
-
writable: true,
|
|
86
|
-
value: void 0
|
|
87
|
-
});
|
|
88
|
-
Object.defineProperty(this, "processSubmodules", {
|
|
89
|
-
enumerable: true,
|
|
90
|
-
configurable: true,
|
|
91
|
-
writable: true,
|
|
92
|
-
value: void 0
|
|
93
|
-
});
|
|
94
|
-
Object.defineProperty(this, "unknown", {
|
|
95
|
-
enumerable: true,
|
|
96
|
-
configurable: true,
|
|
97
|
-
writable: true,
|
|
98
|
-
value: void 0
|
|
99
|
-
});
|
|
100
|
-
Object.defineProperty(this, "accessToken", {
|
|
101
|
-
enumerable: true,
|
|
102
|
-
configurable: true,
|
|
103
|
-
writable: true,
|
|
104
|
-
value: void 0
|
|
105
|
-
});
|
|
106
|
-
Object.defineProperty(this, "ignoreFiles", {
|
|
107
|
-
enumerable: true,
|
|
108
|
-
configurable: true,
|
|
109
|
-
writable: true,
|
|
110
|
-
value: void 0
|
|
111
|
-
});
|
|
112
|
-
Object.defineProperty(this, "ignore", {
|
|
113
|
-
enumerable: true,
|
|
114
|
-
configurable: true,
|
|
115
|
-
writable: true,
|
|
116
|
-
value: void 0
|
|
117
|
-
});
|
|
118
|
-
Object.defineProperty(this, "verbose", {
|
|
119
|
-
enumerable: true,
|
|
120
|
-
configurable: true,
|
|
121
|
-
writable: true,
|
|
122
|
-
value: void 0
|
|
123
|
-
});
|
|
124
|
-
Object.defineProperty(this, "maxConcurrency", {
|
|
125
|
-
enumerable: true,
|
|
126
|
-
configurable: true,
|
|
127
|
-
writable: true,
|
|
128
|
-
value: void 0
|
|
129
|
-
});
|
|
130
|
-
Object.defineProperty(this, "maxRetries", {
|
|
131
|
-
enumerable: true,
|
|
132
|
-
configurable: true,
|
|
133
|
-
writable: true,
|
|
134
|
-
value: void 0
|
|
135
|
-
});
|
|
136
|
-
Object.defineProperty(this, "caller", {
|
|
137
|
-
enumerable: true,
|
|
138
|
-
configurable: true,
|
|
139
|
-
writable: true,
|
|
140
|
-
value: void 0
|
|
141
|
-
});
|
|
142
|
-
Object.defineProperty(this, "ignorePaths", {
|
|
143
|
-
enumerable: true,
|
|
144
|
-
configurable: true,
|
|
145
|
-
writable: true,
|
|
146
|
-
value: void 0
|
|
147
|
-
});
|
|
148
|
-
Object.defineProperty(this, "submoduleInfos", {
|
|
149
|
-
enumerable: true,
|
|
150
|
-
configurable: true,
|
|
151
|
-
writable: true,
|
|
152
|
-
value: void 0
|
|
153
|
-
});
|
|
154
|
-
this.baseUrl = baseUrl;
|
|
155
|
-
this.apiUrl = apiUrl;
|
|
156
|
-
const { owner, repo, path } = this.extractOwnerAndRepoAndPath(githubUrl);
|
|
157
|
-
this.owner = owner;
|
|
158
|
-
this.repo = repo;
|
|
159
|
-
this.initialPath = path;
|
|
160
|
-
this.branch = branch;
|
|
161
|
-
this.recursive = recursive;
|
|
162
|
-
// processing submodules without processing contents of other directories makes no sense
|
|
163
|
-
if (processSubmodules && !recursive) {
|
|
164
|
-
throw new Error(`Input property "recursive" must be true if "processSubmodules" is true.`);
|
|
165
|
-
}
|
|
166
|
-
this.processSubmodules = processSubmodules;
|
|
167
|
-
this.unknown = unknown;
|
|
168
|
-
this.accessToken = accessToken;
|
|
169
|
-
this.ignoreFiles = ignoreFiles;
|
|
170
|
-
this.verbose = verbose;
|
|
171
|
-
this.maxConcurrency = maxConcurrency;
|
|
172
|
-
this.maxRetries = maxRetries;
|
|
173
|
-
this.headers = {
|
|
174
|
-
"User-Agent": "langchain",
|
|
175
|
-
};
|
|
176
|
-
this.caller = new async_caller_1.AsyncCaller({
|
|
177
|
-
maxConcurrency,
|
|
178
|
-
maxRetries,
|
|
179
|
-
...rest,
|
|
180
|
-
});
|
|
181
|
-
this.ignorePaths = ignorePaths;
|
|
182
|
-
if (ignorePaths) {
|
|
183
|
-
this.ignore = ignore_1.default.default().add(ignorePaths);
|
|
184
|
-
}
|
|
185
|
-
if (this.accessToken) {
|
|
186
|
-
this.headers = {
|
|
187
|
-
...this.headers,
|
|
188
|
-
Authorization: `Bearer ${this.accessToken}`,
|
|
189
|
-
};
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
/**
|
|
193
|
-
* Extracts the owner, repository, and path from a GitHub URL.
|
|
194
|
-
* @param url The GitHub URL to extract information from.
|
|
195
|
-
* @returns An object containing the owner, repository, and path extracted from the GitHub URL.
|
|
196
|
-
*/
|
|
197
|
-
extractOwnerAndRepoAndPath(url) {
|
|
198
|
-
const match = url.match(new RegExp(`${this.baseUrl}/([^/]+)/([^/]+)(/tree/[^/]+/(.+))?`, "i"));
|
|
199
|
-
if (!match) {
|
|
200
|
-
throw new Error("Invalid GitHub URL format.");
|
|
201
|
-
}
|
|
202
|
-
return { owner: match[1], repo: match[2], path: match[4] || "" };
|
|
203
|
-
}
|
|
204
|
-
/**
|
|
205
|
-
* Fetches the files from the GitHub repository and creates Document
|
|
206
|
-
* instances for each file. It also handles error handling based on the
|
|
207
|
-
* unknown handling option.
|
|
208
|
-
* @returns A promise that resolves to an array of Document instances.
|
|
209
|
-
*/
|
|
210
|
-
async load() {
|
|
211
|
-
this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
|
|
212
|
-
// process repository without submodules
|
|
213
|
-
const documents = (await this.processRepo()).map((fileResponse) => new documents_1.Document({
|
|
214
|
-
pageContent: fileResponse.contents,
|
|
215
|
-
metadata: fileResponse.metadata,
|
|
216
|
-
}));
|
|
217
|
-
if (this.processSubmodules) {
|
|
218
|
-
// process submodules
|
|
219
|
-
await this.getSubmoduleInfo();
|
|
220
|
-
for (const submoduleInfo of this.submoduleInfos) {
|
|
221
|
-
documents.push(...(await this.loadSubmodule(submoduleInfo)));
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
return documents;
|
|
225
|
-
}
|
|
226
|
-
/**
|
|
227
|
-
* Asynchronously streams documents from the entire GitHub repository.
|
|
228
|
-
* It is suitable for situations where processing large repositories in a memory-efficient manner is required.
|
|
229
|
-
* @yields Yields a Promise that resolves to a Document object for each file or submodule content found in the repository.
|
|
230
|
-
*/
|
|
231
|
-
async *loadAsStream() {
|
|
232
|
-
this.log(`Loading documents from ${this.baseUrl}/${this.owner}/${this.repo}/${this.initialPath}...`);
|
|
233
|
-
yield* await this.processRepoAsStream(this.initialPath);
|
|
234
|
-
if (!this.processSubmodules) {
|
|
235
|
-
return;
|
|
236
|
-
}
|
|
237
|
-
await this.getSubmoduleInfo();
|
|
238
|
-
for (const submoduleInfo of this.submoduleInfos) {
|
|
239
|
-
yield* await this.loadSubmoduleAsStream(submoduleInfo);
|
|
240
|
-
}
|
|
241
|
-
}
|
|
242
|
-
/**
|
|
243
|
-
* Loads the information about Git submodules from the repository, if available.
|
|
244
|
-
*/
|
|
245
|
-
async getSubmoduleInfo() {
|
|
246
|
-
this.log("Loading info about submodules...");
|
|
247
|
-
// we have to fetch the files of the root directory to get the download url of the .gitmodules file
|
|
248
|
-
// however, we cannot reuse the files retrieved in processRepo() as initialPath may be != ""
|
|
249
|
-
// so it may be that we end up fetching this file list twice
|
|
250
|
-
const repoFiles = await this.fetchRepoFiles("");
|
|
251
|
-
const gitmodulesFile = repoFiles.filter(({ name }) => name === ".gitmodules")?.[0];
|
|
252
|
-
if (gitmodulesFile) {
|
|
253
|
-
const gitmodulesContent = await this.fetchFileContent({
|
|
254
|
-
download_url: gitmodulesFile.download_url,
|
|
255
|
-
});
|
|
256
|
-
this.submoduleInfos = await this.parseGitmodules(gitmodulesContent);
|
|
257
|
-
}
|
|
258
|
-
else {
|
|
259
|
-
this.submoduleInfos = [];
|
|
260
|
-
}
|
|
261
|
-
this.log(`Found ${this.submoduleInfos.length} submodules:`);
|
|
262
|
-
for (const submoduleInfo of this.submoduleInfos) {
|
|
263
|
-
this.log(JSON.stringify(submoduleInfo));
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
/**
|
|
267
|
-
* Parses the given content of a .gitmodules file. Furthermore, queries the current SHA ref of all submodules.
|
|
268
|
-
* Returns the submodule information as array.
|
|
269
|
-
* @param gitmodulesContent the content of a .gitmodules file
|
|
270
|
-
*/
|
|
271
|
-
async parseGitmodules(gitmodulesContent) {
|
|
272
|
-
let validGitmodulesContent = gitmodulesContent;
|
|
273
|
-
// in case the .gitmodules file does not end with a newline, we add one to make the regex work
|
|
274
|
-
if (!validGitmodulesContent.endsWith("\n")) {
|
|
275
|
-
validGitmodulesContent += "\n";
|
|
276
|
-
}
|
|
277
|
-
// catches the initial line of submodule entries
|
|
278
|
-
const submodulePattern = /\[submodule "(.*?)"]\n((\s+.*?\s*=\s*.*?\n)*)/g;
|
|
279
|
-
// catches the properties of a submodule
|
|
280
|
-
const keyValuePattern = /\s+(.*?)\s*=\s*(.*?)\s/g;
|
|
281
|
-
const submoduleInfos = [];
|
|
282
|
-
for (const [, name, propertyLines] of validGitmodulesContent.matchAll(submodulePattern)) {
|
|
283
|
-
if (!name || !propertyLines) {
|
|
284
|
-
throw new Error("Could not parse submodule entry");
|
|
285
|
-
}
|
|
286
|
-
const submodulePropertyLines = propertyLines.matchAll(keyValuePattern);
|
|
287
|
-
let path;
|
|
288
|
-
let url;
|
|
289
|
-
for (const [, key, value] of submodulePropertyLines) {
|
|
290
|
-
if (!key || !value) {
|
|
291
|
-
throw new Error(`Could not parse key/value pairs for submodule ${name}`);
|
|
292
|
-
}
|
|
293
|
-
switch (key) {
|
|
294
|
-
case "path":
|
|
295
|
-
path = value;
|
|
296
|
-
break;
|
|
297
|
-
case "url":
|
|
298
|
-
url = value;
|
|
299
|
-
if (url.endsWith(".git")) {
|
|
300
|
-
url = url.substring(0, url.length - 4);
|
|
301
|
-
}
|
|
302
|
-
break;
|
|
303
|
-
default:
|
|
304
|
-
// ignoring unused keys
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
if (!path || !url) {
|
|
308
|
-
throw new Error(`Missing properties for submodule ${name}`);
|
|
309
|
-
}
|
|
310
|
-
// fetch the current ref of the submodule
|
|
311
|
-
const files = await this.fetchRepoFiles(path);
|
|
312
|
-
const submoduleInfo = {
|
|
313
|
-
name,
|
|
314
|
-
path,
|
|
315
|
-
url,
|
|
316
|
-
ref: files[0].sha,
|
|
317
|
-
};
|
|
318
|
-
submoduleInfos.push(submoduleInfo);
|
|
319
|
-
}
|
|
320
|
-
return submoduleInfos;
|
|
321
|
-
}
|
|
322
|
-
/**
|
|
323
|
-
* Loads the documents of the given submodule. Uses the same parameters as for the current repository.
|
|
324
|
-
* External submodules, i.e. submodules pointing to another GitHub instance, are ignored.
|
|
325
|
-
* @param submoduleInfo the info about the submodule to be loaded
|
|
326
|
-
*/
|
|
327
|
-
async loadSubmodule(submoduleInfo) {
|
|
328
|
-
if (!submoduleInfo.url.startsWith(this.baseUrl)) {
|
|
329
|
-
this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
|
|
330
|
-
return [];
|
|
331
|
-
}
|
|
332
|
-
else if (!submoduleInfo.path.startsWith(this.initialPath)) {
|
|
333
|
-
this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
|
|
334
|
-
return [];
|
|
335
|
-
}
|
|
336
|
-
else {
|
|
337
|
-
this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
|
|
338
|
-
return new GithubRepoLoader(submoduleInfo.url, {
|
|
339
|
-
accessToken: this.accessToken,
|
|
340
|
-
apiUrl: this.apiUrl,
|
|
341
|
-
baseUrl: this.baseUrl,
|
|
342
|
-
branch: submoduleInfo.ref,
|
|
343
|
-
recursive: this.recursive,
|
|
344
|
-
processSubmodules: this.processSubmodules,
|
|
345
|
-
unknown: this.unknown,
|
|
346
|
-
ignoreFiles: this.ignoreFiles,
|
|
347
|
-
ignorePaths: this.ignorePaths,
|
|
348
|
-
verbose: this.verbose,
|
|
349
|
-
maxConcurrency: this.maxConcurrency,
|
|
350
|
-
maxRetries: this.maxRetries,
|
|
351
|
-
}).load();
|
|
352
|
-
}
|
|
353
|
-
}
|
|
354
|
-
/**
|
|
355
|
-
* Asynchronously processes and streams the contents of a specified submodule in the GitHub repository.
|
|
356
|
-
* @param submoduleInfo the info about the submodule to be loaded
|
|
357
|
-
* @yields Yields a Promise that resolves to a Document object for each file found in the submodule.
|
|
358
|
-
*/
|
|
359
|
-
async *loadSubmoduleAsStream(submoduleInfo) {
|
|
360
|
-
if (!submoduleInfo.url.startsWith(this.baseUrl)) {
|
|
361
|
-
this.log(`Ignoring external submodule ${submoduleInfo.url}.`);
|
|
362
|
-
yield* [];
|
|
363
|
-
}
|
|
364
|
-
if (!submoduleInfo.path.startsWith(this.initialPath)) {
|
|
365
|
-
this.log(`Ignoring submodule ${submoduleInfo.url}, as it is not on initial path.`);
|
|
366
|
-
yield* [];
|
|
367
|
-
}
|
|
368
|
-
this.log(`Accessing submodule ${submoduleInfo.name} (${submoduleInfo.url})...`);
|
|
369
|
-
const submoduleLoader = new GithubRepoLoader(submoduleInfo.url, {
|
|
370
|
-
accessToken: this.accessToken,
|
|
371
|
-
baseUrl: this.baseUrl,
|
|
372
|
-
apiUrl: this.apiUrl,
|
|
373
|
-
branch: submoduleInfo.ref,
|
|
374
|
-
recursive: this.recursive,
|
|
375
|
-
processSubmodules: this.processSubmodules,
|
|
376
|
-
unknown: this.unknown,
|
|
377
|
-
ignoreFiles: this.ignoreFiles,
|
|
378
|
-
ignorePaths: this.ignorePaths,
|
|
379
|
-
verbose: this.verbose,
|
|
380
|
-
maxConcurrency: this.maxConcurrency,
|
|
381
|
-
maxRetries: this.maxRetries,
|
|
382
|
-
});
|
|
383
|
-
yield* await submoduleLoader.processRepoAsStream(submoduleInfo.path);
|
|
384
|
-
}
|
|
385
|
-
/**
|
|
386
|
-
* Determines whether a file or directory should be ignored based on its
|
|
387
|
-
* path and type.
|
|
388
|
-
* @param path The path of the file or directory.
|
|
389
|
-
* @param fileType The type of the file or directory.
|
|
390
|
-
* @returns A boolean indicating whether the file or directory should be ignored.
|
|
391
|
-
*/
|
|
392
|
-
shouldIgnore(path, fileType) {
|
|
393
|
-
if (fileType !== "dir" && isBinaryPath(path)) {
|
|
394
|
-
return true;
|
|
395
|
-
}
|
|
396
|
-
if (this.ignore !== undefined) {
|
|
397
|
-
return this.ignore.ignores(path);
|
|
398
|
-
}
|
|
399
|
-
return (fileType !== "dir" &&
|
|
400
|
-
this.ignoreFiles.some((pattern) => {
|
|
401
|
-
if (typeof pattern === "string") {
|
|
402
|
-
return path === pattern;
|
|
403
|
-
}
|
|
404
|
-
try {
|
|
405
|
-
return pattern.test(path);
|
|
406
|
-
}
|
|
407
|
-
catch {
|
|
408
|
-
throw new Error(`Unknown ignore file pattern: ${pattern}`);
|
|
409
|
-
}
|
|
410
|
-
}));
|
|
411
|
-
}
|
|
412
|
-
/**
|
|
413
|
-
* Takes the file info and wrap it in a promise that will resolve to the file content and metadata
|
|
414
|
-
* @param file
|
|
415
|
-
* @returns
|
|
416
|
-
*/
|
|
417
|
-
async fetchFileContentWrapper(file) {
|
|
418
|
-
const fileContent = await this.fetchFileContent(file).catch((error) => {
|
|
419
|
-
this.handleError(`Failed wrap file content: ${file}, ${error}`);
|
|
420
|
-
});
|
|
421
|
-
return {
|
|
422
|
-
contents: fileContent || "",
|
|
423
|
-
metadata: {
|
|
424
|
-
source: file.path,
|
|
425
|
-
repository: `${this.baseUrl}/${this.owner}/${this.repo}`,
|
|
426
|
-
branch: this.branch,
|
|
427
|
-
},
|
|
428
|
-
};
|
|
429
|
-
}
|
|
430
|
-
/**
|
|
431
|
-
* Maps a list of files / directories to a list of promises that will fetch the file / directory contents
|
|
432
|
-
*/
|
|
433
|
-
async getCurrentDirectoryFilePromises(files) {
|
|
434
|
-
const currentDirectoryFilePromises = [];
|
|
435
|
-
// Directories have nested files / directories, which is why this is a list of promises of promises
|
|
436
|
-
const currentDirectoryDirectoryPromises = [];
|
|
437
|
-
for (const file of files) {
|
|
438
|
-
if (this.shouldIgnore(file.path, file.type)) {
|
|
439
|
-
continue;
|
|
440
|
-
}
|
|
441
|
-
if (file.type === "file" && file.size === 0) {
|
|
442
|
-
// this is a submodule. ignoring for the moment. submodule processing is done separately
|
|
443
|
-
continue;
|
|
444
|
-
}
|
|
445
|
-
if (file.type !== "dir") {
|
|
446
|
-
try {
|
|
447
|
-
currentDirectoryFilePromises.push(this.fetchFileContentWrapper(file));
|
|
448
|
-
}
|
|
449
|
-
catch (e) {
|
|
450
|
-
this.handleError(`Failed to fetch file content: ${file.path}, ${e}`);
|
|
451
|
-
}
|
|
452
|
-
}
|
|
453
|
-
else if (this.recursive) {
|
|
454
|
-
currentDirectoryDirectoryPromises.push(this.processDirectory(file.path));
|
|
455
|
-
}
|
|
456
|
-
}
|
|
457
|
-
const curDirDirectories = await Promise.all(currentDirectoryDirectoryPromises);
|
|
458
|
-
return [...currentDirectoryFilePromises, ...curDirDirectories.flat()];
|
|
459
|
-
}
|
|
460
|
-
/**
|
|
461
|
-
* Begins the process of fetching the contents of the repository
|
|
462
|
-
*/
|
|
463
|
-
async processRepo() {
|
|
464
|
-
try {
|
|
465
|
-
// Get the list of file / directory names in the root directory
|
|
466
|
-
const files = await this.fetchRepoFiles(this.initialPath);
|
|
467
|
-
// Map the file / directory paths to promises that will fetch the file / directory contents
|
|
468
|
-
const currentDirectoryFilePromises = await this.getCurrentDirectoryFilePromises(files);
|
|
469
|
-
return Promise.all(currentDirectoryFilePromises);
|
|
470
|
-
}
|
|
471
|
-
catch (error) {
|
|
472
|
-
this.handleError(`Failed to process directory: ${this.initialPath}, ${error}`);
|
|
473
|
-
return Promise.reject(error);
|
|
474
|
-
}
|
|
475
|
-
}
|
|
476
|
-
/**
|
|
477
|
-
* Asynchronously processes the contents of the entire GitHub repository,
|
|
478
|
-
* streaming each file as a Document object.
|
|
479
|
-
* @param path The path of the directory to process.
|
|
480
|
-
* @yields Yields a Promise that resolves to a Document object for each file found in the repository.
|
|
481
|
-
*/
|
|
482
|
-
async *processRepoAsStream(path) {
|
|
483
|
-
const files = await this.fetchRepoFiles(path);
|
|
484
|
-
for (const file of files) {
|
|
485
|
-
if (this.shouldIgnore(file.path, file.type)) {
|
|
486
|
-
continue;
|
|
487
|
-
}
|
|
488
|
-
if (file.type === "file") {
|
|
489
|
-
try {
|
|
490
|
-
const fileResponse = await this.fetchFileContentWrapper(file);
|
|
491
|
-
yield new documents_1.Document({
|
|
492
|
-
pageContent: fileResponse.contents,
|
|
493
|
-
metadata: fileResponse.metadata,
|
|
494
|
-
});
|
|
495
|
-
}
|
|
496
|
-
catch (error) {
|
|
497
|
-
this.handleError(`Failed to fetch file content: ${file.path}, ${error}`);
|
|
498
|
-
}
|
|
499
|
-
}
|
|
500
|
-
else if (this.recursive) {
|
|
501
|
-
yield* await this.processDirectoryAsStream(file.path);
|
|
502
|
-
}
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
/**
|
|
506
|
-
* Fetches the contents of a directory and maps the file / directory paths
|
|
507
|
-
* to promises that will fetch the file / directory contents.
|
|
508
|
-
* @param path The path of the directory to process.
|
|
509
|
-
* @returns A promise that resolves to an array of promises that will fetch the file / directory contents.
|
|
510
|
-
*/
|
|
511
|
-
async processDirectory(path) {
|
|
512
|
-
try {
|
|
513
|
-
const files = await this.fetchRepoFiles(path);
|
|
514
|
-
return this.getCurrentDirectoryFilePromises(files);
|
|
515
|
-
}
|
|
516
|
-
catch (error) {
|
|
517
|
-
this.handleError(`Failed to process directory: ${path}, ${error}`);
|
|
518
|
-
return Promise.reject(error);
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
/**
|
|
522
|
-
* Asynchronously processes the contents of a given directory in the GitHub repository,
|
|
523
|
-
* streaming each file as a Document object.
|
|
524
|
-
* @param path The path of the directory to process.
|
|
525
|
-
* @yields Yields a Promise that resolves to a Document object for each file in the directory.
|
|
526
|
-
*/
|
|
527
|
-
async *processDirectoryAsStream(path) {
|
|
528
|
-
const files = await this.fetchRepoFiles(path);
|
|
529
|
-
for (const file of files) {
|
|
530
|
-
if (this.shouldIgnore(file.path, file.type)) {
|
|
531
|
-
continue;
|
|
532
|
-
}
|
|
533
|
-
if (file.type === "file") {
|
|
534
|
-
try {
|
|
535
|
-
const fileResponse = await this.fetchFileContentWrapper(file);
|
|
536
|
-
yield new documents_1.Document({
|
|
537
|
-
pageContent: fileResponse.contents,
|
|
538
|
-
metadata: fileResponse.metadata,
|
|
539
|
-
});
|
|
540
|
-
}
|
|
541
|
-
catch {
|
|
542
|
-
this.handleError(`Failed to fetch file content: ${file.path}`);
|
|
543
|
-
}
|
|
544
|
-
}
|
|
545
|
-
else if (this.recursive) {
|
|
546
|
-
yield* await this.processDirectoryAsStream(file.path);
|
|
547
|
-
}
|
|
548
|
-
}
|
|
549
|
-
}
|
|
550
|
-
/**
|
|
551
|
-
* Fetches the files from a GitHub repository.
|
|
552
|
-
* If the path denotes a single file, the resulting array contains only one element.
|
|
553
|
-
* @param path The path of the repository to fetch the files from.
|
|
554
|
-
* @returns A promise that resolves to an array of GithubFile instances.
|
|
555
|
-
*/
|
|
556
|
-
async fetchRepoFiles(path) {
|
|
557
|
-
const url = `${this.apiUrl}/repos/${this.owner}/${this.repo}/contents/${path}?ref=${this.branch}`;
|
|
558
|
-
return this.caller.call(async () => {
|
|
559
|
-
this.log(`Fetching ${url}`);
|
|
560
|
-
const response = await fetch(url, { headers: this.headers });
|
|
561
|
-
const data = await response.json();
|
|
562
|
-
if (!response.ok) {
|
|
563
|
-
throw new Error(`Unable to fetch repository files: ${response.status} ${JSON.stringify(data)}`);
|
|
564
|
-
}
|
|
565
|
-
if (Array.isArray(data)) {
|
|
566
|
-
return data;
|
|
567
|
-
}
|
|
568
|
-
else {
|
|
569
|
-
return [data];
|
|
570
|
-
}
|
|
571
|
-
});
|
|
572
|
-
}
|
|
573
|
-
/**
|
|
574
|
-
* Fetches the content of a file from a GitHub repository.
|
|
575
|
-
* @param file The file to fetch the content from.
|
|
576
|
-
* @returns A promise that resolves to the content of the file.
|
|
577
|
-
*/
|
|
578
|
-
async fetchFileContent(file) {
|
|
579
|
-
return this.caller.call(async () => {
|
|
580
|
-
this.log(`Fetching ${file.download_url}`);
|
|
581
|
-
const response = await fetch(file.download_url, {
|
|
582
|
-
headers: this.headers,
|
|
583
|
-
});
|
|
584
|
-
return response.text();
|
|
585
|
-
});
|
|
586
|
-
}
|
|
587
|
-
/**
|
|
588
|
-
* Handles errors based on the unknown handling option.
|
|
589
|
-
* @param message The error message.
|
|
590
|
-
* @returns void
|
|
591
|
-
*/
|
|
592
|
-
handleError(message) {
|
|
593
|
-
switch (this.unknown) {
|
|
594
|
-
case directory_js_1.UnknownHandling.Ignore:
|
|
595
|
-
break;
|
|
596
|
-
case directory_js_1.UnknownHandling.Warn:
|
|
597
|
-
console.warn(message);
|
|
598
|
-
break;
|
|
599
|
-
case directory_js_1.UnknownHandling.Error:
|
|
600
|
-
throw new Error(message);
|
|
601
|
-
default:
|
|
602
|
-
throw new Error(`Unknown unknown handling: ${this.unknown}`);
|
|
603
|
-
}
|
|
604
|
-
}
|
|
605
|
-
/**
|
|
606
|
-
* Logs the given message to the console, if parameter 'verbose' is set to true.
|
|
607
|
-
* @param message the message to be logged.
|
|
608
|
-
*/
|
|
609
|
-
log(message) {
|
|
610
|
-
if (this.verbose) {
|
|
611
|
-
console.log(message);
|
|
612
|
-
}
|
|
613
|
-
}
|
|
614
|
-
}
|
|
615
|
-
exports.GithubRepoLoader = GithubRepoLoader;
|