@hazeljs/rag 0.2.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +192 -0
- package/README.md +504 -0
- package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
- package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
- package/dist/__tests__/graph/community-detector.test.js +87 -0
- package/dist/__tests__/graph/community-detector.test.js.map +1 -0
- package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
- package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
- package/dist/__tests__/graph/community-summarizer.test.js +131 -0
- package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
- package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
- package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
- package/dist/__tests__/graph/entity-extractor.test.js +129 -0
- package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
- package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
- package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
- package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
- package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
- package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/base.loader.test.js +114 -0
- package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
- package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/directory.loader.test.js +154 -0
- package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
- package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
- package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
- package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
- package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
- package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/rag-pipeline.test.js +210 -0
- package/dist/__tests__/rag-pipeline.test.js.map +1 -0
- package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
- package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/bm25.test.js +86 -0
- package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
- package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
- package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
- package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
- package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
- package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/multi-query.test.js +90 -0
- package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
- package/dist/__tests__/utils/similarity.test.d.ts +2 -0
- package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
- package/dist/__tests__/utils/similarity.test.js +47 -0
- package/dist/__tests__/utils/similarity.test.js.map +1 -0
- package/dist/agentic/agentic-rag.service.d.ts +49 -0
- package/dist/agentic/agentic-rag.service.d.ts.map +1 -0
- package/dist/agentic/agentic-rag.service.js +149 -0
- package/dist/agentic/agentic-rag.service.js.map +1 -0
- package/dist/agentic/decorators/active-learning.decorator.d.ts +19 -0
- package/dist/agentic/decorators/active-learning.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/active-learning.decorator.js +98 -0
- package/dist/agentic/decorators/active-learning.decorator.js.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +17 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js +103 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -0
- package/dist/agentic/decorators/cached.decorator.d.ts +18 -0
- package/dist/agentic/decorators/cached.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/cached.decorator.js +93 -0
- package/dist/agentic/decorators/cached.decorator.js.map +1 -0
- package/dist/agentic/decorators/context-aware.decorator.d.ts +16 -0
- package/dist/agentic/decorators/context-aware.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/context-aware.decorator.js +169 -0
- package/dist/agentic/decorators/context-aware.decorator.js.map +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts +16 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.js +142 -0
- package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts +15 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/hyde.decorator.js +91 -0
- package/dist/agentic/decorators/hyde.decorator.js.map +1 -0
- package/dist/agentic/decorators/index.d.ts +16 -0
- package/dist/agentic/decorators/index.d.ts.map +1 -0
- package/dist/agentic/decorators/index.js +32 -0
- package/dist/agentic/decorators/index.js.map +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts +15 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.js +109 -0
- package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts +20 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/query-planner.decorator.js +213 -0
- package/dist/agentic/decorators/query-planner.decorator.js.map +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts +16 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.js +143 -0
- package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts +20 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/self-reflective.decorator.js +189 -0
- package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -0
- package/dist/agentic/decorators/source-verification.decorator.d.ts +15 -0
- package/dist/agentic/decorators/source-verification.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/source-verification.decorator.js +121 -0
- package/dist/agentic/decorators/source-verification.decorator.js.map +1 -0
- package/dist/agentic/index.d.ts +9 -0
- package/dist/agentic/index.d.ts.map +1 -0
- package/dist/agentic/index.js +25 -0
- package/dist/agentic/index.js.map +1 -0
- package/dist/agentic/types.d.ts +210 -0
- package/dist/agentic/types.d.ts.map +1 -0
- package/dist/agentic/types.js +7 -0
- package/dist/agentic/types.js.map +1 -0
- package/dist/decorators/embeddable.decorator.d.ts +31 -0
- package/dist/decorators/embeddable.decorator.d.ts.map +1 -0
- package/dist/decorators/embeddable.decorator.js +44 -0
- package/dist/decorators/embeddable.decorator.js.map +1 -0
- package/dist/decorators/rag.decorator.d.ts +58 -0
- package/dist/decorators/rag.decorator.d.ts.map +1 -0
- package/dist/decorators/rag.decorator.js +78 -0
- package/dist/decorators/rag.decorator.js.map +1 -0
- package/dist/decorators/semantic-search.decorator.d.ts +69 -0
- package/dist/decorators/semantic-search.decorator.d.ts.map +1 -0
- package/dist/decorators/semantic-search.decorator.js +116 -0
- package/dist/decorators/semantic-search.decorator.js.map +1 -0
- package/dist/embeddings/cohere-embeddings.d.ts +33 -0
- package/dist/embeddings/cohere-embeddings.d.ts.map +1 -0
- package/dist/embeddings/cohere-embeddings.js +91 -0
- package/dist/embeddings/cohere-embeddings.js.map +1 -0
- package/dist/embeddings/openai-embeddings.d.ts +21 -0
- package/dist/embeddings/openai-embeddings.d.ts.map +1 -0
- package/dist/embeddings/openai-embeddings.js +53 -0
- package/dist/embeddings/openai-embeddings.js.map +1 -0
- package/dist/graph/community-detector.d.ts +45 -0
- package/dist/graph/community-detector.d.ts.map +1 -0
- package/dist/graph/community-detector.js +153 -0
- package/dist/graph/community-detector.js.map +1 -0
- package/dist/graph/community-summarizer.d.ts +41 -0
- package/dist/graph/community-summarizer.d.ts.map +1 -0
- package/dist/graph/community-summarizer.js +119 -0
- package/dist/graph/community-summarizer.js.map +1 -0
- package/dist/graph/entity-extractor.d.ts +47 -0
- package/dist/graph/entity-extractor.d.ts.map +1 -0
- package/dist/graph/entity-extractor.js +224 -0
- package/dist/graph/entity-extractor.js.map +1 -0
- package/dist/graph/graph-rag-pipeline.d.ts +83 -0
- package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
- package/dist/graph/graph-rag-pipeline.js +390 -0
- package/dist/graph/graph-rag-pipeline.js.map +1 -0
- package/dist/graph/graph.types.d.ts +186 -0
- package/dist/graph/graph.types.d.ts.map +1 -0
- package/dist/graph/graph.types.js +20 -0
- package/dist/graph/graph.types.js.map +1 -0
- package/dist/graph/index.d.ts +15 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/index.js +31 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/graph/knowledge-graph.d.ts +57 -0
- package/dist/graph/knowledge-graph.d.ts.map +1 -0
- package/dist/graph/knowledge-graph.js +198 -0
- package/dist/graph/knowledge-graph.js.map +1 -0
- package/dist/index.d.ts +29 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +58 -0
- package/dist/index.js.map +1 -0
- package/dist/loaders/base.loader.d.ts +108 -0
- package/dist/loaders/base.loader.d.ts.map +1 -0
- package/dist/loaders/base.loader.js +123 -0
- package/dist/loaders/base.loader.js.map +1 -0
- package/dist/loaders/csv-file.loader.d.ts +61 -0
- package/dist/loaders/csv-file.loader.d.ts.map +1 -0
- package/dist/loaders/csv-file.loader.js +162 -0
- package/dist/loaders/csv-file.loader.js.map +1 -0
- package/dist/loaders/directory.loader.d.ts +67 -0
- package/dist/loaders/directory.loader.d.ts.map +1 -0
- package/dist/loaders/directory.loader.js +163 -0
- package/dist/loaders/directory.loader.js.map +1 -0
- package/dist/loaders/docx.loader.d.ts +52 -0
- package/dist/loaders/docx.loader.d.ts.map +1 -0
- package/dist/loaders/docx.loader.js +110 -0
- package/dist/loaders/docx.loader.js.map +1 -0
- package/dist/loaders/github.loader.d.ts +114 -0
- package/dist/loaders/github.loader.d.ts.map +1 -0
- package/dist/loaders/github.loader.js +217 -0
- package/dist/loaders/github.loader.js.map +1 -0
- package/dist/loaders/html-file.loader.d.ts +55 -0
- package/dist/loaders/html-file.loader.d.ts.map +1 -0
- package/dist/loaders/html-file.loader.js +170 -0
- package/dist/loaders/html-file.loader.js.map +1 -0
- package/dist/loaders/index.d.ts +52 -0
- package/dist/loaders/index.d.ts.map +1 -0
- package/dist/loaders/index.js +61 -0
- package/dist/loaders/index.js.map +1 -0
- package/dist/loaders/json-file.loader.d.ts +51 -0
- package/dist/loaders/json-file.loader.d.ts.map +1 -0
- package/dist/loaders/json-file.loader.js +100 -0
- package/dist/loaders/json-file.loader.js.map +1 -0
- package/dist/loaders/markdown-file.loader.d.ts +61 -0
- package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
- package/dist/loaders/markdown-file.loader.js +148 -0
- package/dist/loaders/markdown-file.loader.js.map +1 -0
- package/dist/loaders/pdf.loader.d.ts +64 -0
- package/dist/loaders/pdf.loader.d.ts.map +1 -0
- package/dist/loaders/pdf.loader.js +163 -0
- package/dist/loaders/pdf.loader.js.map +1 -0
- package/dist/loaders/text-file.loader.d.ts +39 -0
- package/dist/loaders/text-file.loader.d.ts.map +1 -0
- package/dist/loaders/text-file.loader.js +69 -0
- package/dist/loaders/text-file.loader.js.map +1 -0
- package/dist/loaders/web.loader.d.ts +87 -0
- package/dist/loaders/web.loader.d.ts.map +1 -0
- package/dist/loaders/web.loader.js +194 -0
- package/dist/loaders/web.loader.js.map +1 -0
- package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
- package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
- package/dist/loaders/youtube-transcript.loader.js +254 -0
- package/dist/loaders/youtube-transcript.loader.js.map +1 -0
- package/dist/memory/index.d.ts +11 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +31 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/memory-manager.d.ts +96 -0
- package/dist/memory/memory-manager.d.ts.map +1 -0
- package/dist/memory/memory-manager.js +369 -0
- package/dist/memory/memory-manager.js.map +1 -0
- package/dist/memory/memory-store.interface.d.ts +73 -0
- package/dist/memory/memory-store.interface.d.ts.map +1 -0
- package/dist/memory/memory-store.interface.js +6 -0
- package/dist/memory/memory-store.interface.js.map +1 -0
- package/dist/memory/stores/buffer-memory.d.ts +47 -0
- package/dist/memory/stores/buffer-memory.d.ts.map +1 -0
- package/dist/memory/stores/buffer-memory.js +280 -0
- package/dist/memory/stores/buffer-memory.js.map +1 -0
- package/dist/memory/stores/hybrid-memory.d.ts +49 -0
- package/dist/memory/stores/hybrid-memory.d.ts.map +1 -0
- package/dist/memory/stores/hybrid-memory.js +194 -0
- package/dist/memory/stores/hybrid-memory.js.map +1 -0
- package/dist/memory/stores/vector-memory.d.ts +48 -0
- package/dist/memory/stores/vector-memory.d.ts.map +1 -0
- package/dist/memory/stores/vector-memory.js +312 -0
- package/dist/memory/stores/vector-memory.js.map +1 -0
- package/dist/memory/types.d.ts +119 -0
- package/dist/memory/types.d.ts.map +1 -0
- package/dist/memory/types.js +18 -0
- package/dist/memory/types.js.map +1 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
- package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
- package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
- package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
- package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
- package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/hyde.prompt.js +18 -0
- package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
- package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
- package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
- package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
- package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
- package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/query-planner.prompt.js +30 -0
- package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
- package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
- package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
- package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
- package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
- package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
- package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
- package/dist/prompts/community-summary.prompt.d.ts +9 -0
- package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
- package/dist/prompts/community-summary.prompt.js +30 -0
- package/dist/prompts/community-summary.prompt.js.map +1 -0
- package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
- package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
- package/dist/prompts/entity-extraction.prompt.js +39 -0
- package/dist/prompts/entity-extraction.prompt.js.map +1 -0
- package/dist/prompts/graph-search.prompt.d.ts +10 -0
- package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
- package/dist/prompts/graph-search.prompt.js +23 -0
- package/dist/prompts/graph-search.prompt.js.map +1 -0
- package/dist/prompts/index.d.ts +13 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +29 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/prompts/rag-answer.prompt.d.ts +9 -0
- package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
- package/dist/prompts/rag-answer.prompt.js +20 -0
- package/dist/prompts/rag-answer.prompt.js.map +1 -0
- package/dist/rag-pipeline-with-memory.d.ts +68 -0
- package/dist/rag-pipeline-with-memory.d.ts.map +1 -0
- package/dist/rag-pipeline-with-memory.js +186 -0
- package/dist/rag-pipeline-with-memory.js.map +1 -0
- package/dist/rag-pipeline.d.ts +59 -0
- package/dist/rag-pipeline.d.ts.map +1 -0
- package/dist/rag-pipeline.js +181 -0
- package/dist/rag-pipeline.js.map +1 -0
- package/dist/rag.module.d.ts +26 -0
- package/dist/rag.module.d.ts.map +1 -0
- package/dist/rag.module.js +40 -0
- package/dist/rag.module.js.map +1 -0
- package/dist/rag.service.d.ts +96 -0
- package/dist/rag.service.d.ts.map +1 -0
- package/dist/rag.service.js +173 -0
- package/dist/rag.service.js.map +1 -0
- package/dist/retrieval/bm25.d.ts +57 -0
- package/dist/retrieval/bm25.d.ts.map +1 -0
- package/dist/retrieval/bm25.js +106 -0
- package/dist/retrieval/bm25.js.map +1 -0
- package/dist/retrieval/hybrid-search.d.ts +48 -0
- package/dist/retrieval/hybrid-search.d.ts.map +1 -0
- package/dist/retrieval/hybrid-search.js +123 -0
- package/dist/retrieval/hybrid-search.js.map +1 -0
- package/dist/retrieval/multi-query.d.ts +38 -0
- package/dist/retrieval/multi-query.d.ts.map +1 -0
- package/dist/retrieval/multi-query.js +135 -0
- package/dist/retrieval/multi-query.js.map +1 -0
- package/dist/text-splitters/recursive-text-splitter.d.ts +21 -0
- package/dist/text-splitters/recursive-text-splitter.d.ts.map +1 -0
- package/dist/text-splitters/recursive-text-splitter.js +95 -0
- package/dist/text-splitters/recursive-text-splitter.js.map +1 -0
- package/dist/types/index.d.ts +144 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +16 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +16 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +58 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/vector-stores/chroma.store.d.ts +42 -0
- package/dist/vector-stores/chroma.store.d.ts.map +1 -0
- package/dist/vector-stores/chroma.store.js +242 -0
- package/dist/vector-stores/chroma.store.js.map +1 -0
- package/dist/vector-stores/memory-vector-store.d.ts +20 -0
- package/dist/vector-stores/memory-vector-store.d.ts.map +1 -0
- package/dist/vector-stores/memory-vector-store.js +94 -0
- package/dist/vector-stores/memory-vector-store.js.map +1 -0
- package/dist/vector-stores/pinecone.store.d.ts +34 -0
- package/dist/vector-stores/pinecone.store.d.ts.map +1 -0
- package/dist/vector-stores/pinecone.store.js +146 -0
- package/dist/vector-stores/pinecone.store.js.map +1 -0
- package/dist/vector-stores/qdrant.store.d.ts +33 -0
- package/dist/vector-stores/qdrant.store.d.ts.map +1 -0
- package/dist/vector-stores/qdrant.store.js +174 -0
- package/dist/vector-stores/qdrant.store.js.map +1 -0
- package/dist/vector-stores/weaviate.store.d.ts +37 -0
- package/dist/vector-stores/weaviate.store.d.ts.map +1 -0
- package/dist/vector-stores/weaviate.store.js +226 -0
- package/dist/vector-stores/weaviate.store.js.map +1 -0
- package/package.json +146 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* GitHubLoader
|
|
4
|
+
*
|
|
5
|
+
* Loads files from a GitHub repository using the GitHub REST API.
|
|
6
|
+
* No extra npm dependency required — uses Node.js built-in `fetch`.
|
|
7
|
+
*
|
|
8
|
+
* Supports:
|
|
9
|
+
* - Public repositories (no token needed, but rate-limited to 60 req/h)
|
|
10
|
+
* - Private repositories (requires `token`)
|
|
11
|
+
* - Specific branches, tags, or commits via `ref`
|
|
12
|
+
* - Recursive traversal of the whole repository tree in a single API call (no depth limit is applied)
|
|
13
|
+
* - Extension filtering by exact match on the lowercased file extension (`includeExtensions`, e.g. `['.md']`)
|
|
14
|
+
* - Path prefix filtering (`includePaths`)
|
|
15
|
+
*
|
|
16
|
+
* Rate limits:
|
|
17
|
+
* - Unauthenticated: 60 requests per hour per IP
|
|
18
|
+
* - Authenticated: 5,000 requests per hour per token
|
|
19
|
+
* For large repos, always provide a `token`.
|
|
20
|
+
*
|
|
21
|
+
* @example
|
|
22
|
+
* ```typescript
|
|
23
|
+
* // Load all Markdown files from a public repo
|
|
24
|
+
* const loader = new GitHubLoader({
|
|
25
|
+
* owner: 'hazeljs',
|
|
26
|
+
* repo: 'hazeljs',
|
|
27
|
+
* ref: 'main',
|
|
28
|
+
* includeExtensions: ['.md'],
|
|
29
|
+
* token: process.env.GITHUB_TOKEN,
|
|
30
|
+
* });
|
|
31
|
+
* const docs = await loader.load();
|
|
32
|
+
* // docs[0].metadata.path === 'README.md'
|
|
33
|
+
* // docs[0].metadata.url === 'https://github.com/hazeljs/hazeljs/blob/main/README.md'
|
|
34
|
+
* ```
|
|
35
|
+
*
|
|
36
|
+
* Load a specific directory only:
|
|
37
|
+
* ```typescript
|
|
38
|
+
* const loader = new GitHubLoader({
|
|
39
|
+
* owner: 'facebook',
|
|
40
|
+
* repo: 'react',
|
|
41
|
+
* ref: 'main',
|
|
42
|
+
* directory: 'packages/react/src',
|
|
43
|
+
* includeExtensions: ['.js', '.ts'],
|
|
44
|
+
* });
|
|
45
|
+
* ```
|
|
46
|
+
*/
|
|
47
|
+
// Decorator helper as emitted by the TypeScript compiler. An existing
// `this.__decorate` is reused when present; otherwise this function is used.
// - Fewer than 3 arguments: the value being decorated is `target` itself.
// - Otherwise: it is the property descriptor (looked up on `target` when
//   `desc` is passed as `null`).
// Decoration is delegated to `Reflect.decorate` when that exists; if not, the
// decorators are applied from last to first, each result replacing the
// previous one unless it is falsy. When more than 3 arguments were passed and
// the result is truthy it is written back with `Object.defineProperty`.
// Returns the final decorated value.
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
48
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
49
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
50
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
51
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
52
|
+
};
|
|
53
|
+
// Metadata helper as emitted by the TypeScript compiler. An existing
// `this.__metadata` is reused when present. Otherwise it returns
// `Reflect.metadata(k, v)` when `Reflect.metadata` is available and
// `undefined` when it is not.
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
54
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
55
|
+
};
|
|
56
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
57
|
+
exports.GitHubLoader = void 0;
|
|
58
|
+
const base_loader_1 = require("./base.loader");
|
|
59
|
+
/**
 * Loads text files from a GitHub repository as documents.
 *
 * The file list comes from a single call to the recursive git-tree endpoint
 * of the GitHub REST API; every selected file is then downloaded from
 * raw.githubusercontent.com. Files are fetched one at a time, and a failure
 * on an individual file is logged and skipped instead of aborting the load.
 */
let GitHubLoader = class GitHubLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader configuration. `owner` and `repo` are used as
     *   given; every other field falls back to the default below when it is
     *   `null` or `undefined`.
     */
    constructor(options) {
        super();
        this.apiBase = 'https://api.github.com';
        this.opts = {
            owner: options.owner,
            repo: options.repo,
            ref: options.ref ?? 'main',
            directory: options.directory ?? '',
            includeExtensions: options.includeExtensions ?? [],
            includePaths: options.includePaths ?? [],
            excludePaths: options.excludePaths ?? ['node_modules/', 'dist/', '.git/'],
            maxFiles: options.maxFiles ?? 500,
            maxFileSize: options.maxFileSize ?? 102400, // bytes
            token: options.token ?? '',
            timeout: options.timeout ?? 15000, // milliseconds, passed to setTimeout
            metadata: options.metadata ?? {},
        };
    }
    /**
     * Fetches the repository tree, applies the configured filters and
     * downloads the remaining files sequentially.
     *
     * @returns The documents built with `this.createDocument`, at most
     *   `maxFiles` of them. An empty array is returned (with a warning) when
     *   no file passes the filters.
     * @throws Whatever `getTree` throws; errors for individual files are
     *   caught, logged with `console.warn` and the file is skipped.
     */
    async load() {
        const treeItems = await this.getTree();
        const filtered = this.filterItems(treeItems);
        if (filtered.length === 0) {
            // eslint-disable-next-line no-console
            console.warn(`[GitHubLoader] No files matched the filter criteria in ${this.opts.owner}/${this.opts.repo}`);
            return [];
        }
        const docs = [];
        // Stop as soon as `maxFiles` documents were produced; skipped files
        // (binary or failed) do not count towards the limit.
        for (let i = 0; i < filtered.length && docs.length < this.opts.maxFiles; i++) {
            const item = filtered[i];
            try {
                const content = await this.getFileContent(item);
                if (content !== null) {
                    docs.push(this.createDocument(content, {
                        source: item.path,
                        path: item.path,
                        repo: `${this.opts.owner}/${this.opts.repo}`,
                        ref: this.opts.ref,
                        url: `https://github.com/${this.opts.owner}/${this.opts.repo}/blob/${this.opts.ref}/${item.path}`,
                        rawUrl: `https://raw.githubusercontent.com/${this.opts.owner}/${this.opts.repo}/${this.opts.ref}/${item.path}`,
                        sha: item.sha,
                        loaderType: 'github',
                        // Spread last so caller-supplied metadata can override the fields above.
                        ...this.opts.metadata,
                    }));
                }
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                // eslint-disable-next-line no-console
                console.warn(`[GitHubLoader] Skipping ${item.path}: ${message}`);
            }
        }
        return docs;
    }
    // ── Private: GitHub API helpers ──────────────────────────────────────────
    /**
     * Retrieves all blob entries of the tree at `ref`, optionally restricted
     * to the configured `directory`.
     *
     * @returns The tree entries whose `type` is `'blob'`.
     * @throws Error when the request fails or the response has no `tree` array.
     */
    async getTree() {
        // Use the recursive git tree endpoint — one API call for the whole repo
        const url = `${this.apiBase}/repos/${this.opts.owner}/${this.opts.repo}` +
            `/git/trees/${this.opts.ref}?recursive=1`;
        const data = await this.apiGet(url);
        // Fail with a readable message instead of a TypeError on `data.tree.filter`.
        if (!data || !Array.isArray(data.tree)) {
            throw new Error(`GitHub API returned an unexpected tree response for ` +
                `${this.opts.owner}/${this.opts.repo} at ref ${this.opts.ref}`);
        }
        if (data.truncated) {
            // eslint-disable-next-line no-console
            console.warn(`[GitHubLoader] Repository tree was truncated by the GitHub API ` +
                `(too many files). Use \`directory\` or \`includePaths\` to narrow the scope.`);
        }
        let items = data.tree.filter((item) => item.type === 'blob');
        // Restrict to `directory` prefix if set. Tree paths are repository-relative
        // without a leading slash, so leading/trailing slashes are stripped first.
        const directory = this.opts.directory.replace(/^\/+|\/+$/g, '');
        if (directory) {
            const prefix = directory + '/';
            items = items.filter((item) => item.path.startsWith(prefix));
        }
        return items;
    }
    /**
     * Applies the extension, include-path, exclude-path and size filters.
     *
     * @param items Tree entries; each is read for `path` and optional `size`.
     * @returns The entries that pass every filter. Oversized files are
     *   reported with `console.warn` before being dropped.
     */
    filterItems(items) {
        // Normalised once: the file extension is lowercased below, so the
        // configured extensions are compared in lowercase as well.
        const allowedExtensions = this.opts.includeExtensions.map((e) => String(e).toLowerCase());
        return items.filter((item) => {
            const path = item.path;
            // Take the extension from the file name only, so a dot in a parent
            // directory (e.g. `docs.v2/README`) is not mistaken for one.
            const baseName = path.slice(path.lastIndexOf('/') + 1);
            const dot = baseName.lastIndexOf('.');
            const ext = dot >= 0 ? baseName.slice(dot).toLowerCase() : '';
            // Extension filter
            if (allowedExtensions.length > 0 && !allowedExtensions.includes(ext)) {
                return false;
            }
            // Include path filter
            if (this.opts.includePaths.length > 0 &&
                !this.opts.includePaths.some((prefix) => path.startsWith(prefix))) {
                return false;
            }
            // Exclude path filter
            if (this.opts.excludePaths.some((prefix) => path.startsWith(prefix))) {
                return false;
            }
            // File size filter
            if (item.size !== undefined && item.size > this.opts.maxFileSize) {
                // eslint-disable-next-line no-console
                console.warn(`[GitHubLoader] Skipping ${path} — size ${item.size} bytes exceeds maxFileSize ${this.opts.maxFileSize}`);
                return false;
            }
            return true;
        });
    }
    /**
     * Downloads one file from raw.githubusercontent.com.
     *
     * @param item Tree entry; only `item.path` is read.
     * @returns The file text, or `null` when `isBinary` classifies it as binary.
     * @throws Whatever `httpGet` throws.
     */
    async getFileContent(item) {
        // Use raw content URL to avoid base64 decoding and extra API calls.
        // Each path segment is percent-encoded so that characters such as
        // `#`, `?` or `%` in a file name cannot change the meaning of the URL.
        const encodedPath = item.path.split('/').map(encodeURIComponent).join('/');
        const rawUrl = `https://raw.githubusercontent.com/${this.opts.owner}/${this.opts.repo}` +
            `/${this.opts.ref}/${encodedPath}`;
        const response = await this.httpGet(rawUrl);
        // Skip binary files
        if (this.isBinary(response)) {
            return null;
        }
        return response;
    }
    /**
     * GETs `url` and parses the response body as JSON.
     *
     * @throws Whatever `httpGet` throws, or a `SyntaxError` from `JSON.parse`.
     */
    async apiGet(url) {
        const text = await this.httpGet(url);
        return JSON.parse(text);
    }
    /**
     * Performs a GET request and returns the response body as text.
     *
     * The request is aborted once `timeout` milliseconds have elapsed; the
     * timer stays armed until the body has been read and is always cleared
     * afterwards, including when `fetch` rejects.
     *
     * @param url Absolute URL to request.
     * @returns The response body text.
     * @throws Error with the status code (and a hint for 403/429/404) when the
     *   response is not OK. Rejections from `fetch` itself, including the
     *   abort raised on timeout, propagate unchanged.
     */
    async httpGet(url) {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), this.opts.timeout);
        const headers = {
            Accept: 'application/vnd.github.v3+json',
            'User-Agent': 'HazelJS-RAG/1.0',
        };
        if (this.opts.token) {
            headers['Authorization'] = `Bearer ${this.opts.token}`;
        }
        try {
            const response = await fetch(url, { headers, signal: controller.signal });
            if (!response.ok) {
                const body = await response.text().catch(() => '');
                // GitHub signals an exhausted rate limit with 403 or 429.
                const rateLimited = response.status === 403 || response.status === 429;
                const hint = rateLimited
                    ? ' (rate limited — provide a GitHub token with the `token` option)'
                    : response.status === 404
                        ? ' (repo or path not found — check owner/repo/ref/directory)'
                        : '';
                throw new Error(`GitHub API error ${response.status}${hint}: ${body.slice(0, 200)}`);
            }
            // `await` here so the `finally` below runs only after the body was read.
            return await response.text();
        }
        finally {
            clearTimeout(timer);
        }
    }
    /**
     * Heuristic binary check on already-decoded text.
     *
     * Looks at the first 512 UTF-16 code units and counts control characters
     * (code below 9, or between 14 and 31 except 27/ESC).
     *
     * @param text Decoded file content.
     * @returns `true` when more than 10% of the sample are such control
     *   characters; `false` otherwise, including for empty text.
     */
    isBinary(text) {
        const sample = text.slice(0, 512);
        if (sample.length === 0) {
            // Nothing to inspect; also avoids evaluating 0 / 0.
            return false;
        }
        let nonText = 0;
        for (let i = 0; i < sample.length; i++) {
            const code = sample.charCodeAt(i);
            if (code < 9 || (code > 13 && code < 32 && code !== 27)) {
                nonText++;
            }
        }
        return nonText / sample.length > 0.1;
    }
};
|
|
209
|
+
// Export the class, then rebind both the export and the local `GitHubLoader`
// to the result of `__decorate`, which applies the `Loader` decorator from
// ./base.loader (given the name and description below) together with the
// `design:paramtypes` metadata for the single constructor parameter.
exports.GitHubLoader = GitHubLoader;
|
|
210
|
+
exports.GitHubLoader = GitHubLoader = __decorate([
|
|
211
|
+
(0, base_loader_1.Loader)({
|
|
212
|
+
name: 'GitHubLoader',
|
|
213
|
+
description: 'Loads files from a GitHub repository using the GitHub REST API.',
|
|
214
|
+
}),
|
|
215
|
+
__metadata("design:paramtypes", [Object])
|
|
216
|
+
], GitHubLoader);
|
|
217
|
+
//# sourceMappingURL=github.loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github.loader.js","sourceRoot":"","sources":["../../src/loaders/github.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;;;;;;;;;;;;AAEH,+CAA2D;AAsEpD,IAAM,YAAY,GAAlB,MAAM,YAAa,SAAQ,gCAAkB;IAIlD,YAAY,OAA4B;QACtC,KAAK,EAAE,CAAC;QAHO,YAAO,GAAG,wBAAwB,CAAC;QAIlD,IAAI,CAAC,IAAI,GAAG;YACV,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,GAAG,EAAE,OAAO,CAAC,GAAG,IAAI,MAAM;YAC1B,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,EAAE;YAClC,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,EAAE;YAClD,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,EAAE;YACxC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,OAAO,CAAC;YACzE,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,GAAG;YACjC,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,MAAO;YAC3C,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAM;YAClC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAE7C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,sCAAsC;YACtC,OAAO,CAAC,IAAI,CACV,0DAA0D,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAC9F,CAAC;YACF,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,IAAI,GAAe,EAAE,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7E,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;gBAChD,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;oBACrB,IAAI,CAAC,IAAI,CACP,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE;wBAC3B,MAAM,EAAE,IAAI,CAAC,IAAI;wBACjB,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;wBAC5C,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG;wBAClB,GAAG,EAAE,sBAAsB,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE;wBACjG,MAAM,EAAE,qCAAqC,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,I
AAI,IAAI,CAAC,IAAI,EAAE;wBAC9G,GAAG,EAAE,IAAI,CAAC,GAAG;wBACb,UAAU,EAAE,QAAQ;wBACpB,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ;qBACtB,CAAC,CACH,CAAC;gBACJ,CAAC;YACH,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CAAC,2BAA2B,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC,CAAC;YACnE,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4EAA4E;IAEpE,KAAK,CAAC,OAAO;QACnB,wEAAwE;QACxE,MAAM,GAAG,GACP,GAAG,IAAI,CAAC,OAAO,UAAU,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAC5D,cAAc,IAAI,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC;QAE5C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAG3B,GAAG,CAAC,CAAC;QAER,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,sCAAsC;YACtC,OAAO,CAAC,IAAI,CACV,iEAAiE;gBAC/D,8EAA8E,CACjF,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;QAE7D,wCAAwC;QACxC,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC;YAC5D,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,WAAW,CAAC,KAAuB;QACzC,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;YAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;YACvB,MAAM,GAAG,GAAG,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAG,CAAC,WAAW,EAAE,CAAC;YAEvD,mBAAmB;YACnB,IAAI,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBACzF,OAAO,KAAK,CAAC;YACf,CAAC;YAED,sBAAsB;YACtB,IACE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;gBACjC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,EACjE,CAAC;gBACD,OAAO,KAAK,CAAC;YACf,CAAC;YAED,sBAAsB;YACtB,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;gBACrE,OAAO,KAAK,CAAC;YACf,CAAC;YAED,mBA
AmB;YACnB,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBACjE,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CACV,2BAA2B,IAAI,WAAW,IAAI,CAAC,IAAI,8BAA8B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CACzG,CAAC;gBACF,OAAO,KAAK,CAAC;YACf,CAAC;YAED,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,IAAoB;QAC/C,mEAAmE;QACnE,MAAM,MAAM,GACV,qCAAqC,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YACxE,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAE5C,oBAAoB;QACpB,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAEO,KAAK,CAAC,MAAM,CAAI,GAAW;QACjC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACrC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAM,CAAC;IAC/B,CAAC;IAEO,KAAK,CAAC,OAAO,CAAC,GAAW;QAC/B,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtE,MAAM,OAAO,GAA2B;YACtC,MAAM,EAAE,gCAAgC;YACxC,YAAY,EAAE,iBAAiB;SAChC,CAAC;QACF,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACpB,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACzD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QAC1E,YAAY,CAAC,KAAK,CAAC,CAAC;QAEpB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACnD,MAAM,IAAI,GACR,QAAQ,CAAC,MAAM,KAAK,GAAG;gBACrB,CAAC,CAAC,kEAAkE;gBACpE,CAAC,CAAC,QAAQ,CAAC,MAAM,KAAK,GAAG;oBACvB,CAAC,CAAC,4DAA4D;oBAC9D,CAAC,CAAC,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,GAAG,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACvF,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,QAAQ,CAAC,IAAY;QAC3B,sFAAsF;QACtF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAClC,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,C
AAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAClC,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,EAAE,IAAI,IAAI,GAAG,EAAE,IAAI,IAAI,KAAK,EAAE,CAAC,EAAE,CAAC;gBACxD,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QACD,OAAO,OAAO,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC;IACvC,CAAC;CACF,CAAA;AAnMY,oCAAY;uBAAZ,YAAY;IAJxB,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,cAAc;QACpB,WAAW,EAAE,iEAAiE;KAC/E,CAAC;;GACW,YAAY,CAmMxB"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTMLFileLoader
|
|
3
|
+
*
|
|
4
|
+
* Loads HTML files from disk and converts them to plain text.
|
|
5
|
+
*
|
|
6
|
+
* Uses a built-in tag stripper with no external dependencies.
|
|
7
|
+
* For advanced HTML parsing (CSS selectors, JavaScript rendering), use
|
|
8
|
+
* `WebLoader` with the optional `cheerio` dependency instead.
|
|
9
|
+
*
|
|
10
|
+
* Optionally strips:
|
|
11
|
+
* - `<script>` and `<style>` blocks (content + tags)
|
|
12
|
+
* - HTML comments
|
|
13
|
+
* - All remaining HTML tags (leaving only text nodes)
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```typescript
|
|
17
|
+
* const loader = new HTMLFileLoader({ path: './page.html' });
|
|
18
|
+
* const docs = await loader.load();
|
|
19
|
+
* // docs[0].content === stripped plain text
|
|
20
|
+
* // docs[0].metadata.title === page <title>
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
import { BaseDocumentLoader } from './base.loader';
|
|
24
|
+
import type { Document } from '../types';
|
|
25
|
+
export interface HTMLFileLoaderOptions {
|
|
26
|
+
path?: string;
|
|
27
|
+
paths?: string[];
|
|
28
|
+
/**
|
|
29
|
+
* CSS selector string used to limit extraction to a specific element.
|
|
30
|
+
* Requires the optional `cheerio` peer dependency to be installed.
|
|
31
|
+
* If cheerio is not installed, the full stripped text is returned.
|
|
32
|
+
* @example 'article', 'main', '#content'
|
|
33
|
+
*/
|
|
34
|
+
selector?: string;
|
|
35
|
+
/** Strip <script> and <style> blocks entirely. @default true */
|
|
36
|
+
stripScripts?: boolean;
|
|
37
|
+
/** Collapse runs of spaces/tabs into one space and three or more consecutive newlines into a single blank line; when false the text is only trimmed. @default true */
|
|
38
|
+
collapseWhitespace?: boolean;
|
|
39
|
+
/** Extra metadata merged into every document. */
|
|
40
|
+
metadata?: Record<string, unknown>;
|
|
41
|
+
}
|
|
42
|
+
export declare class HTMLFileLoader extends BaseDocumentLoader {
|
|
43
|
+
private readonly paths;
|
|
44
|
+
private readonly selector?;
|
|
45
|
+
private readonly stripScripts;
|
|
46
|
+
private readonly collapseWhitespace;
|
|
47
|
+
private readonly extraMetadata;
|
|
48
|
+
constructor(options: HTMLFileLoaderOptions);
|
|
49
|
+
load(): Promise<Document[]>;
|
|
50
|
+
private extractTitle;
|
|
51
|
+
private stripTags;
|
|
52
|
+
private decodeEntities;
|
|
53
|
+
private clean;
|
|
54
|
+
}
|
|
55
|
+
//# sourceMappingURL=html-file.loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-file.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/html-file.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAIH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,qBAAqB;IACpC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gEAAgE;IAChE,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,6EAA6E;IAC7E,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,qBAMa,cAAe,SAAQ,kBAAkB;IACpD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAW;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAU;IACvC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAU;IAC7C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA0B;gBAE5C,OAAO,EAAE,qBAAqB;IAYpC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;IA4CjC,OAAO,CAAC,YAAY;IAKpB,OAAO,CAAC,SAAS;IAsBjB,OAAO,CAAC,cAAc;IAYtB,OAAO,CAAC,KAAK;CAOd"}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* HTMLFileLoader
|
|
4
|
+
*
|
|
5
|
+
* Loads HTML files from disk and converts them to plain text.
|
|
6
|
+
*
|
|
7
|
+
* Uses a built-in tag stripper with no external dependencies.
|
|
8
|
+
* For advanced HTML parsing (CSS selectors, JavaScript rendering), use
|
|
9
|
+
* `WebLoader` with the optional `cheerio` dependency instead.
|
|
10
|
+
*
|
|
11
|
+
* Optionally strips:
|
|
12
|
+
* - `<script>` and `<style>` blocks (content + tags)
|
|
13
|
+
* - HTML comments
|
|
14
|
+
* - All remaining HTML tags (leaving only text nodes)
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```typescript
|
|
18
|
+
* const loader = new HTMLFileLoader({ path: './page.html' });
|
|
19
|
+
* const docs = await loader.load();
|
|
20
|
+
* // docs[0].content === stripped plain text
|
|
21
|
+
* // docs[0].metadata.title === page <title>
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
25
|
+
if (k2 === undefined) k2 = k;
|
|
26
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
27
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
28
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
29
|
+
}
|
|
30
|
+
Object.defineProperty(o, k2, desc);
|
|
31
|
+
}) : (function(o, m, k, k2) {
|
|
32
|
+
if (k2 === undefined) k2 = k;
|
|
33
|
+
o[k2] = m[k];
|
|
34
|
+
}));
|
|
35
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
36
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
37
|
+
}) : function(o, v) {
|
|
38
|
+
o["default"] = v;
|
|
39
|
+
});
|
|
40
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
41
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
42
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
43
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
44
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
45
|
+
};
|
|
46
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
47
|
+
var ownKeys = function(o) {
|
|
48
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
49
|
+
var ar = [];
|
|
50
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
51
|
+
return ar;
|
|
52
|
+
};
|
|
53
|
+
return ownKeys(o);
|
|
54
|
+
};
|
|
55
|
+
return function (mod) {
|
|
56
|
+
if (mod && mod.__esModule) return mod;
|
|
57
|
+
var result = {};
|
|
58
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
59
|
+
__setModuleDefault(result, mod);
|
|
60
|
+
return result;
|
|
61
|
+
};
|
|
62
|
+
})();
|
|
63
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
64
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
65
|
+
};
|
|
66
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
67
|
+
exports.HTMLFileLoader = void 0;
|
|
68
|
+
const promises_1 = require("fs/promises");
|
|
69
|
+
const path_1 = require("path");
|
|
70
|
+
const base_loader_1 = require("./base.loader");
|
|
71
|
+
/**
 * Loads local HTML files and converts each one into a plain-text document.
 *
 * Without a `selector` the built-in tag stripper is used. With a `selector`
 * the optional `cheerio` package is loaded lazily; if that path fails for any
 * reason the loader falls back to the built-in stripper on the whole page.
 */
let HTMLFileLoader = class HTMLFileLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader options. At least one file must be named through
     *   `path` or a non-empty `paths` array.
     * @throws {Error} When neither `path` nor a non-empty `paths` is provided.
     */
    constructor(options) {
        super();
        const hasPaths = options.paths != null && options.paths.length > 0;
        if (!options.path && !hasPaths) {
            throw new Error('HTMLFileLoader: provide at least one path via `path` or `paths`.');
        }
        // A non-empty `paths` wins (as before); an empty `paths` array no longer
        // shadows a valid `path`, which previously produced a loader with no files.
        this.paths = hasPaths ? options.paths : [options.path];
        this.selector = options.selector;
        this.stripScripts = options.stripScripts ?? true;
        this.collapseWhitespace = options.collapseWhitespace ?? true;
        this.extraMetadata = options.metadata ?? {};
    }
    /**
     * Reads every configured file (sequentially, as UTF-8) and returns one
     * document per file.
     *
     * Document metadata contains `source` (file base name), `filePath`,
     * `loaderType: 'html'`, `title` when one was found, and finally the
     * caller-supplied extra metadata, which can override the other keys.
     *
     * @returns The created documents, in the order of the configured paths.
     * @throws Whatever `readFile` rejects with when a file cannot be read.
     */
    async load() {
        const docs = [];
        for (const filePath of this.paths) {
            const html = await (0, promises_1.readFile)(filePath, { encoding: 'utf-8' });
            let text;
            let title;
            if (this.selector) {
                try {
                    // The module name is built at runtime so bundlers do not treat
                    // cheerio as a hard dependency.
                    const cheerio = await Promise.resolve(`${'cheerio'}`).then(s => __importStar(require(s))).then((m) => m);
                    const $ = (cheerio.load ?? cheerio.default)(html);
                    const selectedTitle = $('title').first().text().trim();
                    const selectedText = this.clean($(this.selector).text());
                    title = selectedTitle;
                    text = selectedText;
                }
                catch {
                    // cheerio is unavailable (or failed): fall back to the built-in
                    // stripper, and still recover the title so metadata does not
                    // depend on which path ran.
                    title = this.extractTitle(html);
                    text = this.stripTags(html);
                }
            }
            else {
                title = this.extractTitle(html);
                text = this.stripTags(html);
            }
            docs.push(this.createDocument(text, {
                source: (0, path_1.basename)(filePath),
                filePath,
                loaderType: 'html',
                ...(title ? { title } : {}),
                ...this.extraMetadata,
            }));
        }
        return docs;
    }
    // ── Private helpers ──────────────────────────────────────────────────────
    /**
     * Returns the trimmed, entity-decoded content of the first `<title>`
     * element, or an empty string when the page has none.
     */
    extractTitle(html) {
        const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
        return match ? this.decodeEntities(match[1]).trim() : '';
    }
    /**
     * Converts an HTML string to plain text: optionally drops `<script>` and
     * `<style>` blocks, drops comments, turns line-breaking tags into newlines,
     * removes all remaining tags, decodes entities and normalises whitespace.
     */
    stripTags(html) {
        let text = html;
        if (this.stripScripts) {
            // Remove <script> / <style> blocks including their content. The closing
            // tag may legally contain whitespace before `>`.
            text = text.replace(/<script\b[\s\S]*?<\/script\s*>/gi, ' ');
            text = text.replace(/<style\b[\s\S]*?<\/style\s*>/gi, ' ');
        }
        // Remove HTML comments
        text = text.replace(/<!--[\s\S]*?-->/g, ' ');
        // Line breaks: `<br>` is a void element and never appears as a closing
        // tag in well-formed HTML, so it is matched in its opening form too.
        text = text.replace(/<br\b[^>]*>|<\/(p|div|h[1-6]|li|tr|br)[^>]*>/gi, '\n');
        // Remove all remaining tags
        text = text.replace(/<[^>]+>/g, ' ');
        // Entities are decoded only after tag removal so that an escaped `&lt;`
        // in the text is never mistaken for markup.
        text = this.decodeEntities(text);
        return this.clean(text);
    }
    /**
     * Decodes a small set of named entities (`&amp;`, `&lt;`, `&gt;`,
     * `&quot;`, `&apos;`, `&nbsp;`, `&hellip;`) plus decimal and hexadecimal
     * numeric character references. Unknown or out-of-range references are
     * left untouched.
     *
     * Decoding happens in a single pass so already-decoded output is never
     * decoded again (`&amp;lt;` yields the literal text `&lt;`, not `<`).
     */
    decodeEntities(text) {
        const named = new Map([
            ['amp', '&'],
            ['lt', '<'],
            ['gt', '>'],
            ['quot', '"'],
            ['apos', "'"],
            ['nbsp', ' '],
            ['hellip', '...'],
        ]);
        return text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, (entity, body) => {
            if (body[0] !== '#') {
                return named.get(body) ?? entity;
            }
            const isHex = body[1] === 'x' || body[1] === 'X';
            const codePoint = parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
            // fromCodePoint handles astral characters (emoji etc.) and throws above
            // U+10FFFF, so out-of-range references are kept verbatim instead.
            return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
        });
    }
    /**
     * Trims the text and, unless `collapseWhitespace` is disabled, collapses
     * runs of spaces/tabs into one space and three or more consecutive
     * newlines into a single blank line.
     */
    clean(text) {
        if (!this.collapseWhitespace)
            return text.trim();
        return text
            .replace(/[ \t]+/g, ' ') // collapse inline spaces
            .replace(/\n{3,}/g, '\n\n') // collapse multiple blank lines
            .trim();
    }
};
|
|
160
|
+
exports.HTMLFileLoader = HTMLFileLoader;
|
|
161
|
+
exports.HTMLFileLoader = HTMLFileLoader = __decorate([
|
|
162
|
+
(0, base_loader_1.Loader)({
|
|
163
|
+
name: 'HTMLFileLoader',
|
|
164
|
+
description: 'Loads local HTML files and strips tags to produce plain text.',
|
|
165
|
+
extensions: ['.html', '.htm', '.xhtml'],
|
|
166
|
+
mimeTypes: ['text/html', 'application/xhtml+xml'],
|
|
167
|
+
}),
|
|
168
|
+
__metadata("design:paramtypes", [Object])
|
|
169
|
+
], HTMLFileLoader);
|
|
170
|
+
//# sourceMappingURL=html-file.loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-file.loader.js","sourceRoot":"","sources":["../../src/loaders/html-file.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,0CAAuC;AACvC,+BAAgC;AAChC,+CAA2D;AA2BpD,IAAM,cAAc,GAApB,MAAM,cAAe,SAAQ,gCAAkB;IAOpD,YAAY,OAA8B;QACxC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YACpE,MAAM,IAAI,KAAK,CAAC,kEAAkE,CAAC,CAAC;QACtF,CAAC;QACD,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACnE,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;QACjD,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,IAAI,IAAI,CAAC;QAC7D,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,IAAI,GAAe,EAAE,CAAC;QAE5B,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAClC,MAAM,IAAI,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,EAAE,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC,CAAC;YAE7D,mEAAmE;YACnE,IAAI,IAAY,CAAC;YACjB,IAAI,KAAK,GAAG,EAAE,CAAC;YAEf,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,IAAI,CAAC;oBACH,8DAA8D;oBAC9D,MAAM,OAAO,GAAG,MAAM,mBAAO,SAAmB,wCAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAQ,CAAC,CAAC;oBACxE,MAAM,CAAC,GAAG,CACR,OAAO,CAAC,IAAI,IAAK,OAAuD,CAAC,OAAO,CACjF,CAAC,IAAI,CAAC,CAAC;oBACR,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;oBACzC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBAAC,MAAM,CAAC;oBACP,oCAAoC;oBACpC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;gBAChC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAC9B,CAAC;YAED,IAAI,CAAC,IAAI,CACP,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE;gBACxB,MAAM,EAAE,IAAA,eAAQ,EAAC,QAAQ,CAAC;gBAC1B,QAAQ;gBACR,UAAU,EAAE,MAAM;gBAClB,GAAG,CAAC,KAAK,IAAI,EAAE,KAAK,EAAE,CAAC;gBACvB,GAAG,IAAI,CAAC,aAAa;aACtB,CAAC,CA
CH,CAAC;QACJ,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,4EAA4E;IAEpE,YAAY,CAAC,IAAY;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACtC,CAAC;IAEO,SAAS,CAAC,IAAY;QAC5B,IAAI,IAAI,GAAG,IAAI,CAAC;QAEhB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,yBAAyB;YACzB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC;YACxD,wBAAwB;YACxB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,2BAA2B,EAAE,GAAG,CAAC,CAAC;QACxD,CAAC;QAED,uBAAuB;QACvB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,kBAAkB,EAAE,GAAG,CAAC,CAAC;QAC7C,yDAAyD;QACzD,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,oCAAoC,EAAE,IAAI,CAAC,CAAC;QAChE,4BAA4B;QAC5B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QACrC,8BAA8B;QAC9B,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAEjC,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,OAAO,IAAI;aACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC;aAC3B,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,CAAC;IAEO,KAAK,CAAC,IAAY;QACxB,IAAI,CAAC,IAAI,CAAC,kBAAkB;YAAE,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;QACjD,OAAO,IAAI;aACR,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,yBAAyB;aACjD,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,gCAAgC;aAC3D,IAAI,EAAE,CAAC;IACZ,CAAC;CACF,CAAA;AA7GY,wCAAc;yBAAd,cAAc;IAN1B,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,+DAA+D;QAC5E,UAAU,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,CAAC;QACvC,SAAS,EAAE,CAAC,WAAW,EAAE,uBAAuB,CAAC;KAClD,CAAC;;GACW,cAAc,CA6G1B"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @hazeljs/rag — Document Loaders
|
|
3
|
+
*
|
|
4
|
+
* All document loaders that ship with @hazeljs/rag.
|
|
5
|
+
*
|
|
6
|
+
* Built-in loaders (no extra dependencies):
|
|
7
|
+
* - TextFileLoader .txt, .log
|
|
8
|
+
* - JSONFileLoader .json
|
|
9
|
+
* - CSVFileLoader .csv
|
|
10
|
+
* - MarkdownFileLoader .md, .mdx
|
|
11
|
+
* - HTMLFileLoader .html, .htm, .xhtml
|
|
12
|
+
* - DirectoryLoader walks a directory and delegates to the above
|
|
13
|
+
* - WebLoader fetches web pages (Node.js fetch)
|
|
14
|
+
* - YouTubeTranscriptLoader YouTube captions via ytInitialPlayerResponse
|
|
15
|
+
* - GitHubLoader GitHub REST API (no extra dep)
|
|
16
|
+
*
|
|
17
|
+
* Optional-dependency loaders (graceful error if dep not installed):
|
|
18
|
+
* - PdfLoader requires: npm install pdf-parse
|
|
19
|
+
* - DocxLoader requires: npm install mammoth
|
|
20
|
+
* - WebLoader (selector) requires: npm install cheerio (only for the `selector` option)
|
|
21
|
+
*
|
|
22
|
+
* Base class and decorator (extend to build your own):
|
|
23
|
+
* - BaseDocumentLoader
|
|
24
|
+
* - @Loader
|
|
25
|
+
* - DocumentLoaderRegistry
|
|
26
|
+
* - LoaderConfig
|
|
27
|
+
*/
|
|
28
|
+
export { BaseDocumentLoader, Loader, getLoaderConfig, DocumentLoaderRegistry } from './base.loader';
|
|
29
|
+
export type { LoaderConfig } from './base.loader';
|
|
30
|
+
export { TextFileLoader } from './text-file.loader';
|
|
31
|
+
export { JSONFileLoader } from './json-file.loader';
|
|
32
|
+
export { CSVFileLoader } from './csv-file.loader';
|
|
33
|
+
export { MarkdownFileLoader } from './markdown-file.loader';
|
|
34
|
+
export { HTMLFileLoader } from './html-file.loader';
|
|
35
|
+
export { DirectoryLoader } from './directory.loader';
|
|
36
|
+
export { PdfLoader } from './pdf.loader';
|
|
37
|
+
export { DocxLoader } from './docx.loader';
|
|
38
|
+
export { WebLoader } from './web.loader';
|
|
39
|
+
export { YouTubeTranscriptLoader } from './youtube-transcript.loader';
|
|
40
|
+
export { GitHubLoader } from './github.loader';
|
|
41
|
+
export type { TextFileLoaderOptions } from './text-file.loader';
|
|
42
|
+
export type { JSONFileLoaderOptions } from './json-file.loader';
|
|
43
|
+
export type { CSVFileLoaderOptions } from './csv-file.loader';
|
|
44
|
+
export type { MarkdownFileLoaderOptions } from './markdown-file.loader';
|
|
45
|
+
export type { HTMLFileLoaderOptions } from './html-file.loader';
|
|
46
|
+
export type { DirectoryLoaderOptions, LoaderFactory } from './directory.loader';
|
|
47
|
+
export type { PdfLoaderOptions } from './pdf.loader';
|
|
48
|
+
export type { DocxLoaderOptions } from './docx.loader';
|
|
49
|
+
export type { WebLoaderOptions } from './web.loader';
|
|
50
|
+
export type { YouTubeTranscriptLoaderOptions } from './youtube-transcript.loader';
|
|
51
|
+
export type { GitHubLoaderOptions } from './github.loader';
|
|
52
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC;AACpG,YAAY,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAGlD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAGrD,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAG3C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,uBAAuB,EAAE,MAAM,6BAA6B,CAAC;AACtE,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAG/C,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,YAAY,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AAC9D,YAAY,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AACxE,YAAY,EAAE,qBAAqB,EAAE,MAAM,oBAAoB,CAAC;AAChE,YAAY,EAAE,sBAAsB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAChF,YAAY,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AACrD,YAAY,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AACvD,YAAY,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AACrD,YAAY,EAAE,8BAA8B,EAAE,MAAM,6BAA6B,CAAC;AAClF,YAAY,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* @hazeljs/rag — Document Loaders
|
|
4
|
+
*
|
|
5
|
+
* All document loaders that ship with @hazeljs/rag.
|
|
6
|
+
*
|
|
7
|
+
* Built-in loaders (no extra dependencies):
|
|
8
|
+
* - TextFileLoader .txt, .log
|
|
9
|
+
* - JSONFileLoader .json
|
|
10
|
+
* - CSVFileLoader .csv
|
|
11
|
+
* - MarkdownFileLoader .md, .mdx
|
|
12
|
+
* - HTMLFileLoader .html, .htm, .xhtml
|
|
13
|
+
* - DirectoryLoader walks a directory and delegates to the above
|
|
14
|
+
* - WebLoader fetches web pages (Node.js fetch)
|
|
15
|
+
* - YouTubeTranscriptLoader YouTube captions via ytInitialPlayerResponse
|
|
16
|
+
* - GitHubLoader GitHub REST API (no extra dep)
|
|
17
|
+
*
|
|
18
|
+
* Optional-dependency loaders (graceful error if dep not installed):
|
|
19
|
+
* - PdfLoader requires: npm install pdf-parse
|
|
20
|
+
* - DocxLoader requires: npm install mammoth
|
|
21
|
+
* - WebLoader (selector) requires: npm install cheerio (only for the `selector` option)
|
|
22
|
+
*
|
|
23
|
+
* Base class and decorator (extend to build your own):
|
|
24
|
+
* - BaseDocumentLoader
|
|
25
|
+
* - @Loader
|
|
26
|
+
* - DocumentLoaderRegistry
|
|
27
|
+
* - LoaderConfig
|
|
28
|
+
*/
|
|
29
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
|
+
exports.GitHubLoader = exports.YouTubeTranscriptLoader = exports.WebLoader = exports.DocxLoader = exports.PdfLoader = exports.DirectoryLoader = exports.HTMLFileLoader = exports.MarkdownFileLoader = exports.CSVFileLoader = exports.JSONFileLoader = exports.TextFileLoader = exports.DocumentLoaderRegistry = exports.getLoaderConfig = exports.Loader = exports.BaseDocumentLoader = void 0;
|
|
31
|
+
var base_loader_1 = require("./base.loader");
|
|
32
|
+
Object.defineProperty(exports, "BaseDocumentLoader", { enumerable: true, get: function () { return base_loader_1.BaseDocumentLoader; } });
|
|
33
|
+
Object.defineProperty(exports, "Loader", { enumerable: true, get: function () { return base_loader_1.Loader; } });
|
|
34
|
+
Object.defineProperty(exports, "getLoaderConfig", { enumerable: true, get: function () { return base_loader_1.getLoaderConfig; } });
|
|
35
|
+
Object.defineProperty(exports, "DocumentLoaderRegistry", { enumerable: true, get: function () { return base_loader_1.DocumentLoaderRegistry; } });
|
|
36
|
+
// ── File-system loaders ───────────────────────────────────────────────────
|
|
37
|
+
var text_file_loader_1 = require("./text-file.loader");
|
|
38
|
+
Object.defineProperty(exports, "TextFileLoader", { enumerable: true, get: function () { return text_file_loader_1.TextFileLoader; } });
|
|
39
|
+
var json_file_loader_1 = require("./json-file.loader");
|
|
40
|
+
Object.defineProperty(exports, "JSONFileLoader", { enumerable: true, get: function () { return json_file_loader_1.JSONFileLoader; } });
|
|
41
|
+
var csv_file_loader_1 = require("./csv-file.loader");
|
|
42
|
+
Object.defineProperty(exports, "CSVFileLoader", { enumerable: true, get: function () { return csv_file_loader_1.CSVFileLoader; } });
|
|
43
|
+
var markdown_file_loader_1 = require("./markdown-file.loader");
|
|
44
|
+
Object.defineProperty(exports, "MarkdownFileLoader", { enumerable: true, get: function () { return markdown_file_loader_1.MarkdownFileLoader; } });
|
|
45
|
+
var html_file_loader_1 = require("./html-file.loader");
|
|
46
|
+
Object.defineProperty(exports, "HTMLFileLoader", { enumerable: true, get: function () { return html_file_loader_1.HTMLFileLoader; } });
|
|
47
|
+
var directory_loader_1 = require("./directory.loader");
|
|
48
|
+
Object.defineProperty(exports, "DirectoryLoader", { enumerable: true, get: function () { return directory_loader_1.DirectoryLoader; } });
|
|
49
|
+
// ── Format loaders (optional deps) ───────────────────────────────────────
|
|
50
|
+
var pdf_loader_1 = require("./pdf.loader");
|
|
51
|
+
Object.defineProperty(exports, "PdfLoader", { enumerable: true, get: function () { return pdf_loader_1.PdfLoader; } });
|
|
52
|
+
var docx_loader_1 = require("./docx.loader");
|
|
53
|
+
Object.defineProperty(exports, "DocxLoader", { enumerable: true, get: function () { return docx_loader_1.DocxLoader; } });
|
|
54
|
+
// ── Remote loaders ────────────────────────────────────────────────────────
|
|
55
|
+
var web_loader_1 = require("./web.loader");
|
|
56
|
+
Object.defineProperty(exports, "WebLoader", { enumerable: true, get: function () { return web_loader_1.WebLoader; } });
|
|
57
|
+
var youtube_transcript_loader_1 = require("./youtube-transcript.loader");
|
|
58
|
+
Object.defineProperty(exports, "YouTubeTranscriptLoader", { enumerable: true, get: function () { return youtube_transcript_loader_1.YouTubeTranscriptLoader; } });
|
|
59
|
+
var github_loader_1 = require("./github.loader");
|
|
60
|
+
Object.defineProperty(exports, "GitHubLoader", { enumerable: true, get: function () { return github_loader_1.GitHubLoader; } });
|
|
61
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;;;AAEH,6CAAoG;AAA3F,iHAAA,kBAAkB,OAAA;AAAE,qGAAA,MAAM,OAAA;AAAE,8GAAA,eAAe,OAAA;AAAE,qHAAA,sBAAsB,OAAA;AAG5E,6EAA6E;AAC7E,uDAAoD;AAA3C,kHAAA,cAAc,OAAA;AACvB,uDAAoD;AAA3C,kHAAA,cAAc,OAAA;AACvB,qDAAkD;AAAzC,gHAAA,aAAa,OAAA;AACtB,+DAA4D;AAAnD,0HAAA,kBAAkB,OAAA;AAC3B,uDAAoD;AAA3C,kHAAA,cAAc,OAAA;AACvB,uDAAqD;AAA5C,mHAAA,eAAe,OAAA;AAExB,4EAA4E;AAC5E,2CAAyC;AAAhC,uGAAA,SAAS,OAAA;AAClB,6CAA2C;AAAlC,yGAAA,UAAU,OAAA;AAEnB,6EAA6E;AAC7E,2CAAyC;AAAhC,uGAAA,SAAS,OAAA;AAClB,yEAAsE;AAA7D,oIAAA,uBAAuB,OAAA;AAChC,iDAA+C;AAAtC,6GAAA,YAAY,OAAA"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSONFileLoader
|
|
3
|
+
*
|
|
4
|
+
* Loads a JSON file and converts it to documents.
|
|
5
|
+
*
|
|
6
|
+
* Supports two shapes:
|
|
7
|
+
* - **Array mode** (default) — if the root value is an array, each element
|
|
8
|
+
* becomes a document. The element's text is either the value of `textKey`
|
|
9
|
+
* (a specific field) or the full JSON stringification of the element.
|
|
10
|
+
* - **Object mode** — a single root object becomes one document.
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* // articles.json = [{ "title": "...", "body": "..." }, ...]
|
|
15
|
+
* const loader = new JSONFileLoader({
|
|
16
|
+
* path: './articles.json',
|
|
17
|
+
* textKey: 'body', // use "body" field as content
|
|
18
|
+
* metadataKeys: ['title'], // include "title" in metadata
|
|
19
|
+
* });
|
|
20
|
+
* const docs = await loader.load();
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
import { BaseDocumentLoader } from './base.loader';
|
|
24
|
+
import type { Document } from '../types';
|
|
25
|
+
export interface JSONFileLoaderOptions {
|
|
26
|
+
/** Path to the JSON file. */
|
|
27
|
+
path: string;
|
|
28
|
+
/**
|
|
29
|
+
* Key in each array element (or root object) whose value becomes the
|
|
30
|
+
* document content. If omitted, the entire element is JSON-stringified.
|
|
31
|
+
*/
|
|
32
|
+
textKey?: string;
|
|
33
|
+
/**
|
|
34
|
+
* Keys to extract from each element into the document metadata.
|
|
35
|
+
* All other keys are omitted from metadata.
|
|
36
|
+
*/
|
|
37
|
+
metadataKeys?: string[];
|
|
38
|
+
/** Extra metadata merged into every document. */
|
|
39
|
+
metadata?: Record<string, unknown>;
|
|
40
|
+
/**
|
|
41
|
+
* Dot-separated property path (not an RFC 6901 JSON Pointer) to the array within the JSON file.
|
|
42
|
+
* @example 'data.results' → reads `json.data.results`
|
|
43
|
+
*/
|
|
44
|
+
jsonPointer?: string;
|
|
45
|
+
}
|
|
46
|
+
export declare class JSONFileLoader extends BaseDocumentLoader {
|
|
47
|
+
private readonly opts;
|
|
48
|
+
constructor(options: JSONFileLoaderOptions);
|
|
49
|
+
load(): Promise<Document[]>;
|
|
50
|
+
}
|
|
51
|
+
//# sourceMappingURL=json-file.loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-file.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/json-file.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAIH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,qBAAqB;IACpC,6BAA6B;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAMa,cAAe,SAAQ,kBAAkB;IACpD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAGnB;gBAEU,OAAO,EAAE,qBAAqB;IAWpC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAkDlC"}
|