@hazeljs/rag 0.2.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +192 -0
- package/README.md +504 -0
- package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
- package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
- package/dist/__tests__/graph/community-detector.test.js +87 -0
- package/dist/__tests__/graph/community-detector.test.js.map +1 -0
- package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
- package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
- package/dist/__tests__/graph/community-summarizer.test.js +131 -0
- package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
- package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
- package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
- package/dist/__tests__/graph/entity-extractor.test.js +129 -0
- package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
- package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
- package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
- package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
- package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
- package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/base.loader.test.js +114 -0
- package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
- package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/directory.loader.test.js +154 -0
- package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
- package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
- package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
- package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
- package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
- package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/rag-pipeline.test.js +210 -0
- package/dist/__tests__/rag-pipeline.test.js.map +1 -0
- package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
- package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/bm25.test.js +86 -0
- package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
- package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
- package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
- package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
- package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
- package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/multi-query.test.js +90 -0
- package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
- package/dist/__tests__/utils/similarity.test.d.ts +2 -0
- package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
- package/dist/__tests__/utils/similarity.test.js +47 -0
- package/dist/__tests__/utils/similarity.test.js.map +1 -0
- package/dist/agentic/agentic-rag.service.d.ts +49 -0
- package/dist/agentic/agentic-rag.service.d.ts.map +1 -0
- package/dist/agentic/agentic-rag.service.js +149 -0
- package/dist/agentic/agentic-rag.service.js.map +1 -0
- package/dist/agentic/decorators/active-learning.decorator.d.ts +19 -0
- package/dist/agentic/decorators/active-learning.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/active-learning.decorator.js +98 -0
- package/dist/agentic/decorators/active-learning.decorator.js.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +17 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js +103 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -0
- package/dist/agentic/decorators/cached.decorator.d.ts +18 -0
- package/dist/agentic/decorators/cached.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/cached.decorator.js +93 -0
- package/dist/agentic/decorators/cached.decorator.js.map +1 -0
- package/dist/agentic/decorators/context-aware.decorator.d.ts +16 -0
- package/dist/agentic/decorators/context-aware.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/context-aware.decorator.js +169 -0
- package/dist/agentic/decorators/context-aware.decorator.js.map +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts +16 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.js +142 -0
- package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts +15 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/hyde.decorator.js +91 -0
- package/dist/agentic/decorators/hyde.decorator.js.map +1 -0
- package/dist/agentic/decorators/index.d.ts +16 -0
- package/dist/agentic/decorators/index.d.ts.map +1 -0
- package/dist/agentic/decorators/index.js +32 -0
- package/dist/agentic/decorators/index.js.map +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts +15 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.js +109 -0
- package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts +20 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/query-planner.decorator.js +213 -0
- package/dist/agentic/decorators/query-planner.decorator.js.map +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts +16 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.js +143 -0
- package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts +20 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/self-reflective.decorator.js +189 -0
- package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -0
- package/dist/agentic/decorators/source-verification.decorator.d.ts +15 -0
- package/dist/agentic/decorators/source-verification.decorator.d.ts.map +1 -0
- package/dist/agentic/decorators/source-verification.decorator.js +121 -0
- package/dist/agentic/decorators/source-verification.decorator.js.map +1 -0
- package/dist/agentic/index.d.ts +9 -0
- package/dist/agentic/index.d.ts.map +1 -0
- package/dist/agentic/index.js +25 -0
- package/dist/agentic/index.js.map +1 -0
- package/dist/agentic/types.d.ts +210 -0
- package/dist/agentic/types.d.ts.map +1 -0
- package/dist/agentic/types.js +7 -0
- package/dist/agentic/types.js.map +1 -0
- package/dist/decorators/embeddable.decorator.d.ts +31 -0
- package/dist/decorators/embeddable.decorator.d.ts.map +1 -0
- package/dist/decorators/embeddable.decorator.js +44 -0
- package/dist/decorators/embeddable.decorator.js.map +1 -0
- package/dist/decorators/rag.decorator.d.ts +58 -0
- package/dist/decorators/rag.decorator.d.ts.map +1 -0
- package/dist/decorators/rag.decorator.js +78 -0
- package/dist/decorators/rag.decorator.js.map +1 -0
- package/dist/decorators/semantic-search.decorator.d.ts +69 -0
- package/dist/decorators/semantic-search.decorator.d.ts.map +1 -0
- package/dist/decorators/semantic-search.decorator.js +116 -0
- package/dist/decorators/semantic-search.decorator.js.map +1 -0
- package/dist/embeddings/cohere-embeddings.d.ts +33 -0
- package/dist/embeddings/cohere-embeddings.d.ts.map +1 -0
- package/dist/embeddings/cohere-embeddings.js +91 -0
- package/dist/embeddings/cohere-embeddings.js.map +1 -0
- package/dist/embeddings/openai-embeddings.d.ts +21 -0
- package/dist/embeddings/openai-embeddings.d.ts.map +1 -0
- package/dist/embeddings/openai-embeddings.js +53 -0
- package/dist/embeddings/openai-embeddings.js.map +1 -0
- package/dist/graph/community-detector.d.ts +45 -0
- package/dist/graph/community-detector.d.ts.map +1 -0
- package/dist/graph/community-detector.js +153 -0
- package/dist/graph/community-detector.js.map +1 -0
- package/dist/graph/community-summarizer.d.ts +41 -0
- package/dist/graph/community-summarizer.d.ts.map +1 -0
- package/dist/graph/community-summarizer.js +119 -0
- package/dist/graph/community-summarizer.js.map +1 -0
- package/dist/graph/entity-extractor.d.ts +47 -0
- package/dist/graph/entity-extractor.d.ts.map +1 -0
- package/dist/graph/entity-extractor.js +224 -0
- package/dist/graph/entity-extractor.js.map +1 -0
- package/dist/graph/graph-rag-pipeline.d.ts +83 -0
- package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
- package/dist/graph/graph-rag-pipeline.js +390 -0
- package/dist/graph/graph-rag-pipeline.js.map +1 -0
- package/dist/graph/graph.types.d.ts +186 -0
- package/dist/graph/graph.types.d.ts.map +1 -0
- package/dist/graph/graph.types.js +20 -0
- package/dist/graph/graph.types.js.map +1 -0
- package/dist/graph/index.d.ts +15 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/index.js +31 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/graph/knowledge-graph.d.ts +57 -0
- package/dist/graph/knowledge-graph.d.ts.map +1 -0
- package/dist/graph/knowledge-graph.js +198 -0
- package/dist/graph/knowledge-graph.js.map +1 -0
- package/dist/index.d.ts +29 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +58 -0
- package/dist/index.js.map +1 -0
- package/dist/loaders/base.loader.d.ts +108 -0
- package/dist/loaders/base.loader.d.ts.map +1 -0
- package/dist/loaders/base.loader.js +123 -0
- package/dist/loaders/base.loader.js.map +1 -0
- package/dist/loaders/csv-file.loader.d.ts +61 -0
- package/dist/loaders/csv-file.loader.d.ts.map +1 -0
- package/dist/loaders/csv-file.loader.js +162 -0
- package/dist/loaders/csv-file.loader.js.map +1 -0
- package/dist/loaders/directory.loader.d.ts +67 -0
- package/dist/loaders/directory.loader.d.ts.map +1 -0
- package/dist/loaders/directory.loader.js +163 -0
- package/dist/loaders/directory.loader.js.map +1 -0
- package/dist/loaders/docx.loader.d.ts +52 -0
- package/dist/loaders/docx.loader.d.ts.map +1 -0
- package/dist/loaders/docx.loader.js +110 -0
- package/dist/loaders/docx.loader.js.map +1 -0
- package/dist/loaders/github.loader.d.ts +114 -0
- package/dist/loaders/github.loader.d.ts.map +1 -0
- package/dist/loaders/github.loader.js +217 -0
- package/dist/loaders/github.loader.js.map +1 -0
- package/dist/loaders/html-file.loader.d.ts +55 -0
- package/dist/loaders/html-file.loader.d.ts.map +1 -0
- package/dist/loaders/html-file.loader.js +170 -0
- package/dist/loaders/html-file.loader.js.map +1 -0
- package/dist/loaders/index.d.ts +52 -0
- package/dist/loaders/index.d.ts.map +1 -0
- package/dist/loaders/index.js +61 -0
- package/dist/loaders/index.js.map +1 -0
- package/dist/loaders/json-file.loader.d.ts +51 -0
- package/dist/loaders/json-file.loader.d.ts.map +1 -0
- package/dist/loaders/json-file.loader.js +100 -0
- package/dist/loaders/json-file.loader.js.map +1 -0
- package/dist/loaders/markdown-file.loader.d.ts +61 -0
- package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
- package/dist/loaders/markdown-file.loader.js +148 -0
- package/dist/loaders/markdown-file.loader.js.map +1 -0
- package/dist/loaders/pdf.loader.d.ts +64 -0
- package/dist/loaders/pdf.loader.d.ts.map +1 -0
- package/dist/loaders/pdf.loader.js +163 -0
- package/dist/loaders/pdf.loader.js.map +1 -0
- package/dist/loaders/text-file.loader.d.ts +39 -0
- package/dist/loaders/text-file.loader.d.ts.map +1 -0
- package/dist/loaders/text-file.loader.js +69 -0
- package/dist/loaders/text-file.loader.js.map +1 -0
- package/dist/loaders/web.loader.d.ts +87 -0
- package/dist/loaders/web.loader.d.ts.map +1 -0
- package/dist/loaders/web.loader.js +194 -0
- package/dist/loaders/web.loader.js.map +1 -0
- package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
- package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
- package/dist/loaders/youtube-transcript.loader.js +254 -0
- package/dist/loaders/youtube-transcript.loader.js.map +1 -0
- package/dist/memory/index.d.ts +11 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +31 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/memory-manager.d.ts +96 -0
- package/dist/memory/memory-manager.d.ts.map +1 -0
- package/dist/memory/memory-manager.js +369 -0
- package/dist/memory/memory-manager.js.map +1 -0
- package/dist/memory/memory-store.interface.d.ts +73 -0
- package/dist/memory/memory-store.interface.d.ts.map +1 -0
- package/dist/memory/memory-store.interface.js +6 -0
- package/dist/memory/memory-store.interface.js.map +1 -0
- package/dist/memory/stores/buffer-memory.d.ts +47 -0
- package/dist/memory/stores/buffer-memory.d.ts.map +1 -0
- package/dist/memory/stores/buffer-memory.js +280 -0
- package/dist/memory/stores/buffer-memory.js.map +1 -0
- package/dist/memory/stores/hybrid-memory.d.ts +49 -0
- package/dist/memory/stores/hybrid-memory.d.ts.map +1 -0
- package/dist/memory/stores/hybrid-memory.js +194 -0
- package/dist/memory/stores/hybrid-memory.js.map +1 -0
- package/dist/memory/stores/vector-memory.d.ts +48 -0
- package/dist/memory/stores/vector-memory.d.ts.map +1 -0
- package/dist/memory/stores/vector-memory.js +312 -0
- package/dist/memory/stores/vector-memory.js.map +1 -0
- package/dist/memory/types.d.ts +119 -0
- package/dist/memory/types.d.ts.map +1 -0
- package/dist/memory/types.js +18 -0
- package/dist/memory/types.js.map +1 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
- package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
- package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
- package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
- package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
- package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/hyde.prompt.js +18 -0
- package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
- package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
- package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
- package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
- package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
- package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/query-planner.prompt.js +30 -0
- package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
- package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
- package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
- package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
- package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
- package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
- package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
- package/dist/prompts/community-summary.prompt.d.ts +9 -0
- package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
- package/dist/prompts/community-summary.prompt.js +30 -0
- package/dist/prompts/community-summary.prompt.js.map +1 -0
- package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
- package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
- package/dist/prompts/entity-extraction.prompt.js +39 -0
- package/dist/prompts/entity-extraction.prompt.js.map +1 -0
- package/dist/prompts/graph-search.prompt.d.ts +10 -0
- package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
- package/dist/prompts/graph-search.prompt.js +23 -0
- package/dist/prompts/graph-search.prompt.js.map +1 -0
- package/dist/prompts/index.d.ts +13 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +29 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/prompts/rag-answer.prompt.d.ts +9 -0
- package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
- package/dist/prompts/rag-answer.prompt.js +20 -0
- package/dist/prompts/rag-answer.prompt.js.map +1 -0
- package/dist/rag-pipeline-with-memory.d.ts +68 -0
- package/dist/rag-pipeline-with-memory.d.ts.map +1 -0
- package/dist/rag-pipeline-with-memory.js +186 -0
- package/dist/rag-pipeline-with-memory.js.map +1 -0
- package/dist/rag-pipeline.d.ts +59 -0
- package/dist/rag-pipeline.d.ts.map +1 -0
- package/dist/rag-pipeline.js +181 -0
- package/dist/rag-pipeline.js.map +1 -0
- package/dist/rag.module.d.ts +26 -0
- package/dist/rag.module.d.ts.map +1 -0
- package/dist/rag.module.js +40 -0
- package/dist/rag.module.js.map +1 -0
- package/dist/rag.service.d.ts +96 -0
- package/dist/rag.service.d.ts.map +1 -0
- package/dist/rag.service.js +173 -0
- package/dist/rag.service.js.map +1 -0
- package/dist/retrieval/bm25.d.ts +57 -0
- package/dist/retrieval/bm25.d.ts.map +1 -0
- package/dist/retrieval/bm25.js +106 -0
- package/dist/retrieval/bm25.js.map +1 -0
- package/dist/retrieval/hybrid-search.d.ts +48 -0
- package/dist/retrieval/hybrid-search.d.ts.map +1 -0
- package/dist/retrieval/hybrid-search.js +123 -0
- package/dist/retrieval/hybrid-search.js.map +1 -0
- package/dist/retrieval/multi-query.d.ts +38 -0
- package/dist/retrieval/multi-query.d.ts.map +1 -0
- package/dist/retrieval/multi-query.js +135 -0
- package/dist/retrieval/multi-query.js.map +1 -0
- package/dist/text-splitters/recursive-text-splitter.d.ts +21 -0
- package/dist/text-splitters/recursive-text-splitter.d.ts.map +1 -0
- package/dist/text-splitters/recursive-text-splitter.js +95 -0
- package/dist/text-splitters/recursive-text-splitter.js.map +1 -0
- package/dist/types/index.d.ts +144 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +16 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +16 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +58 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/vector-stores/chroma.store.d.ts +42 -0
- package/dist/vector-stores/chroma.store.d.ts.map +1 -0
- package/dist/vector-stores/chroma.store.js +242 -0
- package/dist/vector-stores/chroma.store.js.map +1 -0
- package/dist/vector-stores/memory-vector-store.d.ts +20 -0
- package/dist/vector-stores/memory-vector-store.d.ts.map +1 -0
- package/dist/vector-stores/memory-vector-store.js +94 -0
- package/dist/vector-stores/memory-vector-store.js.map +1 -0
- package/dist/vector-stores/pinecone.store.d.ts +34 -0
- package/dist/vector-stores/pinecone.store.d.ts.map +1 -0
- package/dist/vector-stores/pinecone.store.js +146 -0
- package/dist/vector-stores/pinecone.store.js.map +1 -0
- package/dist/vector-stores/qdrant.store.d.ts +33 -0
- package/dist/vector-stores/qdrant.store.d.ts.map +1 -0
- package/dist/vector-stores/qdrant.store.js +174 -0
- package/dist/vector-stores/qdrant.store.js.map +1 -0
- package/dist/vector-stores/weaviate.store.d.ts +37 -0
- package/dist/vector-stores/weaviate.store.d.ts.map +1 -0
- package/dist/vector-stores/weaviate.store.js +226 -0
- package/dist/vector-stores/weaviate.store.js.map +1 -0
- package/package.json +146 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* DirectoryLoader
|
|
4
|
+
*
|
|
5
|
+
* Walks a directory (optionally recursively) and loads every file whose
|
|
6
|
+
* extension is matched by a registered loader or the `loaders` map.
|
|
7
|
+
*
|
|
8
|
+
* Supported patterns:
|
|
9
|
+
* 1. **Extension map** (recommended) — pass a `loaders` map of
|
|
10
|
+
* `{ '.pdf': (path) => new PdfLoader({ path }) }` to customise which
|
|
11
|
+
* loader handles which extension.
|
|
12
|
+
* 2. **Auto-detect** — falls back to the built-in file loaders for the
|
|
13
|
+
* common types (.txt, .md, .html, .json, .csv) with default options.
|
|
14
|
+
* 3. **Glob filter** — pass `glob` to only load files matching a pattern.
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```typescript
|
|
18
|
+
* const loader = new DirectoryLoader({
|
|
19
|
+
* path: './docs',
|
|
20
|
+
* recursive: true,
|
|
21
|
+
* loaders: {
|
|
22
|
+
* '.txt': (p) => new TextFileLoader({ path: p }),
|
|
23
|
+
* '.md': (p) => new MarkdownFileLoader({ path: p, splitByHeading: 'h2' }),
|
|
24
|
+
* '.pdf': (p) => new PdfLoader({ path: p }),
|
|
25
|
+
* },
|
|
26
|
+
* });
|
|
27
|
+
* const docs = await loader.load();
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
31
|
+
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
32
|
+
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
33
|
+
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
34
|
+
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
35
|
+
};
|
|
36
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
37
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
38
|
+
};
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.DirectoryLoader = void 0;
|
|
41
|
+
const promises_1 = require("fs/promises");
|
|
42
|
+
const path_1 = require("path");
|
|
43
|
+
const base_loader_1 = require("./base.loader");
|
|
44
|
+
const text_file_loader_1 = require("./text-file.loader");
|
|
45
|
+
const markdown_file_loader_1 = require("./markdown-file.loader");
|
|
46
|
+
const json_file_loader_1 = require("./json-file.loader");
|
|
47
|
+
const csv_file_loader_1 = require("./csv-file.loader");
|
|
48
|
+
const html_file_loader_1 = require("./html-file.loader");
|
|
49
|
+
let DirectoryLoader = class DirectoryLoader extends base_loader_1.BaseDocumentLoader {
|
|
50
|
+
constructor(options) {
|
|
51
|
+
super();
|
|
52
|
+
this.opts = {
|
|
53
|
+
path: options.path,
|
|
54
|
+
loaders: options.loaders ?? {},
|
|
55
|
+
recursive: options.recursive ?? true,
|
|
56
|
+
includeExtensions: options.includeExtensions ?? [],
|
|
57
|
+
excludeExtensions: options.excludeExtensions ?? [],
|
|
58
|
+
maxFiles: options.maxFiles ?? 1000,
|
|
59
|
+
metadata: options.metadata ?? {},
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
async load() {
|
|
63
|
+
const filePaths = await this.collectFiles(this.opts.path, 0);
|
|
64
|
+
const allDocs = [];
|
|
65
|
+
for (const filePath of filePaths) {
|
|
66
|
+
const loader = this.resolveLoader(filePath);
|
|
67
|
+
if (!loader)
|
|
68
|
+
continue;
|
|
69
|
+
try {
|
|
70
|
+
const docs = await loader.load();
|
|
71
|
+
// Merge directory-level metadata into each document
|
|
72
|
+
for (const doc of docs) {
|
|
73
|
+
doc.metadata = {
|
|
74
|
+
...doc.metadata,
|
|
75
|
+
directoryPath: this.opts.path,
|
|
76
|
+
...this.opts.metadata,
|
|
77
|
+
};
|
|
78
|
+
allDocs.push(doc);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
catch (err) {
|
|
82
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
83
|
+
// eslint-disable-next-line no-console
|
|
84
|
+
console.warn(`[DirectoryLoader] Skipping ${(0, path_1.basename)(filePath)}: ${message}`);
|
|
85
|
+
}
|
|
86
|
+
if (allDocs.length >= this.opts.maxFiles) {
|
|
87
|
+
// eslint-disable-next-line no-console
|
|
88
|
+
console.warn(`[DirectoryLoader] maxFiles limit (${this.opts.maxFiles}) reached. ` +
|
|
89
|
+
`Stopping early. Loaded ${allDocs.length} documents.`);
|
|
90
|
+
break;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
return allDocs;
|
|
94
|
+
}
|
|
95
|
+
// ── Private helpers ──────────────────────────────────────────────────────
|
|
96
|
+
async collectFiles(dir, depth) {
|
|
97
|
+
const entries = await (0, promises_1.readdir)(dir);
|
|
98
|
+
const files = [];
|
|
99
|
+
for (const entry of entries) {
|
|
100
|
+
// Skip hidden files/directories
|
|
101
|
+
if (entry.startsWith('.'))
|
|
102
|
+
continue;
|
|
103
|
+
const fullPath = (0, path_1.join)(dir, entry);
|
|
104
|
+
const stats = await (0, promises_1.stat)(fullPath);
|
|
105
|
+
if (stats.isDirectory()) {
|
|
106
|
+
if (this.opts.recursive) {
|
|
107
|
+
const subFiles = await this.collectFiles(fullPath, depth + 1);
|
|
108
|
+
files.push(...subFiles);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
else {
|
|
112
|
+
const ext = (0, path_1.extname)(entry).toLowerCase();
|
|
113
|
+
if (this.opts.includeExtensions.length > 0 && !this.opts.includeExtensions.includes(ext)) {
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
if (this.opts.excludeExtensions.includes(ext)) {
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
119
|
+
files.push(fullPath);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
return files;
|
|
123
|
+
}
|
|
124
|
+
resolveLoader(filePath) {
|
|
125
|
+
const ext = (0, path_1.extname)(filePath).toLowerCase();
|
|
126
|
+
// 1. Check explicitly configured loaders first
|
|
127
|
+
const factory = this.opts.loaders[ext];
|
|
128
|
+
if (factory)
|
|
129
|
+
return factory(filePath);
|
|
130
|
+
// 2. Auto-detect using built-in loaders for common types
|
|
131
|
+
return this.autoDetectLoader(filePath, ext);
|
|
132
|
+
}
|
|
133
|
+
autoDetectLoader(filePath, ext) {
|
|
134
|
+
switch (ext) {
|
|
135
|
+
case '.txt':
|
|
136
|
+
case '.text':
|
|
137
|
+
case '.log':
|
|
138
|
+
return new text_file_loader_1.TextFileLoader({ path: filePath });
|
|
139
|
+
case '.md':
|
|
140
|
+
case '.markdown':
|
|
141
|
+
case '.mdx':
|
|
142
|
+
return new markdown_file_loader_1.MarkdownFileLoader({ path: filePath });
|
|
143
|
+
case '.json':
|
|
144
|
+
return new json_file_loader_1.JSONFileLoader({ path: filePath });
|
|
145
|
+
case '.csv':
|
|
146
|
+
return new csv_file_loader_1.CSVFileLoader({ path: filePath });
|
|
147
|
+
case '.html':
|
|
148
|
+
case '.htm':
|
|
149
|
+
return new html_file_loader_1.HTMLFileLoader({ path: filePath });
|
|
150
|
+
default:
|
|
151
|
+
return undefined;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
};
|
|
155
|
+
exports.DirectoryLoader = DirectoryLoader;
|
|
156
|
+
exports.DirectoryLoader = DirectoryLoader = __decorate([
|
|
157
|
+
(0, base_loader_1.Loader)({
|
|
158
|
+
name: 'DirectoryLoader',
|
|
159
|
+
description: 'Walks a directory and loads all supported files using registered loaders.',
|
|
160
|
+
}),
|
|
161
|
+
__metadata("design:paramtypes", [Object])
|
|
162
|
+
], DirectoryLoader);
|
|
163
|
+
//# sourceMappingURL=directory.loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"directory.loader.js","sourceRoot":"","sources":["../../src/loaders/directory.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;;;;;;;;;;;;AAEH,0CAA4C;AAC5C,+BAA+C;AAC/C,+CAA2D;AAG3D,yDAAoD;AACpD,iEAA4D;AAC5D,yDAAoD;AACpD,uDAAkD;AAClD,yDAAoD;AAmC7C,IAAM,eAAe,GAArB,MAAM,eAAgB,SAAQ,gCAAkB;IAGrD,YAAY,OAA+B;QACzC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,IAAI,GAAG;YACV,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;YAC9B,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,IAAI;YACpC,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,EAAE;YAClD,iBAAiB,EAAE,OAAO,CAAC,iBAAiB,IAAI,EAAE;YAClD,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;YAClC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,CAAC,MAAM;gBAAE,SAAS;YAEtB,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBACjC,oDAAoD;gBACpD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;oBACvB,GAAG,CAAC,QAAQ,GAAG;wBACb,GAAG,GAAG,CAAC,QAAQ;wBACf,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI;wBAC7B,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ;qBACtB,CAAC;oBACF,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpB,CAAC;YACH,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CAAC,8BAA8B,IAAA,eAAQ,EAAC,QAAQ,CAAC,KAAK,OAAO,EAAE,CAAC,CAAC;YAC/E,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACzC,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CACV,qCAAqC,IAAI,CAAC,IAAI,CAAC,QAAQ,aAAa;oBAClE,0BAA0B,OAAO,CAAC,MAAM,aAAa,CACxD,CAAC;gBACF,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,4EAA4E;IAEpE,KAAK,CAAC,YAAY,CAAC,GAAW,EAAE,KAAa;QACnD,MAAM,OAAO,GAAG,MAAM,IAAA,kBAAO,EAAC,GAAG,CAAC,CAAC;QACnC,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,gCAAgC;YAChC,IAAI,KAAK,
CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YAEpC,MAAM,QAAQ,GAAG,IAAA,WAAI,EAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YAClC,MAAM,KAAK,GAAG,MAAM,IAAA,eAAI,EAAC,QAAQ,CAAC,CAAC;YAEnC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;oBACxB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;oBAC9D,KAAK,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAC;gBAC1B,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,GAAG,IAAA,cAAO,EAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;gBAEzC,IAAI,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzF,SAAS;gBACX,CAAC;gBACD,IAAI,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC9C,SAAS;gBACX,CAAC;gBAED,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,aAAa,CAAC,QAAgB;QACpC,MAAM,GAAG,GAAG,IAAA,cAAO,EAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;QAE5C,+CAA+C;QAC/C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,OAAO;YAAE,OAAO,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEtC,yDAAyD;QACzD,OAAO,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9C,CAAC;IAEO,gBAAgB,CAAC,QAAgB,EAAE,GAAW;QACpD,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,MAAM,CAAC;YACZ,KAAK,OAAO,CAAC;YACb,KAAK,MAAM;gBACT,OAAO,IAAI,iCAAc,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAChD,KAAK,KAAK,CAAC;YACX,KAAK,WAAW,CAAC;YACjB,KAAK,MAAM;gBACT,OAAO,IAAI,yCAAkB,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YACpD,KAAK,OAAO;gBACV,OAAO,IAAI,iCAAc,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAChD,KAAK,MAAM;gBACT,OAAO,IAAI,+BAAa,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC/C,KAAK,OAAO,CAAC;YACb,KAAK,MAAM;gBACT,OAAO,IAAI,iCAAc,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;YAChD;gBACE,OAAO,SAAS,CAAC;QACrB,CAAC;IACH,CAAC;CACF,CAAA;AAzHY,0CAAe;0BAAf,eAAe;IAJ3B,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,2EAA2E;KACzF,CAAC;;GACW,eAAe,CAyH3B"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DocxLoader
|
|
3
|
+
*
|
|
4
|
+
* Loads Microsoft Word (.docx) files using the optional `mammoth` peer
|
|
5
|
+
* dependency. Converts rich-text document content to plain text or HTML.
|
|
6
|
+
*
|
|
7
|
+
* Install the peer dependency:
|
|
8
|
+
* ```bash
|
|
9
|
+
* npm install mammoth
|
|
10
|
+
* ```
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```typescript
|
|
14
|
+
* const loader = new DocxLoader({ path: './proposal.docx' });
|
|
15
|
+
* const docs = await loader.load();
|
|
16
|
+
* // docs[0].content === extracted plain text
|
|
17
|
+
* // docs[0].metadata.source === 'proposal.docx'
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* Extract HTML instead of plain text (useful for preserving table structure):
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const loader = new DocxLoader({ path: './report.docx', outputFormat: 'html' });
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* Legacy .doc files are NOT supported — mammoth only handles the modern OOXML
|
|
26
|
+
* format (.docx). Use LibreOffice to convert .doc → .docx first.
|
|
27
|
+
*/
|
|
28
|
+
import { BaseDocumentLoader } from './base.loader';
|
|
29
|
+
import type { Document } from '../types';
|
|
30
|
+
/** Constructor options for `DocxLoader`; only `path` is required. */
export interface DocxLoaderOptions {
|
|
31
|
+
/** Path to the .docx file. The loader reads this file and uses its basename as the document's `source` metadata. */
|
|
32
|
+
path: string;
|
|
33
|
+
/**
|
|
34
|
+
* Output format of the extracted content.
|
|
35
|
+
* - `'text'` (default) — plain text, stripping all formatting
|
|
36
|
+
* - `'html'` — HTML string preserving basic structure
|
|
37
|
+
*/
|
|
38
|
+
outputFormat?: 'text' | 'html';
|
|
39
|
+
/**
|
|
40
|
+
* Custom style map for HTML output (not used for `'text'` output); an empty array is treated as unset.
|
|
41
|
+
* @see https://github.com/mwilliamson/mammoth.js#writing-style-maps
|
|
42
|
+
*/
|
|
43
|
+
styleMap?: string[];
|
|
44
|
+
/** Extra metadata merged into every document; spread after the loader's own `source`, `filePath`, `loaderType` and `outputFormat` keys, so a clashing key here takes precedence. */
|
|
45
|
+
metadata?: Record<string, unknown>;
|
|
46
|
+
}
|
|
47
|
+
/**
 * Document loader for a single Microsoft Word (.docx) file.
 * The member notes below describe the compiled `docx.loader.js` that ships with this declaration.
 */
export declare class DocxLoader extends BaseDocumentLoader {
|
|
48
|
+
/** Options with defaults applied: `outputFormat` 'text', `styleMap` [], `metadata` {}. */
private readonly opts;
|
|
49
|
+
/** Stores the given options; no file access happens at construction time. */
constructor(options: DocxLoaderOptions);
|
|
50
|
+
/**
 * Requires `mammoth`, reads the file at `opts.path`, converts it, and resolves to a
 * single-element array. Rejects when `mammoth` cannot be required or the file cannot be read.
 */
load(): Promise<Document[]>;
|
|
51
|
+
}
|
|
52
|
+
//# sourceMappingURL=docx.loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docx.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/docx.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAIH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,iBAAiB;IAChC,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IAC/B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,qBAMa,UAAW,SAAQ,kBAAkB;IAChD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAA8B;gBAEvC,OAAO,EAAE,iBAAiB;IAUhC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;CAyDlC"}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* DocxLoader
|
|
4
|
+
*
|
|
5
|
+
* Loads Microsoft Word (.docx) files using the optional `mammoth` peer
|
|
6
|
+
* dependency. Converts rich-text document content to plain text or HTML.
|
|
7
|
+
*
|
|
8
|
+
* Install the peer dependency:
|
|
9
|
+
* ```bash
|
|
10
|
+
* npm install mammoth
|
|
11
|
+
* ```
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* const loader = new DocxLoader({ path: './proposal.docx' });
|
|
16
|
+
* const docs = await loader.load();
|
|
17
|
+
* // docs[0].content === extracted plain text
|
|
18
|
+
* // docs[0].metadata.source === 'proposal.docx'
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* Extract HTML instead of plain text (useful for preserving table structure):
|
|
22
|
+
* ```typescript
|
|
23
|
+
* const loader = new DocxLoader({ path: './report.docx', outputFormat: 'html' });
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* Legacy .doc files are NOT supported — mammoth only handles the modern OOXML
|
|
27
|
+
* format (.docx). Use LibreOffice to convert .doc → .docx first.
|
|
28
|
+
*/
|
|
29
|
+
// Compiler-emitted decorator helper. Reuses an existing `this.__decorate` when one is
// already defined; otherwise applies `decorators` to `target` (class decorators, < 3 args)
// or to the member `key` of `target`, and returns the resulting class / descriptor.
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
|
|
30
|
+
// c: number of arguments actually passed. r: the value being decorated — the target itself
// when fewer than 3 args were given, otherwise the property descriptor (looked up on the
// target when `desc` is exactly null). d: scratch variable for the current decorator.
var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;
|
|
31
|
+
// Delegate to Reflect.decorate when the environment provides it.
if (typeof Reflect === "object" && typeof Reflect.decorate === "function") r = Reflect.decorate(decorators, target, key, desc);
|
|
32
|
+
// Fallback: apply decorators from last to first; a decorator's falsy return keeps the previous r.
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
33
|
+
// With more than 3 args and a truthy result, install the descriptor on the target; always return r.
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
34
|
+
};
|
|
35
|
+
// Compiler-emitted metadata helper. Reuses an existing `this.__metadata` when defined;
// otherwise returns `Reflect.metadata(k, v)` if that function exists, and `undefined`
// (implicitly) when it does not.
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
36
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.DocxLoader = void 0;
|
|
40
|
+
const promises_1 = require("fs/promises");
|
|
41
|
+
const path_1 = require("path");
|
|
42
|
+
const base_loader_1 = require("./base.loader");
|
|
43
|
+
/**
 * Loads one Microsoft Word (.docx) file through the optional `mammoth` package and
 * returns it as a single document, either as plain text or as HTML.
 */
let DocxLoader = class DocxLoader extends base_loader_1.BaseDocumentLoader {
    /**
     * @param options Loader options. `outputFormat` defaults to 'text', `styleMap` to an
     *   empty array and `metadata` to an empty object.
     */
    constructor(options) {
        super();
        this.opts = {
            path: options.path,
            outputFormat: options.outputFormat ?? 'text',
            styleMap: options.styleMap ?? [],
            metadata: options.metadata ?? {},
        };
    }
    /**
     * Reads the configured file and converts it with mammoth.
     *
     * @returns A single-element array holding the converted document. Its metadata carries
     *   `source` (file basename), `filePath`, `loaderType` and `outputFormat`, followed by
     *   the user-supplied metadata (which therefore wins on key clashes).
     * @throws Error when `mammoth` cannot be required; the underlying failure is attached
     *   as `cause`. File-read and conversion errors propagate unchanged.
     */
    async load() {
        // Lazy require — mammoth is only needed when DocxLoader is actually used.
        let mammoth;
        try {
            // eslint-disable-next-line @typescript-eslint/no-require-imports
            const mod = require('mammoth');
            mammoth = mod.default ?? mod;
        }
        catch (err) {
            const error = new Error('[DocxLoader] mammoth is not installed. Run: npm install mammoth');
            // Keep the original failure reachable: require() can also fail for reasons other
            // than a missing package (e.g. a broken transitive dependency).
            error.cause = err;
            throw error;
        }
        const buffer = await (0, promises_1.readFile)(this.opts.path);
        const fileName = (0, path_1.basename)(this.opts.path);
        const { outputFormat, styleMap } = this.opts;
        let result;
        if (outputFormat === 'html') {
            // mammoth takes conversion options (including `styleMap`) as the SECOND argument;
            // keys placed on the input object next to `buffer` are ignored.
            const conversionOptions = styleMap.length > 0 ? { styleMap } : {};
            result = await mammoth.convertToHtml({ buffer }, conversionOptions);
        }
        else {
            result = await mammoth.extractRawText({ buffer });
        }
        const content = result.value;
        // Log any conversion warnings (e.g. unsupported features in the DOCX).
        const warnings = (result.messages ?? [])
            .filter((m) => m.type === 'warning')
            .map((m) => m.message);
        if (warnings.length > 0) {
            // eslint-disable-next-line no-console
            console.warn(`[DocxLoader] Conversion warnings for ${fileName}:`, warnings);
        }
        return [
            this.createDocument(content, {
                source: fileName,
                filePath: this.opts.path,
                loaderType: 'docx',
                outputFormat: this.opts.outputFormat,
                ...this.opts.metadata,
            }),
        ];
    }
};
|
|
100
|
+
// Export the class, then re-export the value returned by applying its decorators.
exports.DocxLoader = DocxLoader;
|
|
101
|
+
exports.DocxLoader = DocxLoader = __decorate([
|
|
102
|
+
// `Loader` comes from ./base.loader and is called with the descriptive fields below.
// NOTE(review): what it does with them (e.g. registration by extension / MIME type) is not visible here — confirm.
(0, base_loader_1.Loader)({
|
|
103
|
+
name: 'DocxLoader',
|
|
104
|
+
description: 'Loads Microsoft Word .docx files using mammoth (optional peer dependency).',
|
|
105
|
+
extensions: ['.docx'],
|
|
106
|
+
mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
107
|
+
}),
|
|
108
|
+
// Emitted constructor parameter types: the single `options` parameter is recorded as Object.
__metadata("design:paramtypes", [Object])
|
|
109
|
+
], DocxLoader);
|
|
110
|
+
//# sourceMappingURL=docx.loader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docx.loader.js","sourceRoot":"","sources":["../../src/loaders/docx.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;;;;;;;;;;;;AAEH,0CAAuC;AACvC,+BAAgC;AAChC,+CAA2D;AA2BpD,IAAM,UAAU,GAAhB,MAAM,UAAW,SAAQ,gCAAkB;IAGhD,YAAY,OAA0B;QACpC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,IAAI,GAAG;YACV,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,MAAM;YAC5C,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;YAChC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI;QACR,6EAA6E;QAC7E,IAAI,OAMH,CAAC;QAEF,IAAI,CAAC;YACH,iEAAiE;YACjE,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,CAAkD,CAAC;YAChF,OAAO,GAAG,GAAG,CAAC,OAAO,IAAI,GAAG,CAAC;QAC/B,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,iEAAiE,CAAC,CAAC;QACrF,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,IAAA,eAAQ,EAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE1C,IAAI,OAAe,CAAC;QACpB,IAAI,QAAmB,CAAC;QAExB,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,KAAK,MAAM,EAAE,CAAC;YACtC,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC;gBACzC,MAAM;gBACN,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;aACzE,CAAC,CAAC;YACH,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACvB,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;YACxD,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACvB,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC7B,CAAC;QAED,sEAAsE;QACtE,MAAM,QAAQ,GAAI,QAAqD;aACpE,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC;aACnC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAEzB,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,sCAAsC;YACtC,OAAO,CAAC,IAAI,CAAC,wCAAwC,QAAQ,GAAG,EAAE,QAAQ,CAAC,CAAC;QAC9E,CAAC;QAED,OAAO;YACL,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE;gBAC3B,MAAM,EAAE,QAAQ;gBAChB,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI;gBACxB,UAAU,EAAE,MAAM;gBAClB,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY;gBACpC,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ;aACtB,CAAC;SACH,CAAC;IACJ,CAAC;CACF,CAAA;AAtEY,gCAAU
;qBAAV,UAAU;IANtB,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,4EAA4E;QACzF,UAAU,EAAE,CAAC,OAAO,CAAC;QACrB,SAAS,EAAE,CAAC,yEAAyE,CAAC;KACvF,CAAC;;GACW,UAAU,CAsEtB"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GitHubLoader
|
|
3
|
+
*
|
|
4
|
+
* Loads files from a GitHub repository using the GitHub REST API.
|
|
5
|
+
* No extra npm dependency required — uses Node.js built-in `fetch`.
|
|
6
|
+
*
|
|
7
|
+
* Supports:
|
|
8
|
+
* - Public repositories (no token needed, but rate-limited to 60 req/h)
|
|
9
|
+
* - Private repositories (requires `token`)
|
|
10
|
+
* - Specific branches, tags, or commits via `ref`
|
|
11
|
+
* - Recursive directory traversal with depth limit
|
|
12
|
+
* - Glob-style extension filtering (`includeExtensions`)
|
|
13
|
+
* - Path prefix filtering (`includePaths`)
|
|
14
|
+
*
|
|
15
|
+
* Rate limits:
|
|
16
|
+
* - Unauthenticated: 60 requests per hour per IP
|
|
17
|
+
* - Authenticated: 5,000 requests per hour per token
|
|
18
|
+
* For large repos, always provide a `token`.
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* // Load all Markdown files from a public repo
|
|
23
|
+
* const loader = new GitHubLoader({
|
|
24
|
+
* owner: 'hazeljs',
|
|
25
|
+
* repo: 'hazeljs',
|
|
26
|
+
* ref: 'main',
|
|
27
|
+
* includeExtensions: ['.md'],
|
|
28
|
+
* token: process.env.GITHUB_TOKEN,
|
|
29
|
+
* });
|
|
30
|
+
* const docs = await loader.load();
|
|
31
|
+
* // docs[0].metadata.path === 'README.md'
|
|
32
|
+
* // docs[0].metadata.url === 'https://github.com/hazeljs/hazeljs/blob/main/README.md'
|
|
33
|
+
* ```
|
|
34
|
+
*
|
|
35
|
+
* Load a specific directory only:
|
|
36
|
+
* ```typescript
|
|
37
|
+
* const loader = new GitHubLoader({
|
|
38
|
+
* owner: 'facebook',
|
|
39
|
+
* repo: 'react',
|
|
40
|
+
* ref: 'main',
|
|
41
|
+
* directory: 'packages/react/src',
|
|
42
|
+
* includeExtensions: ['.js', '.ts'],
|
|
43
|
+
* });
|
|
44
|
+
* ```
|
|
45
|
+
*/
|
|
46
|
+
import { BaseDocumentLoader } from './base.loader';
|
|
47
|
+
import type { Document } from '../types';
|
|
48
|
+
/** Constructor options for `GitHubLoader`; only `owner` and `repo` are required. */
export interface GitHubLoaderOptions {
|
|
49
|
+
/** GitHub organisation or user name that owns the repository. */
|
|
50
|
+
owner: string;
|
|
51
|
+
/** Repository name (without the owner prefix). */
|
|
52
|
+
repo: string;
|
|
53
|
+
/**
|
|
54
|
+
* Branch name, tag name, or commit SHA to load from.
|
|
55
|
+
* @default 'main'
|
|
56
|
+
*/
|
|
57
|
+
ref?: string;
|
|
58
|
+
/**
|
|
59
|
+
* Sub-directory inside the repository to load.
|
|
60
|
+
* Defaults to the repository root (`''`).
|
|
61
|
+
*/
|
|
62
|
+
directory?: string;
|
|
63
|
+
/**
|
|
64
|
+
* Only load files with these extensions (written with the leading dot, as in the example).
|
|
65
|
+
* @example ['.ts', '.md']
|
|
66
|
+
*/
|
|
67
|
+
includeExtensions?: string[];
|
|
68
|
+
/**
|
|
69
|
+
* Only load files under these path prefixes (relative to `directory`).
|
|
70
|
+
* @example ['src/', 'docs/']
|
|
71
|
+
*/
|
|
72
|
+
includePaths?: string[];
|
|
73
|
+
/**
|
|
74
|
+
* Skip files under these path prefixes. NOTE(review): not stated whether these are relative to `directory` like `includePaths` — confirm.
|
|
75
|
+
* @example ['node_modules/', 'dist/', '__tests__/']
|
|
76
|
+
*/
|
|
77
|
+
excludePaths?: string[];
|
|
78
|
+
/**
|
|
79
|
+
* Maximum number of files to load (safety limit).
|
|
80
|
+
* @default 500
|
|
81
|
+
*/
|
|
82
|
+
maxFiles?: number;
|
|
83
|
+
/**
|
|
84
|
+
* Maximum file size in bytes to load.
|
|
85
|
+
* Files larger than this are skipped with a warning.
|
|
86
|
+
* @default 102400 (100 KB)
|
|
87
|
+
*/
|
|
88
|
+
maxFileSize?: number;
|
|
89
|
+
/**
|
|
90
|
+
* GitHub personal access token (PAT) or fine-grained token.
|
|
91
|
+
* Required for private repos; recommended for public repos to avoid rate limits.
|
|
92
|
+
*/
|
|
93
|
+
token?: string;
|
|
94
|
+
/**
|
|
95
|
+
* Request timeout in milliseconds.
|
|
96
|
+
* @default 15000
|
|
97
|
+
*/
|
|
98
|
+
timeout?: number;
|
|
99
|
+
/** Extra metadata merged into every document. */
|
|
100
|
+
metadata?: Record<string, unknown>;
|
|
101
|
+
}
|
|
102
|
+
/**
 * Document loader that reads files from a GitHub repository.
 * NOTE(review): only the declaration is visible here; the notes on private members are
 * inferred from their names and must be confirmed against `github.loader.js`.
 */
export declare class GitHubLoader extends BaseDocumentLoader {
|
|
103
|
+
/** Loader options as stored by the constructor. */
private readonly opts;
|
|
104
|
+
/** NOTE(review): presumably the base URL of the GitHub REST API — confirm. */
private readonly apiBase;
|
|
105
|
+
constructor(options: GitHubLoaderOptions);
|
|
106
|
+
/** Resolves to the loaded documents. */
load(): Promise<Document[]>;
|
|
107
|
+
/** NOTE(review): presumably fetches the repository file tree for the configured ref — confirm. */
private getTree;
|
|
108
|
+
/** NOTE(review): presumably applies the include/exclude and size/count limits from the options — confirm. */
private filterItems;
|
|
109
|
+
/** NOTE(review): presumably downloads the content of one file — confirm. */
private getFileContent;
|
|
110
|
+
/** NOTE(review): presumably a GET against the GitHub API that returns parsed data — confirm. */
private apiGet;
|
|
111
|
+
/** NOTE(review): presumably the low-level HTTP GET honouring `token` and `timeout` — confirm. */
private httpGet;
|
|
112
|
+
/** NOTE(review): presumably decides whether a file should be treated as binary — confirm. */
private isBinary;
|
|
113
|
+
}
|
|
114
|
+
//# sourceMappingURL=github.loader.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github.loader.d.ts","sourceRoot":"","sources":["../../src/loaders/github.loader.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AAEH,OAAO,EAAE,kBAAkB,EAAU,MAAM,eAAe,CAAC;AAC3D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,WAAW,mBAAmB;IAClC,wCAAwC;IACxC,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAUD,qBAIa,YAAa,SAAQ,kBAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAgC;IACrD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA4B;gBAExC,OAAO,EAAE,mBAAmB;IAkBlC,IAAI,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YA6CnB,OAAO;IA8BrB,OAAO,CAAC,WAAW;YAoCL,cAAc;YAgBd,MAAM;YAKN,OAAO;IA6BrB,OAAO,CAAC,QAAQ;CAYjB"}
|