ragwire 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. ragwire-1.0.0/LICENSE +21 -0
  2. ragwire-1.0.0/PKG-INFO +233 -0
  3. ragwire-1.0.0/README.md +169 -0
  4. ragwire-1.0.0/assets/ragwire.png +0 -0
  5. ragwire-1.0.0/pyproject.toml +150 -0
  6. ragwire-1.0.0/ragwire/__init__.py +73 -0
  7. ragwire-1.0.0/ragwire/core/__init__.py +6 -0
  8. ragwire-1.0.0/ragwire/core/config.py +113 -0
  9. ragwire-1.0.0/ragwire/core/pipeline.py +403 -0
  10. ragwire-1.0.0/ragwire/embeddings/__init__.py +5 -0
  11. ragwire-1.0.0/ragwire/embeddings/factory.py +159 -0
  12. ragwire-1.0.0/ragwire/loaders/__init__.py +5 -0
  13. ragwire-1.0.0/ragwire/loaders/markitdown_loader.py +169 -0
  14. ragwire-1.0.0/ragwire/metadata/__init__.py +6 -0
  15. ragwire-1.0.0/ragwire/metadata/extractor.py +171 -0
  16. ragwire-1.0.0/ragwire/metadata/schema.py +80 -0
  17. ragwire-1.0.0/ragwire/processing/__init__.py +14 -0
  18. ragwire-1.0.0/ragwire/processing/hashing.py +116 -0
  19. ragwire-1.0.0/ragwire/processing/splitter.py +137 -0
  20. ragwire-1.0.0/ragwire/py.typed +0 -0
  21. ragwire-1.0.0/ragwire/retriever/__init__.py +5 -0
  22. ragwire-1.0.0/ragwire/retriever/hybrid.py +172 -0
  23. ragwire-1.0.0/ragwire/utils/__init__.py +5 -0
  24. ragwire-1.0.0/ragwire/utils/logging.py +146 -0
  25. ragwire-1.0.0/ragwire/vectorstores/__init__.py +5 -0
  26. ragwire-1.0.0/ragwire/vectorstores/qdrant_store.py +261 -0
  27. ragwire-1.0.0/ragwire.egg-info/PKG-INFO +233 -0
  28. ragwire-1.0.0/ragwire.egg-info/SOURCES.txt +31 -0
  29. ragwire-1.0.0/ragwire.egg-info/dependency_links.txt +1 -0
  30. ragwire-1.0.0/ragwire.egg-info/requires.txt +43 -0
  31. ragwire-1.0.0/ragwire.egg-info/top_level.txt +1 -0
  32. ragwire-1.0.0/setup.cfg +4 -0
  33. ragwire-1.0.0/tests/test_imports.py +71 -0
ragwire-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 KGP Talkie Private Limited
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
ragwire-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,233 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragwire
3
+ Version: 1.0.0
4
+ Summary: RAGWire — Production-grade RAG toolkit for document ingestion and retrieval with hybrid search support
5
+ Author-email: KGP Talkie Private Limited <udemy@kgptalkie.com>
6
+ Maintainer-email: KGP Talkie Private Limited <udemy@kgptalkie.com>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://kgptalkie.com
9
+ Project-URL: Documentation, https://github.com/laxmimerit/ragwire#readme
10
+ Project-URL: Repository, https://github.com/laxmimerit/ragwire.git
11
+ Project-URL: Issues, https://github.com/laxmimerit/ragwire/issues
12
+ Project-URL: YouTube, https://youtube.com/kgptalkie
13
+ Keywords: rag,retrieval,vector-database,qdrant,embeddings,hybrid-search,nlp,document-processing
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Classifier: Topic :: Text Processing :: Linguistic
24
+ Requires-Python: >=3.9
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: langchain>=0.1.0
28
+ Requires-Dist: langchain-core>=0.1.0
29
+ Requires-Dist: langchain-community>=0.0.0
30
+ Requires-Dist: langchain-text-splitters>=0.0.1
31
+ Requires-Dist: qdrant-client>=1.6.0
32
+ Requires-Dist: langchain-qdrant>=0.1.0
33
+ Requires-Dist: markitdown[pdf]>=0.0.1
34
+ Requires-Dist: pyyaml>=6.0
35
+ Requires-Dist: python-dotenv>=1.0.0
36
+ Requires-Dist: pydantic>=2.0.0
37
+ Requires-Dist: tqdm>=4.66.0
38
+ Provides-Extra: openai
39
+ Requires-Dist: langchain-openai>=0.0.0; extra == "openai"
40
+ Provides-Extra: huggingface
41
+ Requires-Dist: langchain-huggingface>=0.0.0; extra == "huggingface"
42
+ Provides-Extra: ollama
43
+ Requires-Dist: langchain-ollama>=0.0.0; extra == "ollama"
44
+ Provides-Extra: google
45
+ Requires-Dist: langchain-google-genai>=0.0.0; extra == "google"
46
+ Provides-Extra: fastembed
47
+ Requires-Dist: fastembed>=0.2.0; extra == "fastembed"
48
+ Provides-Extra: dev
49
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
50
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
51
+ Requires-Dist: black>=23.0.0; extra == "dev"
52
+ Requires-Dist: isort>=5.0.0; extra == "dev"
53
+ Requires-Dist: flake8>=6.0.0; extra == "dev"
54
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
55
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
56
+ Provides-Extra: all
57
+ Requires-Dist: ragwire[openai]; extra == "all"
58
+ Requires-Dist: ragwire[huggingface]; extra == "all"
59
+ Requires-Dist: ragwire[ollama]; extra == "all"
60
+ Requires-Dist: ragwire[google]; extra == "all"
61
+ Requires-Dist: ragwire[fastembed]; extra == "all"
62
+ Requires-Dist: ragwire[dev]; extra == "all"
63
+ Dynamic: license-file
64
+
65
+ <p align="center">
66
+ <img src="assets/ragwire.png" alt="RAGWire logo" width="120"/>
67
+ </p>
68
+
69
+ <h1 align="center">RAGWire</h1>
70
+ <p align="center">Production-grade RAG toolkit for document ingestion and retrieval</p>
71
+
72
+ <p align="center">
73
+ <a href="https://pypi.org/project/ragwire"><img src="https://img.shields.io/pypi/v/ragwire" alt="PyPI"/></a>
74
+ <a href="https://github.com/laxmimerit/ragwire/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License"/></a>
75
+ <a href="https://youtube.com/kgptalkie"><img src="https://img.shields.io/badge/YouTube-KGP%20Talkie-red" alt="YouTube"/></a>
76
+ </p>
77
+
78
+ ---
79
+
80
+ ## Features
81
+
82
+ - **Document Loading** — PDF, DOCX, XLSX, PPTX and more via MarkItDown
83
+ - **LLM Metadata Extraction** — extracts company, doc type, fiscal period using your LLM
84
+ - **Smart Text Splitting** — markdown-aware and recursive chunking strategies
85
+ - **Multiple Embedding Providers** — Ollama, OpenAI, HuggingFace, Google, FastEmbed
86
+ - **Qdrant Vector Store** — dense, sparse, and hybrid search
87
+ - **Advanced Retrieval** — similarity, MMR, and hybrid search
88
+ - **SHA256 Deduplication** — at both file and chunk level
89
+
90
+ ## Installation
91
+
92
+ ```bash
93
+ pip install ragwire
94
+
95
+ # With Ollama support (local, no API key)
96
+ pip install "ragwire[ollama]"
97
+
98
+ # With all providers
99
+ pip install "ragwire[all]"
100
+ ```
101
+
102
+ ## Quick Start
103
+
104
+ ```python
105
+ from ragwire import RAGPipeline
106
+
107
+ pipeline = RAGPipeline("config.yaml")
108
+
109
+ # Ingest documents
110
+ stats = pipeline.ingest_documents(["data/Apple_10k_2025.pdf"])
111
+ print(f"Chunks created: {stats['chunks_created']}")
112
+
113
+ # Retrieve
114
+ results = pipeline.retrieve("What is Apple's total revenue?", top_k=5)
115
+ for doc in results:
116
+     print(doc.metadata.get("company_name"), doc.page_content[:200])
117
+ ```
118
+
119
+ ## Configuration
120
+
121
+ Copy `config.example.yaml` to `config.yaml` and edit:
122
+
123
+ ```yaml
124
+ embeddings:
125
+   provider: "ollama"
126
+   model: "qwen3-embedding:0.6b"
127
+   base_url: "http://localhost:11434"
128
+
129
+ llm:
130
+   provider: "ollama"
131
+   model: "qwen3.5:9b"
132
+   temperature: 0.0
133
+   num_ctx: 16384
134
+
135
+ vectorstore:
136
+   url: "http://localhost:6333"
137
+   collection_name: "my_docs"
138
+   use_sparse: true
139
+
140
+ retriever:
141
+   search_type: "hybrid"
142
+   top_k: 5
143
+ ```
144
+
145
+ ## Embedding Providers
146
+
147
+ ```yaml
148
+ # Ollama (local)
149
+ embeddings:
150
+   provider: "ollama"
151
+   model: "qwen3-embedding:0.6b"
152
+
153
+ # OpenAI
154
+ embeddings:
155
+   provider: "openai"
156
+   model: "text-embedding-3-small"
157
+
158
+ # HuggingFace (local)
159
+ embeddings:
160
+   provider: "huggingface"
161
+   model_name: "sentence-transformers/all-MiniLM-L6-v2"
162
+
163
+ # Google
164
+ embeddings:
165
+   provider: "google"
166
+   model: "models/embedding-001"
167
+ ```
168
+
169
+ ## Component Usage
170
+
171
+ ```python
172
+ from ragwire import (
173
+ MarkItDownLoader,
174
+ get_splitter,
175
+ get_markdown_splitter,
176
+ get_embedding,
177
+ QdrantStore,
178
+ MetadataExtractor,
179
+ hybrid_search,
180
+ mmr_search,
181
+ )
182
+
183
+ # Load a document
184
+ loader = MarkItDownLoader()
185
+ result = loader.load("document.pdf")
186
+
187
+ # Split text
188
+ splitter = get_markdown_splitter(chunk_size=10000, chunk_overlap=2000)
189
+ chunks = splitter.split_text(result["text_content"])
190
+
191
+ # Embeddings
192
+ embedding = get_embedding({"provider": "ollama", "model": "qwen3-embedding:0.6b"})
193
+
194
+ # Vector store
195
+ store = QdrantStore(config={"url": "http://localhost:6333"}, embedding=embedding)
196
+ store.set_collection("my_collection")
197
+ vectorstore = store.get_store()
198
+ ```
199
+
200
+ ## Architecture
201
+
202
+ ```
203
+ ragwire/
204
+ ├── core/ # Config loader + RAGPipeline orchestrator
205
+ ├── loaders/ # MarkItDown document converter
206
+ ├── processing/ # Text splitters + SHA256 hashing
207
+ ├── metadata/ # Pydantic schema + LLM extractor
208
+ ├── embeddings/ # Multi-provider embedding factory
209
+ ├── vectorstores/ # Qdrant wrapper with hybrid search
210
+ ├── retriever/ # Similarity, MMR, hybrid retrieval
211
+ └── utils/ # Logging
212
+ ```
213
+
214
+ ## Troubleshooting
215
+
216
+ | Error | Fix |
217
+ |-------|-----|
218
+ | Qdrant connection refused | `docker run -p 6333:6333 qdrant/qdrant` |
219
+ | `markitdown[pdf]` missing | `pip install "markitdown[pdf]"` |
220
+ | Ollama model not found | `ollama pull <model-name>` |
221
+ | `fastembed` missing | `pip install fastembed` (needed for hybrid search) |
222
+ | Embedding dimension mismatch | Set `force_recreate: true` in config once, then back to `false` |
223
+
224
+ ## License
225
+
226
+ MIT © 2026 [KGP Talkie Private Limited](https://kgptalkie.com)
227
+
228
+ ## Links
229
+
230
+ - Website: [kgptalkie.com](https://kgptalkie.com)
231
+ - YouTube: [youtube.com/kgptalkie](https://youtube.com/kgptalkie)
232
+ - GitHub: [github.com/laxmimerit/ragwire](https://github.com/laxmimerit/ragwire)
233
+ - Email: udemy@kgptalkie.com
@@ -0,0 +1,169 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/laxmimerit/ragwire/main/assets/ragwire.png" alt="RAGWire logo" width="120"/>
3
+ </p>
4
+
5
+ <h1 align="center">RAGWire</h1>
6
+ <p align="center">Production-grade RAG toolkit for document ingestion and retrieval</p>
7
+
8
+ <p align="center">
9
+ <a href="https://pypi.org/project/ragwire"><img src="https://img.shields.io/pypi/v/ragwire" alt="PyPI"/></a>
10
+ <a href="https://github.com/laxmimerit/ragwire/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License"/></a>
11
+ <a href="https://youtube.com/kgptalkie"><img src="https://img.shields.io/badge/YouTube-KGP%20Talkie-red" alt="YouTube"/></a>
12
+ </p>
13
+
14
+ ---
15
+
16
+ ## Features
17
+
18
+ - **Document Loading** — PDF, DOCX, XLSX, PPTX and more via MarkItDown
19
+ - **LLM Metadata Extraction** — extracts company, doc type, fiscal period using your LLM
20
+ - **Smart Text Splitting** — markdown-aware and recursive chunking strategies
21
+ - **Multiple Embedding Providers** — Ollama, OpenAI, HuggingFace, Google, FastEmbed
22
+ - **Qdrant Vector Store** — dense, sparse, and hybrid search
23
+ - **Advanced Retrieval** — similarity, MMR, and hybrid search
24
+ - **SHA256 Deduplication** — at both file and chunk level
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install ragwire
30
+
31
+ # With Ollama support (local, no API key)
32
+ pip install "ragwire[ollama]"
33
+
34
+ # With all providers
35
+ pip install "ragwire[all]"
36
+ ```
37
+
38
+ ## Quick Start
39
+
40
+ ```python
41
+ from ragwire import RAGPipeline
42
+
43
+ pipeline = RAGPipeline("config.yaml")
44
+
45
+ # Ingest documents
46
+ stats = pipeline.ingest_documents(["data/Apple_10k_2025.pdf"])
47
+ print(f"Chunks created: {stats['chunks_created']}")
48
+
49
+ # Retrieve
50
+ results = pipeline.retrieve("What is Apple's total revenue?", top_k=5)
51
+ for doc in results:
52
+     print(doc.metadata.get("company_name"), doc.page_content[:200])
53
+ ```
54
+
55
+ ## Configuration
56
+
57
+ Copy `config.example.yaml` to `config.yaml` and edit:
58
+
59
+ ```yaml
60
+ embeddings:
61
+   provider: "ollama"
62
+   model: "qwen3-embedding:0.6b"
63
+   base_url: "http://localhost:11434"
64
+
65
+ llm:
66
+   provider: "ollama"
67
+   model: "qwen3.5:9b"
68
+   temperature: 0.0
69
+   num_ctx: 16384
70
+
71
+ vectorstore:
72
+   url: "http://localhost:6333"
73
+   collection_name: "my_docs"
74
+   use_sparse: true
75
+
76
+ retriever:
77
+   search_type: "hybrid"
78
+   top_k: 5
79
+ ```
80
+
81
+ ## Embedding Providers
82
+
83
+ ```yaml
84
+ # Ollama (local)
85
+ embeddings:
86
+   provider: "ollama"
87
+   model: "qwen3-embedding:0.6b"
88
+
89
+ # OpenAI
90
+ embeddings:
91
+   provider: "openai"
92
+   model: "text-embedding-3-small"
93
+
94
+ # HuggingFace (local)
95
+ embeddings:
96
+   provider: "huggingface"
97
+   model_name: "sentence-transformers/all-MiniLM-L6-v2"
98
+
99
+ # Google
100
+ embeddings:
101
+   provider: "google"
102
+   model: "models/embedding-001"
103
+ ```
104
+
105
+ ## Component Usage
106
+
107
+ ```python
108
+ from ragwire import (
109
+ MarkItDownLoader,
110
+ get_splitter,
111
+ get_markdown_splitter,
112
+ get_embedding,
113
+ QdrantStore,
114
+ MetadataExtractor,
115
+ hybrid_search,
116
+ mmr_search,
117
+ )
118
+
119
+ # Load a document
120
+ loader = MarkItDownLoader()
121
+ result = loader.load("document.pdf")
122
+
123
+ # Split text
124
+ splitter = get_markdown_splitter(chunk_size=10000, chunk_overlap=2000)
125
+ chunks = splitter.split_text(result["text_content"])
126
+
127
+ # Embeddings
128
+ embedding = get_embedding({"provider": "ollama", "model": "qwen3-embedding:0.6b"})
129
+
130
+ # Vector store
131
+ store = QdrantStore(config={"url": "http://localhost:6333"}, embedding=embedding)
132
+ store.set_collection("my_collection")
133
+ vectorstore = store.get_store()
134
+ ```
135
+
136
+ ## Architecture
137
+
138
+ ```
139
+ ragwire/
140
+ ├── core/ # Config loader + RAGPipeline orchestrator
141
+ ├── loaders/ # MarkItDown document converter
142
+ ├── processing/ # Text splitters + SHA256 hashing
143
+ ├── metadata/ # Pydantic schema + LLM extractor
144
+ ├── embeddings/ # Multi-provider embedding factory
145
+ ├── vectorstores/ # Qdrant wrapper with hybrid search
146
+ ├── retriever/ # Similarity, MMR, hybrid retrieval
147
+ └── utils/ # Logging
148
+ ```
149
+
150
+ ## Troubleshooting
151
+
152
+ | Error | Fix |
153
+ |-------|-----|
154
+ | Qdrant connection refused | `docker run -p 6333:6333 qdrant/qdrant` |
155
+ | `markitdown[pdf]` missing | `pip install "markitdown[pdf]"` |
156
+ | Ollama model not found | `ollama pull <model-name>` |
157
+ | `fastembed` missing | `pip install fastembed` (needed for hybrid search) |
158
+ | Embedding dimension mismatch | Set `force_recreate: true` in config once, then back to `false` |
159
+
160
+ ## License
161
+
162
+ MIT © 2026 [KGP Talkie Private Limited](https://kgptalkie.com)
163
+
164
+ ## Links
165
+
166
+ - Website: [kgptalkie.com](https://kgptalkie.com)
167
+ - YouTube: [youtube.com/kgptalkie](https://youtube.com/kgptalkie)
168
+ - GitHub: [github.com/laxmimerit/ragwire](https://github.com/laxmimerit/ragwire)
169
+ - Email: udemy@kgptalkie.com
Binary file
@@ -0,0 +1,150 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ragwire"
7
+ version = "1.0.0"
8
+ description = "RAGWire — Production-grade RAG toolkit for document ingestion and retrieval with hybrid search support"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ authors = [
12
+ {name = "KGP Talkie Private Limited", email = "udemy@kgptalkie.com"}
13
+ ]
14
+ maintainers = [
15
+ {name = "KGP Talkie Private Limited", email = "udemy@kgptalkie.com"}
16
+ ]
17
+ keywords = [
18
+ "rag",
19
+ "retrieval",
20
+ "vector-database",
21
+ "qdrant",
22
+ "embeddings",
23
+ "hybrid-search",
24
+ "nlp",
25
+ "document-processing"
26
+ ]
27
+ classifiers = [
28
+ "Development Status :: 4 - Beta",
29
+ "Intended Audience :: Developers",
30
+ "Intended Audience :: Science/Research",
31
+ "Programming Language :: Python :: 3",
32
+ "Programming Language :: Python :: 3.9",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
36
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
37
+ "Topic :: Text Processing :: Linguistic",
38
+ ]
39
+ requires-python = ">=3.9"
40
+ dependencies = [
41
+ # Core dependencies
42
+ "langchain>=0.1.0",
43
+ "langchain-core>=0.1.0",
44
+ "langchain-community>=0.0.0",
45
+ "langchain-text-splitters>=0.0.1",
46
+
47
+ # Vector database
48
+ "qdrant-client>=1.6.0",
49
+ "langchain-qdrant>=0.1.0",
50
+
51
+ # Document processing
52
+ "markitdown[pdf]>=0.0.1",
53
+
54
+ # Configuration
55
+ "pyyaml>=6.0",
56
+ "python-dotenv>=1.0.0",
57
+
58
+ # Data validation
59
+ "pydantic>=2.0.0",
60
+
61
+ # Progress bars
62
+ "tqdm>=4.66.0",
63
+ ]
64
+
65
+ [project.optional-dependencies]
66
+ # OpenAI embeddings
67
+ openai = [
68
+ "langchain-openai>=0.0.0",
69
+ ]
70
+
71
+ # HuggingFace embeddings
72
+ huggingface = [
73
+ "langchain-huggingface>=0.0.0",
74
+ ]
75
+
76
+ # Ollama embeddings
77
+ ollama = [
78
+ "langchain-ollama>=0.0.0",
79
+ ]
80
+
81
+ # Google/Gemini embeddings and LLM
82
+ google = [
83
+ "langchain-google-genai>=0.0.0",
84
+ ]
85
+
86
+ # FastEmbed for sparse vectors
87
+ fastembed = [
88
+ "fastembed>=0.2.0",
89
+ ]
90
+
91
+ # Development dependencies
92
+ dev = [
93
+ "pytest>=7.0.0",
94
+ "pytest-cov>=4.0.0",
95
+ "black>=23.0.0",
96
+ "isort>=5.0.0",
97
+ "flake8>=6.0.0",
98
+ "mypy>=1.0.0",
99
+ "pre-commit>=3.0.0",
100
+ ]
101
+
102
+ # All dependencies
103
+ all = [
104
+ "ragwire[openai]",
105
+ "ragwire[huggingface]",
106
+ "ragwire[ollama]",
107
+ "ragwire[google]",
108
+ "ragwire[fastembed]",
109
+ "ragwire[dev]",
110
+ ]
111
+
112
+ [project.urls]
113
+ Homepage = "https://kgptalkie.com"
114
+ Documentation = "https://github.com/laxmimerit/ragwire#readme"
115
+ Repository = "https://github.com/laxmimerit/ragwire.git"
116
+ Issues = "https://github.com/laxmimerit/ragwire/issues"
117
+ YouTube = "https://youtube.com/kgptalkie"
118
+
119
+ [tool.setuptools.packages.find]
120
+ where = ["."]
121
+ include = ["ragwire*"]
122
+
123
+ [tool.setuptools.package-data]
124
+ ragwire = ["py.typed"]
125
+
126
+ [tool.setuptools.data-files]
127
+ "assets" = ["assets/ragwire.png"]
128
+
129
+ [tool.black]
130
+ line-length = 88
131
+ target-version = ['py39', 'py310', 'py311', 'py312']
132
+ include = '\.pyi?$'
133
+
134
+ [tool.isort]
135
+ profile = "black"
136
+ line_length = 88
137
+ multi_line_output = 3
138
+
139
+ [tool.mypy]
140
+ python_version = "3.9"
141
+ warn_return_any = true
142
+ warn_unused_configs = true
143
+ disallow_untyped_defs = false
144
+
145
+ [tool.pytest.ini_options]
146
+ testpaths = ["tests"]
147
+ python_files = "test_*.py"
148
+ python_classes = "Test*"
149
+ python_functions = "test_*"
150
+ addopts = "-v --cov=ragwire --cov-report=term-missing"
@@ -0,0 +1,73 @@
1
+ """
2
+ RAGWire — Production-grade RAG toolkit for document ingestion and retrieval.
3
+
4
+ A clean, installable Python toolkit providing:
5
+ - Document loading and conversion (PDF, DOCX, XLSX, etc.)
6
+ - Text splitting and chunking
7
+ - LLM-based metadata extraction
8
+ - Multiple embedding provider support (OpenAI, HuggingFace, Ollama, Google)
9
+ - Qdrant vector store with hybrid search
10
+ - Advanced retrieval strategies (similarity, MMR, hybrid)
11
+
12
+ Example:
13
+ >>> from ragwire import RAGPipeline
14
+ >>>
15
+ >>> pipeline = RAGPipeline("config.yaml")
16
+ >>> stats = pipeline.ingest_documents(["doc.pdf"])
17
+ >>> results = pipeline.retrieve("What is the revenue?")
18
+ >>> for doc in results:
19
+ ...     print(doc.page_content)
20
+
21
+ Author: KGP Talkie Private Limited
22
+ License: MIT
23
+ """
24
+
25
+ from importlib.metadata import version, PackageNotFoundError
26
+
27
+ try:
28
+ __version__ = version("ragwire")
29
+ except PackageNotFoundError:
30
+ __version__ = "1.0.0"
31
+
32
+ __author__ = "KGP Talkie Private Limited"
33
+
34
+ from .core.config import Config
35
+ from .core.pipeline import RAGPipeline
36
+ from .metadata.schema import DocumentMetadata
37
+ from .metadata.extractor import MetadataExtractor
38
+ from .loaders.markitdown_loader import MarkItDownLoader
39
+ from .processing.splitter import get_splitter, get_markdown_splitter, get_code_splitter
40
+ from .processing.hashing import sha256_text, sha256_file_from_path, sha256_chunk
41
+ from .embeddings.factory import get_embedding
42
+ from .vectorstores.qdrant_store import QdrantStore
43
+ from .retriever.hybrid import get_retriever, hybrid_search, mmr_search
44
+ from .utils.logging import setup_logging, get_logger
45
+
46
+ __all__ = [
47
+ # Core
48
+ "Config",
49
+ "RAGPipeline",
50
+ # Metadata
51
+ "DocumentMetadata",
52
+ "MetadataExtractor",
53
+ # Loaders
54
+ "MarkItDownLoader",
55
+ # Processing
56
+ "get_splitter",
57
+ "get_markdown_splitter",
58
+ "get_code_splitter",
59
+ "sha256_text",
60
+ "sha256_file_from_path",
61
+ "sha256_chunk",
62
+ # Embeddings
63
+ "get_embedding",
64
+ # Vector Stores
65
+ "QdrantStore",
66
+ # Retrieval
67
+ "get_retriever",
68
+ "hybrid_search",
69
+ "mmr_search",
70
+ # Utilities
71
+ "setup_logging",
72
+ "get_logger",
73
+ ]
@@ -0,0 +1,6 @@
1
+ """Core module for RAG pipeline."""
2
+
3
+ from .config import Config
4
+ from .pipeline import RAGPipeline
5
+
6
+ __all__ = ["Config", "RAGPipeline"]