bibirags 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bibirags-0.1.0/.gitignore +36 -0
- bibirags-0.1.0/.pre-commit-config.yaml +17 -0
- bibirags-0.1.0/CHANGELOG.md +19 -0
- bibirags-0.1.0/LICENSE +21 -0
- bibirags-0.1.0/Makefile +27 -0
- bibirags-0.1.0/PKG-INFO +268 -0
- bibirags-0.1.0/README.md +171 -0
- bibirags-0.1.0/pyproject.toml +125 -0
- bibirags-0.1.0/src/bibirags/__init__.py +42 -0
- bibirags-0.1.0/src/bibirags/backends/__init__.py +9 -0
- bibirags-0.1.0/src/bibirags/backends/chroma.py +143 -0
- bibirags-0.1.0/src/bibirags/backends/lightrag.py +226 -0
- bibirags-0.1.0/src/bibirags/backends/qdrant.py +177 -0
- bibirags-0.1.0/src/bibirags/backends/txtai.py +138 -0
- bibirags-0.1.0/src/bibirags/chunking.py +70 -0
- bibirags-0.1.0/src/bibirags/llm.py +82 -0
- bibirags-0.1.0/tests/conftest.py +7 -0
- bibirags-0.1.0/tests/test_simplerags.py +221 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
*.so
|
|
7
|
+
*.egg
|
|
8
|
+
*.egg-info/
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# Testing & coverage
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
.coverage
|
|
21
|
+
htmlcov/
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
.ruff_cache/
|
|
24
|
+
|
|
25
|
+
# RAG indices (generated at runtime)
|
|
26
|
+
rag_root_*/
|
|
27
|
+
|
|
28
|
+
# IDEs
|
|
29
|
+
.idea/
|
|
30
|
+
.vscode/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
|
|
34
|
+
# OS
|
|
35
|
+
.DS_Store
|
|
36
|
+
Thumbs.db
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.4.4
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
10
|
+
rev: v4.6.0
|
|
11
|
+
hooks:
|
|
12
|
+
- id: trailing-whitespace
|
|
13
|
+
- id: end-of-file-fixer
|
|
14
|
+
- id: check-yaml
|
|
15
|
+
- id: check-toml
|
|
16
|
+
- id: check-merge-conflict
|
|
17
|
+
- id: debug-statements
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2024-01-01
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Initial release.
|
|
14
|
+
- Four vector-store backends: **txtai**, **Chroma**, **Qdrant**, **LightRAG**.
|
|
15
|
+
- Consistent `save / search / query` API across all backends.
|
|
16
|
+
- LiteLLM-powered embedding and completion helpers (sync + async).
|
|
17
|
+
- `chunk_docs` utility for PDF and TXT document loading and splitting.
|
|
18
|
+
- Optional extras for each backend (`pip install bibirags[qdrant]` etc.).
|
|
19
|
+
- Full type annotations and Loguru structured logging.
|
bibirags-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bibi Parrot
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
bibirags-0.1.0/Makefile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
.PHONY: install test lint fmt build publish clean
|
|
2
|
+
|
|
3
|
+
install:
|
|
4
|
+
pip install -e ".[dev]"
|
|
5
|
+
|
|
6
|
+
test:
|
|
7
|
+
pytest tests/ -v --cov=simplerags --cov-report=term-missing
|
|
8
|
+
|
|
9
|
+
lint:
|
|
10
|
+
ruff check src/ tests/
|
|
11
|
+
|
|
12
|
+
fmt:
|
|
13
|
+
ruff format src/ tests/
|
|
14
|
+
|
|
15
|
+
build:
|
|
16
|
+
python -m build
|
|
17
|
+
|
|
18
|
+
# Publish to TestPyPI first: make publish-test
|
|
19
|
+
# Then to PyPI: make publish
|
|
20
|
+
publish-test:
|
|
21
|
+
twine upload --repository testpypi dist/*
|
|
22
|
+
|
|
23
|
+
publish:
|
|
24
|
+
twine upload dist/*
|
|
25
|
+
|
|
26
|
+
clean:
|
|
27
|
+
rm -rf dist/ build/ *.egg-info .coverage htmlcov/ .pytest_cache/ .mypy_cache/ .ruff_cache/
|
bibirags-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bibirags
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A simple, unified interface for RAG across multiple vector store backends (txtai, Chroma, Qdrant, LightRAG)
|
|
5
|
+
Project-URL: Homepage, https://github.com/bibiparrot/bibirags
|
|
6
|
+
Project-URL: Repository, https://github.com/bibiparrot/bibirags
|
|
7
|
+
Project-URL: Issues, https://github.com/bibiparrot/bibirags/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/bibiparrot/bibirags/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: chunqishi <scq830@163.com>
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Bibi Parrot
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: ai,embeddings,llm,rag,retrieval,vector-store
|
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: Intended Audience :: Science/Research
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Operating System :: OS Independent
|
|
38
|
+
Classifier: Programming Language :: Python :: 3
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
43
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
44
|
+
Requires-Python: >=3.10
|
|
45
|
+
Requires-Dist: litellm>=1.86.2
|
|
46
|
+
Requires-Dist: loguru>=0.7.0
|
|
47
|
+
Requires-Dist: numpy>=1.24.0
|
|
48
|
+
Provides-Extra: all
|
|
49
|
+
Requires-Dist: langchain-chroma>=0.1.0; extra == 'all'
|
|
50
|
+
Requires-Dist: langchain-classic>=0.1.0; extra == 'all'
|
|
51
|
+
Requires-Dist: langchain-community>=0.2.0; extra == 'all'
|
|
52
|
+
Requires-Dist: langchain-core>=0.2.0; extra == 'all'
|
|
53
|
+
Requires-Dist: langchain-litellm>=0.1.0; extra == 'all'
|
|
54
|
+
Requires-Dist: langchain-text-splitters>=0.2.0; extra == 'all'
|
|
55
|
+
Requires-Dist: lightrag-hku>=1.0.0; extra == 'all'
|
|
56
|
+
Requires-Dist: nest-asyncio>=1.6.0; extra == 'all'
|
|
57
|
+
Requires-Dist: pypdf>=4.0.0; extra == 'all'
|
|
58
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == 'all'
|
|
59
|
+
Requires-Dist: txtai[pipeline]>=7.0.0; extra == 'all'
|
|
60
|
+
Provides-Extra: chroma
|
|
61
|
+
Requires-Dist: langchain-chroma>=0.1.0; extra == 'chroma'
|
|
62
|
+
Requires-Dist: langchain-classic>=0.1.0; extra == 'chroma'
|
|
63
|
+
Requires-Dist: langchain-core>=0.2.0; extra == 'chroma'
|
|
64
|
+
Requires-Dist: langchain-litellm>=0.1.0; extra == 'chroma'
|
|
65
|
+
Provides-Extra: dev
|
|
66
|
+
Requires-Dist: build>=1.2.0; extra == 'dev'
|
|
67
|
+
Requires-Dist: langchain-chroma>=0.1.0; extra == 'dev'
|
|
68
|
+
Requires-Dist: langchain-classic>=0.1.0; extra == 'dev'
|
|
69
|
+
Requires-Dist: langchain-community>=0.2.0; extra == 'dev'
|
|
70
|
+
Requires-Dist: langchain-core>=0.2.0; extra == 'dev'
|
|
71
|
+
Requires-Dist: langchain-litellm>=0.1.0; extra == 'dev'
|
|
72
|
+
Requires-Dist: langchain-text-splitters>=0.2.0; extra == 'dev'
|
|
73
|
+
Requires-Dist: lightrag-hku>=1.0.0; extra == 'dev'
|
|
74
|
+
Requires-Dist: mypy>=1.10.0; extra == 'dev'
|
|
75
|
+
Requires-Dist: nest-asyncio>=1.6.0; extra == 'dev'
|
|
76
|
+
Requires-Dist: pre-commit>=3.7.0; extra == 'dev'
|
|
77
|
+
Requires-Dist: pypdf>=4.0.0; extra == 'dev'
|
|
78
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
79
|
+
Requires-Dist: pytest-cov>=5.0.0; extra == 'dev'
|
|
80
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
81
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == 'dev'
|
|
82
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
83
|
+
Requires-Dist: twine>=5.0.0; extra == 'dev'
|
|
84
|
+
Requires-Dist: txtai[pipeline]>=7.0.0; extra == 'dev'
|
|
85
|
+
Provides-Extra: docs
|
|
86
|
+
Requires-Dist: langchain-community>=0.2.0; extra == 'docs'
|
|
87
|
+
Requires-Dist: langchain-text-splitters>=0.2.0; extra == 'docs'
|
|
88
|
+
Requires-Dist: pypdf>=4.0.0; extra == 'docs'
|
|
89
|
+
Provides-Extra: lightrag
|
|
90
|
+
Requires-Dist: lightrag-hku>=1.0.0; extra == 'lightrag'
|
|
91
|
+
Requires-Dist: nest-asyncio>=1.6.0; extra == 'lightrag'
|
|
92
|
+
Provides-Extra: qdrant
|
|
93
|
+
Requires-Dist: qdrant-client>=1.9.0; extra == 'qdrant'
|
|
94
|
+
Provides-Extra: txtai
|
|
95
|
+
Requires-Dist: txtai[pipeline]>=7.0.0; extra == 'txtai'
|
|
96
|
+
Description-Content-Type: text/markdown
|
|
97
|
+
|
|
98
|
+
# bibirags
|
|
99
|
+
|
|
100
|
+
**A simple, unified interface for RAG (Retrieval-Augmented Generation) across multiple vector store backends.**
|
|
101
|
+
|
|
102
|
+
`bibirags` wraps [txtai](https://github.com/neuml/txtai), [Chroma](https://www.trychroma.com/), [Qdrant](https://qdrant.tech/), and [LightRAG](https://github.com/HKUDS/LightRAG) behind a consistent three-function API so you can swap backends without rewriting your pipeline.
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
save_<backend>(chunks, rag_root, embed_model, ...) → index documents
|
|
106
|
+
search_<backend>(query, rag_root, embed_model, ...) → retrieve chunks
|
|
107
|
+
query_<backend>(query, rag_root, llm_model, ...) → retrieve + answer
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
All LLM and embedding calls go through [LiteLLM](https://github.com/BerriAI/litellm), meaning any model provider (OpenAI, Anthropic, Cohere, Ollama, etc.) works out of the box.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Installation
|
|
115
|
+
|
|
116
|
+
Install the core package plus the backends you need:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Qdrant only
|
|
120
|
+
pip install "bibirags[qdrant]"
|
|
121
|
+
|
|
122
|
+
# Chroma only
|
|
123
|
+
pip install "bibirags[chroma]"
|
|
124
|
+
|
|
125
|
+
# txtai only
|
|
126
|
+
pip install "bibirags[txtai]"
|
|
127
|
+
|
|
128
|
+
# LightRAG only
|
|
129
|
+
pip install "bibirags[lightrag]"
|
|
130
|
+
|
|
131
|
+
# All backends + document loading helpers
|
|
132
|
+
pip install "bibirags[all]"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Add `[docs]` to get PDF and TXT loading via LangChain:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
pip install "bibirags[qdrant,docs]"
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Quick start
|
|
144
|
+
|
|
145
|
+
### Index raw text chunks
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
from bibirags import save_qdrant, search_qdrant, query_qdrant
|
|
149
|
+
|
|
150
|
+
chunks = [
|
|
151
|
+
"The Eiffel Tower was completed in 1889.",
|
|
152
|
+
"The Louvre is the world's largest art museum.",
|
|
153
|
+
"Paris is the capital of France.",
|
|
154
|
+
]
|
|
155
|
+
|
|
156
|
+
embed_model = "text-embedding-3-small" # any LiteLLM-compatible model
|
|
157
|
+
llm_model = "gpt-4o-mini"
|
|
158
|
+
rag_root = "./my_rag_index"
|
|
159
|
+
|
|
160
|
+
# 1. Index
|
|
161
|
+
save_qdrant(chunks, rag_root, embed_model)
|
|
162
|
+
|
|
163
|
+
# 2. Semantic search
|
|
164
|
+
results = search_qdrant("When was the Eiffel Tower built?", rag_root, embed_model)
|
|
165
|
+
|
|
166
|
+
# 3. RAG query → answer + source chunks
|
|
167
|
+
answer, sources = query_qdrant(
|
|
168
|
+
"When was the Eiffel Tower built?", rag_root, llm_model, embed_model
|
|
169
|
+
)
|
|
170
|
+
print(answer)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Load documents from disk
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from bibirags import chunk_docs, save_chroma, query_chroma
|
|
177
|
+
|
|
178
|
+
chunks = chunk_docs("./my_docs/", chunk_size=800, chunk_overlap=120)
|
|
179
|
+
save_chroma(chunks, "./chroma_index", embed_model="text-embedding-3-small")
|
|
180
|
+
|
|
181
|
+
answer, sources = query_chroma(
|
|
182
|
+
"What does the contract say about termination?",
|
|
183
|
+
rag_root="./chroma_index",
|
|
184
|
+
llm_model="gpt-4o",
|
|
185
|
+
embed_model="text-embedding-3-small",
|
|
186
|
+
)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Using Ollama (local models)
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
from bibirags import save_txtai, query_txtai
|
|
193
|
+
|
|
194
|
+
save_txtai(chunks, "./txtai_index", embed_model="ollama/bge-m3:latest")
|
|
195
|
+
|
|
196
|
+
answer, sources = query_txtai(
|
|
197
|
+
"What happened in the news?",
|
|
198
|
+
rag_root="./txtai_index",
|
|
199
|
+
llm_model="ollama/gemma3:8b",
|
|
200
|
+
embed_model="ollama/bge-m3:latest",
|
|
201
|
+
)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## Backends at a glance
|
|
207
|
+
|
|
208
|
+
| Backend | Best for | Index format | Notes |
|
|
209
|
+
|----------|----------|--------------|-------|
|
|
210
|
+
| **Qdrant** | Production workloads, filtering | Local files or server | Cosine similarity, rich payload filtering |
|
|
211
|
+
| **Chroma** | LangChain ecosystems | Local SQLite | Easy LangChain integration |
|
|
212
|
+
| **txtai** | All-in-one HuggingFace pipelines | SQLite + FAISS | Built-in pipeline support |
|
|
213
|
+
| **LightRAG** | Knowledge-graph RAG | Local JSON + vector | Graph-enhanced hybrid retrieval |
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## API reference
|
|
218
|
+
|
|
219
|
+
### `chunk_docs`
|
|
220
|
+
|
|
221
|
+
```python
|
|
222
|
+
chunk_docs(docs_path, chunk_size=800, chunk_overlap=120) → list[str]
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Recursively loads `.pdf` and `.txt` files from `docs_path` and returns text chunks.
|
|
226
|
+
|
|
227
|
+
### `save_<backend>`
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
save_qdrant(chunks, rag_root, embed_model)
|
|
231
|
+
save_chroma(chunks, rag_root, embed_model)
|
|
232
|
+
save_txtai(chunks, rag_root, embed_model)
|
|
233
|
+
save_lightrag(chunks, rag_root, embed_model, llm_model)
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### `search_<backend>`
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
results: list[str] = search_qdrant(query, rag_root, embed_model, top_k=3)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Returns the `top_k` most relevant chunk texts.
|
|
243
|
+
|
|
244
|
+
### `query_<backend>`
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
answer, sources = query_qdrant(query, rag_root, llm_model, embed_model, top_k=3)
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Returns `(answer_string, list_of_source_chunks)`.
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Contributing
|
|
255
|
+
|
|
256
|
+
```bash
|
|
257
|
+
git clone https://github.com/bibiparrot/bibirags
|
|
258
|
+
cd bibirags
|
|
259
|
+
pip install -e ".[dev]"
|
|
260
|
+
pre-commit install
|
|
261
|
+
pytest
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## License
|
|
267
|
+
|
|
268
|
+
MIT – see [LICENSE](LICENSE).
|
bibirags-0.1.0/README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# bibirags
|
|
2
|
+
|
|
3
|
+
**A simple, unified interface for RAG (Retrieval-Augmented Generation) across multiple vector store backends.**
|
|
4
|
+
|
|
5
|
+
`bibirags` wraps [txtai](https://github.com/neuml/txtai), [Chroma](https://www.trychroma.com/), [Qdrant](https://qdrant.tech/), and [LightRAG](https://github.com/HKUDS/LightRAG) behind a consistent three-function API so you can swap backends without rewriting your pipeline.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
save_<backend>(chunks, rag_root, embed_model, ...) → index documents
|
|
9
|
+
search_<backend>(query, rag_root, embed_model, ...) → retrieve chunks
|
|
10
|
+
query_<backend>(query, rag_root, llm_model, ...) → retrieve + answer
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
All LLM and embedding calls go through [LiteLLM](https://github.com/BerriAI/litellm), meaning any model provider (OpenAI, Anthropic, Cohere, Ollama, etc.) works out of the box.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
Install the core package plus the backends you need:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Qdrant only
|
|
23
|
+
pip install "bibirags[qdrant]"
|
|
24
|
+
|
|
25
|
+
# Chroma only
|
|
26
|
+
pip install "bibirags[chroma]"
|
|
27
|
+
|
|
28
|
+
# txtai only
|
|
29
|
+
pip install "bibirags[txtai]"
|
|
30
|
+
|
|
31
|
+
# LightRAG only
|
|
32
|
+
pip install "bibirags[lightrag]"
|
|
33
|
+
|
|
34
|
+
# All backends + document loading helpers
|
|
35
|
+
pip install "bibirags[all]"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Add `[docs]` to get PDF and TXT loading via LangChain:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install "bibirags[qdrant,docs]"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
48
|
+
### Index raw text chunks
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
from bibirags import save_qdrant, search_qdrant, query_qdrant
|
|
52
|
+
|
|
53
|
+
chunks = [
|
|
54
|
+
"The Eiffel Tower was completed in 1889.",
|
|
55
|
+
"The Louvre is the world's largest art museum.",
|
|
56
|
+
"Paris is the capital of France.",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
embed_model = "text-embedding-3-small" # any LiteLLM-compatible model
|
|
60
|
+
llm_model = "gpt-4o-mini"
|
|
61
|
+
rag_root = "./my_rag_index"
|
|
62
|
+
|
|
63
|
+
# 1. Index
|
|
64
|
+
save_qdrant(chunks, rag_root, embed_model)
|
|
65
|
+
|
|
66
|
+
# 2. Semantic search
|
|
67
|
+
results = search_qdrant("When was the Eiffel Tower built?", rag_root, embed_model)
|
|
68
|
+
|
|
69
|
+
# 3. RAG query → answer + source chunks
|
|
70
|
+
answer, sources = query_qdrant(
|
|
71
|
+
"When was the Eiffel Tower built?", rag_root, llm_model, embed_model
|
|
72
|
+
)
|
|
73
|
+
print(answer)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### Load documents from disk
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from bibirags import chunk_docs, save_chroma, query_chroma
|
|
80
|
+
|
|
81
|
+
chunks = chunk_docs("./my_docs/", chunk_size=800, chunk_overlap=120)
|
|
82
|
+
save_chroma(chunks, "./chroma_index", embed_model="text-embedding-3-small")
|
|
83
|
+
|
|
84
|
+
answer, sources = query_chroma(
|
|
85
|
+
"What does the contract say about termination?",
|
|
86
|
+
rag_root="./chroma_index",
|
|
87
|
+
llm_model="gpt-4o",
|
|
88
|
+
embed_model="text-embedding-3-small",
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Using Ollama (local models)
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from bibirags import save_txtai, query_txtai
|
|
96
|
+
|
|
97
|
+
save_txtai(chunks, "./txtai_index", embed_model="ollama/bge-m3:latest")
|
|
98
|
+
|
|
99
|
+
answer, sources = query_txtai(
|
|
100
|
+
"What happened in the news?",
|
|
101
|
+
rag_root="./txtai_index",
|
|
102
|
+
llm_model="ollama/gemma3:8b",
|
|
103
|
+
embed_model="ollama/bge-m3:latest",
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Backends at a glance
|
|
110
|
+
|
|
111
|
+
| Backend | Best for | Index format | Notes |
|
|
112
|
+
|----------|----------|--------------|-------|
|
|
113
|
+
| **Qdrant** | Production workloads, filtering | Local files or server | Cosine similarity, rich payload filtering |
|
|
114
|
+
| **Chroma** | LangChain ecosystems | Local SQLite | Easy LangChain integration |
|
|
115
|
+
| **txtai** | All-in-one HuggingFace pipelines | SQLite + FAISS | Built-in pipeline support |
|
|
116
|
+
| **LightRAG** | Knowledge-graph RAG | Local JSON + vector | Graph-enhanced hybrid retrieval |
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## API reference
|
|
121
|
+
|
|
122
|
+
### `chunk_docs`
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
chunk_docs(docs_path, chunk_size=800, chunk_overlap=120) → list[str]
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
Recursively loads `.pdf` and `.txt` files from `docs_path` and returns text chunks.
|
|
129
|
+
|
|
130
|
+
### `save_<backend>`
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
save_qdrant(chunks, rag_root, embed_model)
|
|
134
|
+
save_chroma(chunks, rag_root, embed_model)
|
|
135
|
+
save_txtai(chunks, rag_root, embed_model)
|
|
136
|
+
save_lightrag(chunks, rag_root, embed_model, llm_model)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### `search_<backend>`
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
results: list[str] = search_qdrant(query, rag_root, embed_model, top_k=3)
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Returns the `top_k` most relevant chunk texts.
|
|
146
|
+
|
|
147
|
+
### `query_<backend>`
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
answer, sources = query_qdrant(query, rag_root, llm_model, embed_model, top_k=3)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Returns `(answer_string, list_of_source_chunks)`.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Contributing
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
git clone https://github.com/bibiparrot/bibirags
|
|
161
|
+
cd bibirags
|
|
162
|
+
pip install -e ".[dev]"
|
|
163
|
+
pre-commit install
|
|
164
|
+
pytest
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT – see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "bibirags"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A simple, unified interface for RAG across multiple vector store backends (txtai, Chroma, Qdrant, LightRAG)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [{ name = "chunqishi", email = "scq830@163.com" }]
|
|
12
|
+
keywords = ["rag", "retrieval", "llm", "vector-store", "embeddings", "ai"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
24
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
25
|
+
]
|
|
26
|
+
requires-python = ">=3.10"
|
|
27
|
+
|
|
28
|
+
# Core dependencies (always installed)
|
|
29
|
+
dependencies = [
|
|
30
|
+
"litellm>=1.86.2",
|
|
31
|
+
"loguru>=0.7.0",
|
|
32
|
+
"numpy>=1.24.0",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
# Backend extras – install only what you need
|
|
37
|
+
|
|
38
|
+
txtai = [
|
|
39
|
+
"txtai[pipeline]>=7.0.0",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
chroma = [
|
|
43
|
+
"langchain-chroma>=0.1.0",
|
|
44
|
+
"langchain-litellm>=0.1.0",
|
|
45
|
+
"langchain-classic>=0.1.0",
|
|
46
|
+
"langchain-core>=0.2.0",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
qdrant = [
|
|
50
|
+
"qdrant-client>=1.9.0",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
lightrag = [
|
|
54
|
+
"lightrag-hku>=1.0.0",
|
|
55
|
+
"nest-asyncio>=1.6.0",
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
# Document loading extras
|
|
59
|
+
docs = [
|
|
60
|
+
"langchain-community>=0.2.0",
|
|
61
|
+
"langchain-text-splitters>=0.2.0",
|
|
62
|
+
"pypdf>=4.0.0",
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
# Install everything
|
|
66
|
+
all = [
|
|
67
|
+
"bibirags[txtai]",
|
|
68
|
+
"bibirags[chroma]",
|
|
69
|
+
"bibirags[qdrant]",
|
|
70
|
+
"bibirags[lightrag]",
|
|
71
|
+
"bibirags[docs]",
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
# Development toolchain
|
|
75
|
+
dev = [
|
|
76
|
+
"bibirags[all]",
|
|
77
|
+
"pytest>=8.0.0",
|
|
78
|
+
"pytest-asyncio>=0.23.0",
|
|
79
|
+
"pytest-cov>=5.0.0",
|
|
80
|
+
"ruff>=0.4.0",
|
|
81
|
+
"mypy>=1.10.0",
|
|
82
|
+
"pre-commit>=3.7.0",
|
|
83
|
+
"build>=1.2.0",
|
|
84
|
+
"twine>=5.0.0",
|
|
85
|
+
]
|
|
86
|
+
|
|
87
|
+
[project.urls]
|
|
88
|
+
Homepage = "https://github.com/bibiparrot/bibirags"
|
|
89
|
+
Repository = "https://github.com/bibiparrot/bibirags"
|
|
90
|
+
Issues = "https://github.com/bibiparrot/bibirags/issues"
|
|
91
|
+
Changelog = "https://github.com/bibiparrot/bibirags/blob/main/CHANGELOG.md"
|
|
92
|
+
|
|
93
|
+
# ---------------------------------------------------------------------------
|
|
94
|
+
# Hatch build
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
[tool.hatch.build.targets.wheel]
|
|
97
|
+
packages = ["src/bibirags"]
|
|
98
|
+
|
|
99
|
+
# ---------------------------------------------------------------------------
|
|
100
|
+
# Ruff (linting + formatting)
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
[tool.ruff]
|
|
103
|
+
src = ["src"]
|
|
104
|
+
line-length = 99
|
|
105
|
+
target-version = "py310"
|
|
106
|
+
|
|
107
|
+
[tool.ruff.lint]
|
|
108
|
+
select = ["E", "W", "F", "I", "UP", "B", "C4", "PIE", "SIM"]
|
|
109
|
+
ignore = ["E501"]
|
|
110
|
+
|
|
111
|
+
# ---------------------------------------------------------------------------
|
|
112
|
+
# Mypy
|
|
113
|
+
# ---------------------------------------------------------------------------
|
|
114
|
+
[tool.mypy]
|
|
115
|
+
python_version = "3.10"
|
|
116
|
+
strict = false
|
|
117
|
+
ignore_missing_imports = true
|
|
118
|
+
|
|
119
|
+
# ---------------------------------------------------------------------------
|
|
120
|
+
# Pytest
|
|
121
|
+
# ---------------------------------------------------------------------------
|
|
122
|
+
[tool.pytest.ini_options]
|
|
123
|
+
testpaths = ["tests"]
|
|
124
|
+
asyncio_mode = "auto"
|
|
125
|
+
addopts = "--tb=short -q"
|