nexrag 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexrag-0.1.0/PKG-INFO +186 -0
- nexrag-0.1.0/README.md +122 -0
- nexrag-0.1.0/pyproject.toml +161 -0
- nexrag-0.1.0/src/nexrag/__init__.py +38 -0
- nexrag-0.1.0/src/nexrag/exceptions.py +140 -0
- nexrag-0.1.0/src/nexrag/py.typed +0 -0
nexrag-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: nexrag
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Framework-agnostic RAG pipeline SDK. Plug in any component, swap any stage, configure everything in YAML
|
|
5
|
+
Keywords: rag,retrieval-augmented-generation,llm,vector-database,embeddings,ai,nlp,pipeline,sdk
|
|
6
|
+
Author: KevinRawal
|
|
7
|
+
Author-email: KevinRawal <kevinrawal30@gmail.com>
|
|
8
|
+
License: TBD
|
|
9
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Dist: pydantic>=2.0
|
|
17
|
+
Requires-Dist: pyyaml>=6.0
|
|
18
|
+
Requires-Dist: nexrag[all-providers] ; extra == 'all'
|
|
19
|
+
Requires-Dist: nexrag[all-loaders] ; extra == 'all'
|
|
20
|
+
Requires-Dist: nexrag[pdf] ; extra == 'all-loaders'
|
|
21
|
+
Requires-Dist: nexrag[word] ; extra == 'all-loaders'
|
|
22
|
+
Requires-Dist: nexrag[excel] ; extra == 'all-loaders'
|
|
23
|
+
Requires-Dist: nexrag[html] ; extra == 'all-loaders'
|
|
24
|
+
Requires-Dist: nexrag[openai] ; extra == 'all-providers'
|
|
25
|
+
Requires-Dist: nexrag[anthropic] ; extra == 'all-providers'
|
|
26
|
+
Requires-Dist: nexrag[ollama] ; extra == 'all-providers'
|
|
27
|
+
Requires-Dist: nexrag[chromadb] ; extra == 'all-providers'
|
|
28
|
+
Requires-Dist: nexrag[huggingface] ; extra == 'all-providers'
|
|
29
|
+
Requires-Dist: anthropic>=0.20 ; extra == 'anthropic'
|
|
30
|
+
Requires-Dist: chromadb>=0.5 ; extra == 'chromadb'
|
|
31
|
+
Requires-Dist: pytest>=8.0 ; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-cov>=5.0 ; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff>=0.4 ; extra == 'dev'
|
|
34
|
+
Requires-Dist: mypy>=1.10 ; extra == 'dev'
|
|
35
|
+
Requires-Dist: pre-commit>=3.7 ; extra == 'dev'
|
|
36
|
+
Requires-Dist: types-pyyaml ; extra == 'dev'
|
|
37
|
+
Requires-Dist: openpyxl>=3.1 ; extra == 'excel'
|
|
38
|
+
Requires-Dist: beautifulsoup4>=4.12 ; extra == 'html'
|
|
39
|
+
Requires-Dist: lxml>=5.0 ; extra == 'html'
|
|
40
|
+
Requires-Dist: sentence-transformers>=2.0 ; extra == 'huggingface'
|
|
41
|
+
Requires-Dist: ollama>=0.1 ; extra == 'ollama'
|
|
42
|
+
Requires-Dist: openai>=1.0 ; extra == 'openai'
|
|
43
|
+
Requires-Dist: pypdf>=4.0 ; extra == 'pdf'
|
|
44
|
+
Requires-Dist: python-docx>=1.0 ; extra == 'word'
|
|
45
|
+
Requires-Python: >=3.12
|
|
46
|
+
Project-URL: Homepage, https://github.com/kevinrawal/nexrag
|
|
47
|
+
Project-URL: Repository, https://github.com/kevinrawal/nexrag
|
|
48
|
+
Project-URL: Issues, https://github.com/kevinrawal/nexrag/issues
|
|
49
|
+
Project-URL: Changelog, https://github.com/kevinrawal/nexrag/blob/main/CHANGELOG.md
|
|
50
|
+
Provides-Extra: all
|
|
51
|
+
Provides-Extra: all-loaders
|
|
52
|
+
Provides-Extra: all-providers
|
|
53
|
+
Provides-Extra: anthropic
|
|
54
|
+
Provides-Extra: chromadb
|
|
55
|
+
Provides-Extra: dev
|
|
56
|
+
Provides-Extra: excel
|
|
57
|
+
Provides-Extra: html
|
|
58
|
+
Provides-Extra: huggingface
|
|
59
|
+
Provides-Extra: ollama
|
|
60
|
+
Provides-Extra: openai
|
|
61
|
+
Provides-Extra: pdf
|
|
62
|
+
Provides-Extra: word
|
|
63
|
+
Description-Content-Type: text/markdown
|
|
64
|
+
|
|
65
|
+
# NexRAG
|
|
66
|
+
|
|
67
|
+
> Framework-agnostic RAG pipeline SDK. Plug in any component, swap any stage, configure everything in YAML.
|
|
68
|
+
|
|
69
|
+
[PyPI](https://pypi.org/project/nexrag/)
|
|
70
|
+
[Python](https://www.python.org/downloads/)
|
|
71
|
+
[]()
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## What is NexRAG?
|
|
76
|
+
|
|
77
|
+
NexRAG is a production-grade RAG (Retrieval-Augmented Generation) pipeline SDK for Python.
|
|
78
|
+
|
|
79
|
+
**NexRAG owns the pipeline shape. You own the components.**
|
|
80
|
+
|
|
81
|
+
Every stage — loading, chunking, embedding, retrieval, generation — is a clean interface. NexRAG ships default implementations for each. You can swap any of them by implementing the interface and declaring it in YAML. No framework lock-in. No magic. No hidden behavior.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Quickstart
|
|
86
|
+
|
|
87
|
+
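> **Note:** NexRAG v1.0 is under active development. The example below previews the planned API and does not run yet in 0.1.0 (the `NexRAG` entry point is still a placeholder). This section will be updated on first release.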
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from nexrag import NexRAG
|
|
91
|
+
|
|
92
|
+
pipeline = NexRAG.from_config("nexrag.yaml")
|
|
93
|
+
|
|
94
|
+
# Ingest documents
|
|
95
|
+
pipeline.ingest("docs/contracts/")
|
|
96
|
+
|
|
97
|
+
# Query
|
|
98
|
+
result = pipeline.query("What are the termination clauses?")
|
|
99
|
+
print(result.answer)
|
|
100
|
+
print(result.source_chunks)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```yaml
|
|
104
|
+
# nexrag.yaml
|
|
105
|
+
nexrag:
|
|
106
|
+
version: "1.0"
|
|
107
|
+
|
|
108
|
+
ingestion:
|
|
109
|
+
chunker:
|
|
110
|
+
strategy: recursive
|
|
111
|
+
chunk_size: 512
|
|
112
|
+
embedder:
|
|
113
|
+
provider: openai
|
|
114
|
+
model: text-embedding-3-small
|
|
115
|
+
vector_db:
|
|
116
|
+
provider: chroma
|
|
117
|
+
default_collection: contracts
|
|
118
|
+
|
|
119
|
+
query:
|
|
120
|
+
llm:
|
|
121
|
+
provider: openai
|
|
122
|
+
model: gpt-4o
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Installation
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# Core only
|
|
131
|
+
pip install nexrag
|
|
132
|
+
|
|
133
|
+
# With OpenAI support
|
|
134
|
+
pip install "nexrag[openai]"
|
|
135
|
+
|
|
136
|
+
# With everything
|
|
137
|
+
pip install "nexrag[all]"
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## Design Principles
|
|
143
|
+
|
|
144
|
+
| Principle | What it means |
|
|
145
|
+
|---|---|
|
|
146
|
+
| Interface-first | Every stage is a contract. Implementation is secondary. |
|
|
147
|
+
| Config-driven | YAML configures the pipeline. Code defines the logic. |
|
|
148
|
+
| Zero lock-in | Core has no dependency on LangChain, LlamaIndex, or any AI SDK. |
|
|
149
|
+
| Explicit over implicit | No hidden defaults. Every behavior is declared or documented. |
|
|
150
|
+
| Extensible by design | New components plug in without touching core. |
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Architecture
|
|
155
|
+
|
|
156
|
+
NexRAG has two independent pipelines:
|
|
157
|
+
|
|
158
|
+
```
|
|
159
|
+
INGESTION → Loader → Sanitizer → Chunker → Embedder → VectorDB
|
|
160
|
+
QUERY → Embedder → Retriever → PromptBuilder → LLM → PipelineResult
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
See [Architecture Documentation](docs/) for full pipeline diagrams.
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Supported Providers (V1)
|
|
168
|
+
|
|
169
|
+
| Category | Providers |
|
|
170
|
+
|---|---|
|
|
171
|
+
| Embedders | OpenAI, HuggingFace, Ollama |
|
|
172
|
+
| Vector DBs | ChromaDB (local + remote) |
|
|
173
|
+
| LLMs | OpenAI, Anthropic, Ollama |
|
|
174
|
+
| Loaders | PDF, TXT/MD, Word, Excel, JSON, HTML, Code |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Contributing
|
|
179
|
+
|
|
180
|
+
NexRAG is in early development. Contribution guidelines will be published with v1.0.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Changelog
|
|
185
|
+
|
|
186
|
+
See [CHANGELOG.md](CHANGELOG.md).
|
nexrag-0.1.0/README.md
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# NexRAG
|
|
2
|
+
|
|
3
|
+
> Framework-agnostic RAG pipeline SDK. Plug in any component, swap any stage, configure everything in YAML.
|
|
4
|
+
|
|
5
|
+
[PyPI](https://pypi.org/project/nexrag/)
|
|
6
|
+
[Python](https://www.python.org/downloads/)
|
|
7
|
+
[]()
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## What is NexRAG?
|
|
12
|
+
|
|
13
|
+
NexRAG is a production-grade RAG (Retrieval-Augmented Generation) pipeline SDK for Python.
|
|
14
|
+
|
|
15
|
+
**NexRAG owns the pipeline shape. You own the components.**
|
|
16
|
+
|
|
17
|
+
Every stage — loading, chunking, embedding, retrieval, generation — is a clean interface. NexRAG ships default implementations for each. You can swap any of them by implementing the interface and declaring it in YAML. No framework lock-in. No magic. No hidden behavior.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Quickstart
|
|
22
|
+
|
|
23
|
+
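> **Note:** NexRAG v1.0 is under active development. The example below previews the planned API and does not run yet in 0.1.0 (the `NexRAG` entry point is still a placeholder). This section will be updated on first release.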
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from nexrag import NexRAG
|
|
27
|
+
|
|
28
|
+
pipeline = NexRAG.from_config("nexrag.yaml")
|
|
29
|
+
|
|
30
|
+
# Ingest documents
|
|
31
|
+
pipeline.ingest("docs/contracts/")
|
|
32
|
+
|
|
33
|
+
# Query
|
|
34
|
+
result = pipeline.query("What are the termination clauses?")
|
|
35
|
+
print(result.answer)
|
|
36
|
+
print(result.source_chunks)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
```yaml
|
|
40
|
+
# nexrag.yaml
|
|
41
|
+
nexrag:
|
|
42
|
+
version: "1.0"
|
|
43
|
+
|
|
44
|
+
ingestion:
|
|
45
|
+
chunker:
|
|
46
|
+
strategy: recursive
|
|
47
|
+
chunk_size: 512
|
|
48
|
+
embedder:
|
|
49
|
+
provider: openai
|
|
50
|
+
model: text-embedding-3-small
|
|
51
|
+
vector_db:
|
|
52
|
+
provider: chroma
|
|
53
|
+
default_collection: contracts
|
|
54
|
+
|
|
55
|
+
query:
|
|
56
|
+
llm:
|
|
57
|
+
provider: openai
|
|
58
|
+
model: gpt-4o
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Core only
|
|
67
|
+
pip install nexrag
|
|
68
|
+
|
|
69
|
+
# With OpenAI support
|
|
70
|
+
pip install "nexrag[openai]"
|
|
71
|
+
|
|
72
|
+
# With everything
|
|
73
|
+
pip install "nexrag[all]"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Design Principles
|
|
79
|
+
|
|
80
|
+
| Principle | What it means |
|
|
81
|
+
|---|---|
|
|
82
|
+
| Interface-first | Every stage is a contract. Implementation is secondary. |
|
|
83
|
+
| Config-driven | YAML configures the pipeline. Code defines the logic. |
|
|
84
|
+
| Zero lock-in | Core has no dependency on LangChain, LlamaIndex, or any AI SDK. |
|
|
85
|
+
| Explicit over implicit | No hidden defaults. Every behavior is declared or documented. |
|
|
86
|
+
| Extensible by design | New components plug in without touching core. |
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Architecture
|
|
91
|
+
|
|
92
|
+
NexRAG has two independent pipelines:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
INGESTION → Loader → Sanitizer → Chunker → Embedder → VectorDB
|
|
96
|
+
QUERY → Embedder → Retriever → PromptBuilder → LLM → PipelineResult
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
See [Architecture Documentation](docs/) for full pipeline diagrams.
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Supported Providers (V1)
|
|
104
|
+
|
|
105
|
+
| Category | Providers |
|
|
106
|
+
|---|---|
|
|
107
|
+
| Embedders | OpenAI, HuggingFace, Ollama |
|
|
108
|
+
| Vector DBs | ChromaDB (local + remote) |
|
|
109
|
+
| LLMs | OpenAI, Anthropic, Ollama |
|
|
110
|
+
| Loaders | PDF, TXT/MD, Word, Excel, JSON, HTML, Code |
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Contributing
|
|
115
|
+
|
|
116
|
+
NexRAG is in early development. Contribution guidelines will be published with v1.0.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Changelog
|
|
121
|
+
|
|
122
|
+
See [CHANGELOG.md](CHANGELOG.md).
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["uv_build>=0.11.16,<0.12.0"]
|
|
3
|
+
build-backend = "uv_build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "nexrag"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Framework-agnostic RAG pipeline SDK. Plug in any component, swap any stage, configure everything in YAML"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "TBD" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "KevinRawal", email = "kevinrawal30@gmail.com" }
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.12"
|
|
15
|
+
keywords = [
|
|
16
|
+
"rag",
|
|
17
|
+
"retrieval-augmented-generation",
|
|
18
|
+
"llm",
|
|
19
|
+
"vector-database",
|
|
20
|
+
"embeddings",
|
|
21
|
+
"ai",
|
|
22
|
+
"nlp",
|
|
23
|
+
"pipeline",
|
|
24
|
+
"sdk",
|
|
25
|
+
]
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
"Typing :: Typed",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# Keep this minimal. Only what the pipeline skeleton needs.
|
|
37
|
+
dependencies = [
|
|
38
|
+
"pydantic>=2.0", # config schema validation
|
|
39
|
+
"pyyaml>=6.0", # YAML config loading
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
|
|
44
|
+
# Provider extras — install only what you use
|
|
45
|
+
openai = [
|
|
46
|
+
"openai>=1.0",
|
|
47
|
+
]
|
|
48
|
+
anthropic = [
|
|
49
|
+
"anthropic>=0.20",
|
|
50
|
+
]
|
|
51
|
+
ollama = [
|
|
52
|
+
"ollama>=0.1",
|
|
53
|
+
]
|
|
54
|
+
chromadb = [
|
|
55
|
+
"chromadb>=0.5",
|
|
56
|
+
]
|
|
57
|
+
huggingface = [
|
|
58
|
+
"sentence-transformers>=2.0",
|
|
59
|
+
]
|
|
60
|
+
|
|
61
|
+
# Document loaders
|
|
62
|
+
pdf = [
|
|
63
|
+
"pypdf>=4.0",
|
|
64
|
+
]
|
|
65
|
+
word = [
|
|
66
|
+
"python-docx>=1.0",
|
|
67
|
+
]
|
|
68
|
+
excel = [
|
|
69
|
+
"openpyxl>=3.1",
|
|
70
|
+
]
|
|
71
|
+
html = [
|
|
72
|
+
"beautifulsoup4>=4.12",
|
|
73
|
+
"lxml>=5.0",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
# Convenience bundles
|
|
77
|
+
all-providers = [
|
|
78
|
+
"nexrag[openai]",
|
|
79
|
+
"nexrag[anthropic]",
|
|
80
|
+
"nexrag[ollama]",
|
|
81
|
+
"nexrag[chromadb]",
|
|
82
|
+
"nexrag[huggingface]",
|
|
83
|
+
]
|
|
84
|
+
all-loaders = [
|
|
85
|
+
"nexrag[pdf]",
|
|
86
|
+
"nexrag[word]",
|
|
87
|
+
"nexrag[excel]",
|
|
88
|
+
"nexrag[html]",
|
|
89
|
+
]
|
|
90
|
+
all = [
|
|
91
|
+
"nexrag[all-providers]",
|
|
92
|
+
"nexrag[all-loaders]",
|
|
93
|
+
]
|
|
94
|
+
|
|
95
|
+
# Dev dependencies
|
|
96
|
+
dev = [
|
|
97
|
+
"pytest>=8.0",
|
|
98
|
+
"pytest-cov>=5.0",
|
|
99
|
+
"ruff>=0.4", # linter + formatter
|
|
100
|
+
"mypy>=1.10", # type checker
|
|
101
|
+
"pre-commit>=3.7",
|
|
102
|
+
"types-pyyaml",
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
[project.urls]
|
|
106
|
+
Homepage = "https://github.com/kevinrawal/nexrag"
|
|
107
|
+
Repository = "https://github.com/kevinrawal/nexrag"
|
|
108
|
+
Issues = "https://github.com/kevinrawal/nexrag/issues"
|
|
109
|
+
Changelog = "https://github.com/kevinrawal/nexrag/blob/main/CHANGELOG.md"
|
|
110
|
+
|
|
111
|
+
# Ruff (linter + formatter)
|
|
112
|
+
|
|
113
|
+
[tool.ruff]
|
|
114
|
+
target-version = "py312"
|
|
115
|
+
line-length = 100
|
|
116
|
+
src = ["src"]
|
|
117
|
+
|
|
118
|
+
[tool.ruff.lint]
|
|
119
|
+
select = [
|
|
120
|
+
"E", # pycodestyle errors
|
|
121
|
+
"W", # pycodestyle warnings
|
|
122
|
+
"F", # pyflakes
|
|
123
|
+
"I", # isort
|
|
124
|
+
"B", # flake8-bugbear
|
|
125
|
+
"UP", # pyupgrade
|
|
126
|
+
]
|
|
127
|
+
ignore = ["E501"] # line length handled by formatter
|
|
128
|
+
|
|
129
|
+
[tool.ruff.lint.isort]
|
|
130
|
+
known-first-party = ["nexrag"]
|
|
131
|
+
|
|
132
|
+
# Mypy
|
|
133
|
+
|
|
134
|
+
[tool.mypy]
|
|
135
|
+
python_version = "3.12"
|
|
136
|
+
strict = true
|
|
137
|
+
warn_return_any = true
|
|
138
|
+
warn_unused_configs = true
|
|
139
|
+
mypy_path = "src"
|
|
140
|
+
|
|
141
|
+
# Pytest
|
|
142
|
+
|
|
143
|
+
[tool.pytest.ini_options]
|
|
144
|
+
testpaths = ["tests"]
|
|
145
|
+
addopts = "-v --tb=short"
|
|
146
|
+
markers = [
|
|
147
|
+
"integration: marks tests that require live services (deselect with '-m not integration')",
|
|
148
|
+
"unit: marks fast, isolated unit tests",
|
|
149
|
+
]
|
|
150
|
+
|
|
151
|
+
[tool.coverage.run]
|
|
152
|
+
source = ["src/nexrag"]
|
|
153
|
+
omit = ["*/tests/*"]
|
|
154
|
+
|
|
155
|
+
[tool.coverage.report]
|
|
156
|
+
exclude_lines = [
|
|
157
|
+
"pragma: no cover",
|
|
158
|
+
"def __repr__",
|
|
159
|
+
"raise NotImplementedError",
|
|
160
|
+
"if TYPE_CHECKING:",
|
|
161
|
+
]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NexRAG — Framework-agnostic RAG pipeline SDK.
|
|
3
|
+
|
|
4
|
+
Public API surface. Import from here, not from internal modules.
|
|
5
|
+
|
|
6
|
+
from nexrag import NexRAG, PipelineResult
|
|
7
|
+
from nexrag.exceptions import NexRAGError
|
|
8
|
+
|
|
9
|
+
Everything under nexrag.core, nexrag.adapters, nexrag.loaders, etc.
|
|
10
|
+
is internal. Internal APIs may change between minor versions.
|
|
11
|
+
The public surface below is stable across minor versions.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from nexrag.exceptions import NexRAGError
|
|
15
|
+
|
|
16
|
+
# Package version; kept in step with ``version`` in pyproject.toml.
__version__ = "0.1.0"

# Names exported by ``from nexrag import *``.
__all__ = ["NexRAG", "PipelineResult", "NexRAGError", "__version__"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class NexRAG:
    """Stub for the pipeline entry point; the real class is planned for Phase 3.

    Raises:
        NotImplementedError: Always, on instantiation.
    """

    def __init__(self) -> None:
        # Fail loudly rather than hand back a half-working object.
        reason = (
            "NexRAG entrypoint is not yet implemented. "
            "Core interfaces and config system are next."
        )
        raise NotImplementedError(reason)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PipelineResult:
    """Stub for the query result type; planned with the Phase 0 data models.

    Raises:
        NotImplementedError: Always, on instantiation.
    """

    def __init__(self) -> None:
        reason = "PipelineResult is not yet implemented."
        raise NotImplementedError(reason)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NexRAG exception hierarchy.
|
|
3
|
+
|
|
4
|
+
Every exception can carry a stage name, component name, pipeline_id, and the
|
|
5
|
+
original exception. No generic "something went wrong" — every failure is
|
|
6
|
+
traceable to an exact stage and component.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NexRAGError(Exception):
    """Base exception for all NexRAG errors.

    The rendered message is ``message`` followed by whichever context fields
    were supplied, joined with ``" | "``, for example
    ``"failed | stage=embed | component=X | pipeline_id=p1 | cause=ValueError: bad"``.

    Args:
        message: Human-readable description of the failure.
        stage: Name of the pipeline stage that failed, if known.
        component: Name of the component that failed, if known.
        pipeline_id: Identifier of the pipeline, if known.
        cause: The original exception being wrapped, if any. It is also
            installed as ``__cause__`` so tracebacks show it as the direct
            cause.

    Attributes:
        stage: Value passed as ``stage``.
        component: Value passed as ``component``.
        pipeline_id: Value passed as ``pipeline_id``.
        cause: Value passed as ``cause``.
    """

    def __init__(
        self,
        message: str,
        *,
        stage: str | None = None,
        component: str | None = None,
        pipeline_id: str | None = None,
        cause: BaseException | None = None,
    ) -> None:
        self.stage = stage
        self.component = component
        self.pipeline_id = pipeline_id
        self.cause = cause
        # Chain the wrapped exception the standard way so its traceback is
        # reported even when the raiser omits ``raise ... from``. Only real
        # exceptions are chained: assigning None would suppress the implicit
        # context, and any other value makes the assignment raise TypeError.
        if isinstance(cause, BaseException):
            self.__cause__ = cause
        super().__init__(self._format(message))

    def _format(self, message: str) -> str:
        """Return ``message`` with the supplied context fields appended."""
        parts = [message]
        if self.stage:
            parts.append(f"stage={self.stage}")
        if self.component:
            parts.append(f"component={self.component}")
        if self.pipeline_id:
            parts.append(f"pipeline_id={self.pipeline_id}")
        # Identity check, not truthiness: an exception type may define
        # __bool__/__len__ and evaluate as false while still being a cause.
        if self.cause is not None:
            parts.append(f"cause={type(self.cause).__name__}: {self.cause}")
        return " | ".join(parts)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Configuration
|
|
44
|
+
|
|
45
|
+
class ConfigError(NexRAGError):
    """Raised when nexrag.yaml holds bad values or is missing required ones."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ClassResolutionError(ConfigError):
    """A dotted class path was not found, not importable, or has the wrong interface."""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Ingestion stages
|
|
54
|
+
|
|
55
|
+
class LoaderError(NexRAGError):
    """A source file could not be read or parsed."""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SanitizerError(NexRAGError):
    """An exception was raised by a user-provided sanitizer."""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ChunkError(NexRAGError):
    """Chunking failed: empty output, invalid config, or a runtime error."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class EmbedderError(NexRAGError):
    """The embedding API failed or its result had an unexpected shape."""
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class EmbedderMismatchError(EmbedderError):
    """Embedding model changed since the collection was created.

    Vectors produced by different models are incompatible. The error message
    directs the user to run ``nexrag rebuild --config nexrag.yaml`` to wipe
    and re-ingest the collection.

    Args:
        stored_model: Model name recorded for the existing collection.
        configured_model: Model name in the current configuration.
        collection: Name of the affected collection.
        stage: Forwarded to the base exception.
        component: Forwarded to the base exception.
        pipeline_id: Forwarded to the base exception.
        cause: Forwarded to the base exception.

    Attributes:
        stored_model: Value passed as ``stored_model``.
        configured_model: Value passed as ``configured_model``.
        collection: Value passed as ``collection``.
    """

    def __init__(
        self,
        stored_model: str,
        configured_model: str,
        collection: str,
        *,
        stage: str | None = None,
        component: str | None = None,
        pipeline_id: str | None = None,
        cause: BaseException | None = None,
    ) -> None:
        self.stored_model = stored_model
        self.configured_model = configured_model
        self.collection = collection
        message = (
            f"Embedding model mismatch in collection '{collection}'. "
            f"Stored: '{stored_model}', configured: '{configured_model}'. "
            "Vectors are incompatible. Run: nexrag rebuild --config nexrag.yaml"
        )
        # The context keywords are spelled out (instead of **kwargs) so type
        # checkers can verify the call without a type-ignore.
        super().__init__(
            message,
            stage=stage,
            component=component,
            pipeline_id=pipeline_id,
            cause=cause,
        )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class VectorDBError(NexRAGError):
    """An operation against the vector database failed."""
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class VectorDBConnectionError(VectorDBError):
    """A connection to the vector database could not be established."""
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class VectorDBUpsertError(VectorDBError):
    """Writing chunks to the vector database failed."""
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# Query stages
|
|
110
|
+
|
|
111
|
+
class RetrieverError(NexRAGError):
    """Retrieval failed or produced no results."""
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class PromptError(NexRAGError):
    """Rendering the prompt template failed."""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class LLMError(NexRAGError):
    """A call to the LLM API failed."""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class LLMTimeoutError(LLMError):
    """The LLM call ran longer than the configured timeout."""
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class LLMRateLimitError(LLMError):
    """The LLM provider's rate limit was hit."""
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Pipeline orchestration
|
|
132
|
+
|
|
133
|
+
class PipelineError(NexRAGError):
    """Error raised at the orchestration level.

    It wraps a stage-level exception and adds pipeline context; the original
    stage error is available on ``.cause``.
    """
|
|
140
|
+
|
|
File without changes
|