nexrag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nexrag/__init__.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ NexRAG — Framework-agnostic RAG pipeline SDK.
3
+
4
+ Public API surface. Import from here, not from internal modules.
5
+
6
+ from nexrag import NexRAG, PipelineResult
7
+ from nexrag.exceptions import NexRAGError
8
+
9
+ Everything under nexrag.core, nexrag.adapters, nexrag.loaders, etc.
10
+ is internal. Internal APIs may change between minor versions.
11
+ The public surface below is stable across minor versions.
12
+ """
13
+
14
+ from nexrag.exceptions import NexRAGError
15
+
16
__version__ = "0.1.0"

# Names that make up the stable public surface of the package.
__all__ = ["NexRAG", "PipelineResult", "NexRAGError", "__version__"]
23
+
24
+
25
class NexRAG:
    """Stub for the pipeline entrypoint; the real class arrives in Phase 3.

    Instantiating it always raises ``NotImplementedError``.
    """

    def __init__(self) -> None:
        reason = (
            "NexRAG entrypoint is not yet implemented. "
            "Core interfaces and config system are next."
        )
        raise NotImplementedError(reason)
33
+
34
+
35
class PipelineResult:
    """Stub for the pipeline result type; the real class arrives with the Phase 0 data models.

    Instantiating it always raises ``NotImplementedError``.
    """

    def __init__(self) -> None:
        reason = "PipelineResult is not yet implemented."
        raise NotImplementedError(reason)
nexrag/exceptions.py ADDED
@@ -0,0 +1,140 @@
1
+ """
2
+ NexRAG exception hierarchy.
3
+
4
+ Every exception carries: stage name, component name, pipeline_id, and the
5
+ original exception. No generic "something went wrong" — every failure is
6
+ traceable to an exact stage and component.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+
12
class NexRAGError(Exception):
    """Base exception for all NexRAG errors.

    Optional context (stage, component, pipeline id, underlying exception)
    is stored on the instance and appended to the message, so ``str(err)``
    reads ``"<message> | stage=... | component=... | pipeline_id=... |
    cause=<Type>: <text>"`` with unset fields omitted.

    Args:
        message: Human-readable description of the failure.
        stage: Name of the pipeline stage that failed, if known.
        component: Name of the component that failed, if known.
        pipeline_id: Identifier of the pipeline run, if known.
        cause: The underlying exception, if any. It is stored and rendered
            into the message only; ``__cause__`` is not set here, so use
            ``raise ... from cause`` at the raise site to chain tracebacks.
    """

    def __init__(
        self,
        message: str,
        *,
        stage: str | None = None,
        component: str | None = None,
        pipeline_id: str | None = None,
        cause: BaseException | None = None,
    ) -> None:
        # Context must be on the instance before _format() reads it.
        self.stage = stage
        self.component = component
        self.pipeline_id = pipeline_id
        self.cause = cause
        super().__init__(self._format(message))

    def _format(self, message: str) -> str:
        """Return *message* followed by every context field that is set."""
        parts = [message]
        # String fields: an empty string is treated the same as None.
        if self.stage:
            parts.append(f"stage={self.stage}")
        if self.component:
            parts.append(f"component={self.component}")
        if self.pipeline_id:
            parts.append(f"pipeline_id={self.pipeline_id}")
        # Identity check, not truthiness: an exception type may define
        # __bool__/__len__ and evaluate falsy while still being a real cause.
        if self.cause is not None:
            parts.append(f"cause={type(self.cause).__name__}: {self.cause}")
        return " | ".join(parts)
41
+
42
+
43
+ # Configuration
44
+
45
class ConfigError(NexRAGError):
    """Raised when a value in nexrag.yaml is invalid or absent."""
47
+
48
+
49
class ClassResolutionError(ConfigError):
    """Raised when a dotted class path cannot be resolved.

    Covers a path that is not found, is not importable, or resolves to a
    class with the wrong interface.
    """
51
+
52
+
53
+ # Ingestion stages
54
+
55
class LoaderError(NexRAGError):
    """Raised when a source file cannot be read or parsed."""
57
+
58
+
59
class SanitizerError(NexRAGError):
    """Raised when a user-provided sanitizer throws an exception."""
61
+
62
+
63
class ChunkError(NexRAGError):
    """Raised when chunking fails: empty output, invalid config, or a runtime error."""
65
+
66
+
67
class EmbedderError(NexRAGError):
    """Raised when the embedding API fails or returns an unexpected shape."""
69
+
70
+
71
class EmbedderMismatchError(EmbedderError):
    """
    Embedding model changed since the collection was created.

    Vectors produced by different models are incompatible.
    Resolution: wipe and re-ingest the collection; the error message points
    the user at ``nexrag rebuild --config nexrag.yaml``.

    Args:
        stored_model: Model name recorded for the existing collection.
        configured_model: Model name in the current configuration.
        collection: Name of the affected collection.
        stage: Forwarded to the base error as context.
        component: Forwarded to the base error as context.
        pipeline_id: Forwarded to the base error as context.
        cause: Forwarded to the base error as context.
    """

    def __init__(
        self,
        stored_model: str,
        configured_model: str,
        collection: str,
        *,
        stage: str | None = None,
        component: str | None = None,
        pipeline_id: str | None = None,
        cause: BaseException | None = None,
    ) -> None:
        self.stored_model = stored_model
        self.configured_model = configured_model
        self.collection = collection
        message = (
            f"Embedding model mismatch in collection '{collection}'. "
            f"Stored: '{stored_model}', configured: '{configured_model}'. "
            "Vectors are incompatible. Run: nexrag rebuild --config nexrag.yaml"
        )
        super().__init__(
            message,
            stage=stage,
            component=component,
            pipeline_id=pipeline_id,
            cause=cause,
        )

    def __reduce__(self) -> tuple[object, ...]:
        """Support pickle/copy by replaying the real constructor arguments.

        The default exception reduction calls ``type(self)(*self.args)``.
        ``self.args`` holds only the formatted message, which does not match
        this three-argument ``__init__`` and would raise ``TypeError`` when
        the error is unpickled or copied (e.g. across process boundaries).
        """
        from functools import partial

        rebuild = partial(
            type(self),
            stage=self.stage,
            component=self.component,
            pipeline_id=self.pipeline_id,
            cause=self.cause,
        )
        ctor_args = (self.stored_model, self.configured_model, self.collection)
        # Third item restores any extra instance attributes, matching what
        # the default reduction would have carried over.
        return rebuild, ctor_args, self.__dict__
95
+
96
+
97
class VectorDBError(NexRAGError):
    """Raised when a vector database operation fails."""
99
+
100
+
101
class VectorDBConnectionError(VectorDBError):
    """Raised when a connection to the vector database cannot be made."""
103
+
104
+
105
class VectorDBUpsertError(VectorDBError):
    """Raised when writing chunks to the vector database fails."""
107
+
108
+
109
+ # Query stages
110
+
111
class RetrieverError(NexRAGError):
    """Raised when retrieval fails or returns no results."""
113
+
114
+
115
class PromptError(NexRAGError):
    """Raised when rendering the prompt template fails."""
117
+
118
+
119
class LLMError(NexRAGError):
    """Raised when a call to the LLM API fails."""
121
+
122
+
123
class LLMTimeoutError(LLMError):
    """Raised when an LLM call exceeds the configured timeout."""
125
+
126
+
127
class LLMRateLimitError(LLMError):
    """Raised when the LLM provider's rate limit is hit."""
129
+
130
+
131
+ # Pipeline orchestration
132
+
133
class PipelineError(NexRAGError):
    """Error raised at the orchestration level.

    Wraps a failure that originated in a stage and adds pipeline context.
    The original stage error is expected on ``.cause``.
    """
140
+
nexrag/py.typed ADDED
File without changes
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.3
2
+ Name: nexrag
3
+ Version: 0.1.0
4
+ Summary: Framework-agnostic RAG pipeline SDK. Plug in any component, swap any stage, configure everything in YAML
5
+ Keywords: rag,retrieval-augmented-generation,llm,vector-database,embeddings,ai,nlp,pipeline,sdk
6
+ Author: KevinRawal
7
+ Author-email: KevinRawal <kevinrawal30@gmail.com>
8
+ License: TBD
9
+ Classifier: Development Status :: 2 - Pre-Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
15
+ Classifier: Typing :: Typed
16
+ Requires-Dist: pydantic>=2.0
17
+ Requires-Dist: pyyaml>=6.0
18
+ Requires-Dist: nexrag[all-providers] ; extra == 'all'
19
+ Requires-Dist: nexrag[all-loaders] ; extra == 'all'
20
+ Requires-Dist: nexrag[pdf] ; extra == 'all-loaders'
21
+ Requires-Dist: nexrag[word] ; extra == 'all-loaders'
22
+ Requires-Dist: nexrag[excel] ; extra == 'all-loaders'
23
+ Requires-Dist: nexrag[html] ; extra == 'all-loaders'
24
+ Requires-Dist: nexrag[openai] ; extra == 'all-providers'
25
+ Requires-Dist: nexrag[anthropic] ; extra == 'all-providers'
26
+ Requires-Dist: nexrag[ollama] ; extra == 'all-providers'
27
+ Requires-Dist: nexrag[chromadb] ; extra == 'all-providers'
28
+ Requires-Dist: nexrag[huggingface] ; extra == 'all-providers'
29
+ Requires-Dist: anthropic>=0.20 ; extra == 'anthropic'
30
+ Requires-Dist: chromadb>=0.5 ; extra == 'chromadb'
31
+ Requires-Dist: pytest>=8.0 ; extra == 'dev'
32
+ Requires-Dist: pytest-cov>=5.0 ; extra == 'dev'
33
+ Requires-Dist: ruff>=0.4 ; extra == 'dev'
34
+ Requires-Dist: mypy>=1.10 ; extra == 'dev'
35
+ Requires-Dist: pre-commit>=3.7 ; extra == 'dev'
36
+ Requires-Dist: types-pyyaml ; extra == 'dev'
37
+ Requires-Dist: openpyxl>=3.1 ; extra == 'excel'
38
+ Requires-Dist: beautifulsoup4>=4.12 ; extra == 'html'
39
+ Requires-Dist: lxml>=5.0 ; extra == 'html'
40
+ Requires-Dist: sentence-transformers>=2.0 ; extra == 'huggingface'
41
+ Requires-Dist: ollama>=0.1 ; extra == 'ollama'
42
+ Requires-Dist: openai>=1.0 ; extra == 'openai'
43
+ Requires-Dist: pypdf>=4.0 ; extra == 'pdf'
44
+ Requires-Dist: python-docx>=1.0 ; extra == 'word'
45
+ Requires-Python: >=3.12
46
+ Project-URL: Homepage, https://github.com/kevinrawal/nexrag
47
+ Project-URL: Repository, https://github.com/kevinrawal/nexrag
48
+ Project-URL: Issues, https://github.com/kevinrawal/nexrag/issues
49
+ Project-URL: Changelog, https://github.com/kevinrawal/nexrag/blob/main/CHANGELOG.md
50
+ Provides-Extra: all
51
+ Provides-Extra: all-loaders
52
+ Provides-Extra: all-providers
53
+ Provides-Extra: anthropic
54
+ Provides-Extra: chromadb
55
+ Provides-Extra: dev
56
+ Provides-Extra: excel
57
+ Provides-Extra: html
58
+ Provides-Extra: huggingface
59
+ Provides-Extra: ollama
60
+ Provides-Extra: openai
61
+ Provides-Extra: pdf
62
+ Provides-Extra: word
63
+ Description-Content-Type: text/markdown
64
+
65
+ # NexRAG
66
+
67
+ > Framework-agnostic RAG pipeline SDK. Plug in any component, swap any stage, configure everything in YAML.
68
+
69
+ [![PyPI version](https://img.shields.io/pypi/v/nexrag.svg)](https://pypi.org/project/nexrag/)
70
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
71
+ ![License](https://img.shields.io/badge/license-TBD-lightgrey.svg)
72
+
73
+ ---
74
+
75
+ ## What is NexRAG?
76
+
77
+ NexRAG is a production-grade RAG (Retrieval-Augmented Generation) pipeline SDK for Python.
78
+
79
+ **NexRAG owns the pipeline shape. You own the components.**
80
+
81
+ Every stage — loading, chunking, embedding, retrieval, generation — is a clean interface. NexRAG ships default implementations for each. You can swap any of them by implementing the interface and declaring it in YAML. No framework lock-in. No magic. No hidden behavior.
82
+
83
+ ---
84
+
85
+ ## Quickstart
86
+
87
+ > **Note:** NexRAG v1.0 is under active development. The API shown below is a preview and is not implemented in 0.1.0; this section will be updated when v1.0 is released.
88
+
89
+ ```python
90
+ from nexrag import NexRAG
91
+
92
+ pipeline = NexRAG.from_config("nexrag.yaml")
93
+
94
+ # Ingest documents
95
+ pipeline.ingest("docs/contracts/")
96
+
97
+ # Query
98
+ result = pipeline.query("What are the termination clauses?")
99
+ print(result.answer)
100
+ print(result.source_chunks)
101
+ ```
102
+
103
+ ```yaml
104
+ # nexrag.yaml
105
+ nexrag:
106
+ version: "1.0"
107
+
108
+ ingestion:
109
+ chunker:
110
+ strategy: recursive
111
+ chunk_size: 512
112
+ embedder:
113
+ provider: openai
114
+ model: text-embedding-3-small
115
+ vector_db:
116
+ provider: chroma
117
+ default_collection: contracts
118
+
119
+ query:
120
+ llm:
121
+ provider: openai
122
+ model: gpt-4o
123
+ ```
124
+
125
+ ---
126
+
127
+ ## Installation
128
+
129
+ ```bash
130
+ # Core only
131
+ pip install nexrag
132
+
133
+ # With OpenAI support
134
+ pip install "nexrag[openai]"
135
+
136
+ # With everything
137
+ pip install "nexrag[all]"
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Design Principles
143
+
144
+ | Principle | What it means |
145
+ |---|---|
146
+ | Interface-first | Every stage is a contract. Implementation is secondary. |
147
+ | Config-driven | YAML configures the pipeline. Code defines the logic. |
148
+ | Zero lock-in | Core has no dependency on LangChain, LlamaIndex, or any AI SDK. |
149
+ | Explicit over implicit | No hidden defaults. Every behavior is declared or documented. |
150
+ | Extensible by design | New components plug in without touching core. |
151
+
152
+ ---
153
+
154
+ ## Architecture
155
+
156
+ NexRAG has two independent pipelines:
157
+
158
+ ```
159
+ INGESTION → Loader → Sanitizer → Chunker → Embedder → VectorDB
160
+ QUERY → Embedder → Retriever → PromptBuilder → LLM → PipelineResult
161
+ ```
162
+
163
+ See the [Architecture Documentation](https://github.com/kevinrawal/nexrag/tree/main/docs) for full pipeline diagrams.
164
+
165
+ ---
166
+
167
+ ## Supported Providers (V1)
168
+
169
+ | Category | Providers |
170
+ |---|---|
171
+ | Embedders | OpenAI, HuggingFace, Ollama |
172
+ | Vector DBs | ChromaDB (local + remote) |
173
+ | LLMs | OpenAI, Anthropic, Ollama |
174
+ | Loaders | PDF, TXT/MD, Word, Excel, JSON, HTML, Code |
175
+
176
+ ---
177
+
178
+ ## Contributing
179
+
180
+ NexRAG is in early development. Contribution guidelines will be published with v1.0.
181
+
182
+ ---
183
+
184
+ ## Changelog
185
+
186
+ See [CHANGELOG.md](https://github.com/kevinrawal/nexrag/blob/main/CHANGELOG.md).
@@ -0,0 +1,6 @@
1
+ nexrag/__init__.py,sha256=8kitDa-E01dK4CctmVD1rQiRJD3l72262k2MUsWy4kQ,1039
2
+ nexrag/exceptions.py,sha256=b5jYBETZT8X7PBu4aRO1QoWnPgLuCI8l-jo9AnlkMb4,3600
3
+ nexrag/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ nexrag-0.1.0.dist-info/WHEEL,sha256=f5fWSvWsg5Knq5GWa6t1nJIug0Tqo69GqAWD_9LbBKw,81
5
+ nexrag-0.1.0.dist-info/METADATA,sha256=hVIHw5EIhH1blnCHmoju3SeZ0FH6ELIvb9VB4EmmZYk,5663
6
+ nexrag-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.11.16
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any