ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragit/utils/__init__.py CHANGED
@@ -12,8 +12,6 @@ from datetime import datetime
 from math import floor
 from typing import Any
 
-import pandas as pd
-
 
 def get_hashable_repr(dct: dict[str, object]) -> tuple[tuple[str, object, float, int | None], ...]:
     """
@@ -62,26 +60,6 @@ def remove_duplicates(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
     return deduplicated_items
 
 
-def handle_missing_values_in_combinations(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Handle missing values in experiment data combinations.
-
-    Parameters
-    ----------
-    df : pd.DataFrame
-        Experiment data with combinations being explored.
-
-    Returns
-    -------
-    pd.DataFrame
-        Data with NaN values properly replaced.
-    """
-    if "chunk_overlap" in df.columns:
-        df["chunk_overlap"] = df["chunk_overlap"].map(lambda el: 0 if pd.isna(el) else el)
-
-    return df
-
-
 def datetime_str_to_epoch_time(timestamp: str | int) -> str | int:
     """
     Convert datetime string to epoch time.
ragit/version.py CHANGED
@@ -2,4 +2,4 @@
 # Copyright RODMENA LIMITED 2025
 # SPDX-License-Identifier: Apache-2.0
 #
-__version__ = "0.8.2"
+__version__ = "0.11.0"
ragit-0.8.2.dist-info/METADATA → ragit-0.11.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ragit
-Version: 0.8.2
+Version: 0.11.0
 Summary: Automatic RAG Pattern Optimization Engine
 Author: RODMENA LIMITED
 Maintainer-email: RODMENA LIMITED <info@rodmena.co.uk>
@@ -16,18 +16,16 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Programming Language :: Python :: 3.14
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: POSIX :: Linux
-Requires-Python: <3.14,>=3.12
+Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: requests>=2.31.0
 Requires-Dist: numpy>=1.26.0
-Requires-Dist: pandas>=2.2.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: python-dotenv>=1.0.0
-Requires-Dist: scikit-learn>=1.5.0
 Requires-Dist: tqdm>=4.66.0
-Requires-Dist: trio>=0.24.0
 Requires-Dist: httpx>=0.27.0
+Requires-Dist: resilient-circuit>=0.4.7
 Provides-Extra: dev
 Requires-Dist: ragit[test]; extra == "dev"
 Requires-Dist: pytest; extra == "dev"
@@ -39,8 +37,6 @@ Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
 Requires-Dist: pytest-mock; extra == "test"
-Provides-Extra: transformers
-Requires-Dist: sentence-transformers>=2.2.0; extra == "transformers"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.0; extra == "docs"
 Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
@@ -55,14 +51,11 @@ RAG toolkit for Python. Document loading, chunking, vector search, LLM integrati
 
 ```bash
 pip install ragit
-
-# For offline embedding
-pip install ragit[transformers]
 ```
 
 ## Quick Start
 
-You must provide an embedding source: custom function, SentenceTransformers, or any provider.
+You must provide an embedding source: custom function, Ollama, or any provider.
 
 ### Custom Embedding Function
 
@@ -90,26 +83,17 @@ assistant = RAGAssistant("docs/", embed_fn=my_embed, generate_fn=my_generate)
 answer = assistant.ask("How does authentication work?")
 ```
 
-### Offline Embedding (SentenceTransformers)
-
-Models are downloaded automatically on first use (~90MB for default model).
+### With Ollama (nomic-embed-text)
 
 ```python
 from ragit import RAGAssistant
-from ragit.providers import SentenceTransformersProvider
+from ragit.providers import OllamaProvider
 
-# Uses all-MiniLM-L6-v2 by default
-assistant = RAGAssistant("docs/", provider=SentenceTransformersProvider())
-
-# Or specify a model
-assistant = RAGAssistant(
-    "docs/",
-    provider=SentenceTransformersProvider(model_name="all-mpnet-base-v2")
-)
+# Uses nomic-embed-text for embeddings (768d)
+assistant = RAGAssistant("docs/", provider=OllamaProvider())
+results = assistant.retrieve("search query")
 ```
 
-Available models: `all-MiniLM-L6-v2` (384d), `all-mpnet-base-v2` (768d), `paraphrase-MiniLM-L6-v2` (384d)
-
 ## Core API
 
 ```python
@@ -128,6 +112,45 @@ answer = assistant.ask(question, top_k=3) # Requires generate_fn/LLM
 code = assistant.generate_code(request) # Requires generate_fn/LLM
 ```
 
+## Index Persistence
+
+Save and load indexes to avoid re-computing embeddings:
+
+```python
+# Save index to disk
+assistant.save_index("./my_index")
+
+# Load index later (much faster than re-indexing)
+loaded = RAGAssistant.load_index("./my_index", provider=OllamaProvider())
+results = loaded.retrieve("query")
+```
+
+## Thread Safety
+
+RAGAssistant is thread-safe. Multiple threads can safely read while another writes:
+
+```python
+import threading
+
+assistant = RAGAssistant("docs/", provider=OllamaProvider())
+
+# Safe: concurrent reads and writes
+threading.Thread(target=lambda: assistant.retrieve("query")).start()
+threading.Thread(target=lambda: assistant.add_documents([new_doc])).start()
+```
+
+## Resource Management
+
+Use context managers for automatic cleanup:
+
+```python
+from ragit.providers import OllamaProvider
+
+with OllamaProvider() as provider:
+    response = provider.generate("Hello", model="llama3")
+# Session automatically closed
+```
+
 ## Document Loading
 
 ```python
ragit-0.11.0.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
+ragit/__init__.py,sha256=54z3-xCkEa4_P4eonrweSu3Lbig1BWLIGOGT3QUJ4N8,3263
+ragit/assistant.py,sha256=pjB58KyHGD7PwpwLE-lDyXxMhaehDe3IFiO9j7yewxk,33252
+ragit/config.py,sha256=M3YCyogalJ-_cNbY3vAnKIknNsBmqeUFH6lhknuPKV4,6399
+ragit/exceptions.py,sha256=2nBdAWbeLxTkykmwJBTn6BFBNib2dgPfr_Z58p1IwlY,7215
+ragit/loaders.py,sha256=r9hDPTpnVHs9-nMeL2IhEfjIda-TCwYmG3RvnpDcs70,11042
+ragit/logging.py,sha256=YnvhOfnOE3nTd-fR9LKPUHrWdh8fcSHIBEBS5iWDMs8,5739
+ragit/monitor.py,sha256=ajYTdQKM4QlYhlzjiKbSiks4kQj94v0pOhW4q16vJWY,10272
+ragit/version.py,sha256=e-rBQeeVkLzfQCMzS0MEjneUF2NDFJmoWYFtrbdq75c,98
+ragit/core/__init__.py,sha256=j53PFfoSMXwSbK1rRHpMbo8mX2i4R1LJ5kvTxBd7-0w,100
+ragit/core/experiment/__init__.py,sha256=4vAPOOYlY5Dcr2gOolyhBSPGIUxZKwEkgQffxS9BodA,452
+ragit/core/experiment/experiment.py,sha256=Ydf3jz5AXbttc2xcvIMecfc3lh4MKgCtCtyNCsFsn9c,19573
+ragit/core/experiment/results.py,sha256=KHpN3YSLJ83_JUfIMccRPS-q7LEt0S9p8ehDRawk_4k,3487
+ragit/providers/__init__.py,sha256=DSdv2-N9kJwrF6PymKYiktKbjc7g22J_7MD1Rm2ep4g,919
+ragit/providers/base.py,sha256=MJ8mVeXuGWhkX2XGTbkWIY3cVoTOPr4h5XBXw8rAX2Q,3434
+ragit/providers/function_adapter.py,sha256=A-TQhBgBWbuO_w1sy795Dxep1FOCBpAlWpXCKVQD8rc,7778
+ragit/providers/ollama.py,sha256=oV6_FojbMrxYyh-g5x77EM1vhzFT4aF98aj2TybWrlw,27600
+ragit/utils/__init__.py,sha256=6oQm2KwXFWIMtAE-0TgcDB6WwKyMy736UPnhG3bFFK4,2531
+ragit-0.11.0.dist-info/licenses/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
+ragit-0.11.0.dist-info/METADATA,sha256=msgmpc2zt4zWkLbKN0XSiIxvQ5Nt4f-nU5HnVLtoc4c,5300
+ragit-0.11.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ragit-0.11.0.dist-info/top_level.txt,sha256=pkPbG7yrw61wt9_y_xcLE2vq2a55fzockASD0yq0g4s,6
+ragit-0.11.0.dist-info/RECORD,,
ragit-0.8.2.dist-info/WHEEL → ragit-0.11.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
ragit/providers/sentence_transformers.py DELETED
@@ -1,225 +0,0 @@
-#
-# Copyright RODMENA LIMITED 2025
-# SPDX-License-Identifier: Apache-2.0
-#
-"""
-SentenceTransformers provider for offline embedding.
-
-This module provides embedding capabilities using the sentence-transformers
-library, enabling fully offline RAG pipelines without API dependencies.
-
-Requires: pip install ragit[transformers]
-"""
-
-from typing import TYPE_CHECKING
-
-from ragit.providers.base import (
-    BaseEmbeddingProvider,
-    EmbeddingResponse,
-)
-
-if TYPE_CHECKING:
-    from sentence_transformers import SentenceTransformer
-
-# Lazy import flag
-_sentence_transformers_available: bool | None = None
-_model_cache: dict[str, "SentenceTransformer"] = {}
-
-
-def _check_sentence_transformers() -> bool:
-    """Check if sentence-transformers is available."""
-    global _sentence_transformers_available
-    if _sentence_transformers_available is None:
-        try:
-            from sentence_transformers import SentenceTransformer  # noqa: F401
-
-            _sentence_transformers_available = True
-        except ImportError:
-            _sentence_transformers_available = False
-    return _sentence_transformers_available
-
-
-def _get_model(model_name: str, device: str | None = None) -> "SentenceTransformer":
-    """Get or create a cached SentenceTransformer model."""
-    cache_key = f"{model_name}:{device or 'auto'}"
-    if cache_key not in _model_cache:
-        from sentence_transformers import SentenceTransformer
-
-        _model_cache[cache_key] = SentenceTransformer(model_name, device=device)
-    return _model_cache[cache_key]
-
-
-class SentenceTransformersProvider(BaseEmbeddingProvider):
-    """
-    Embedding provider using sentence-transformers for offline operation.
-
-    This provider uses the sentence-transformers library to generate embeddings
-    locally without requiring any API calls. It's ideal for:
-    - Offline/air-gapped environments
-    - Development and testing
-    - Cost-sensitive applications
-    - Privacy-sensitive use cases
-
-    Parameters
-    ----------
-    model_name : str
-        HuggingFace model name. Default: "all-MiniLM-L6-v2" (fast, 384 dims).
-        Other popular options:
-        - "all-mpnet-base-v2" (768 dims, higher quality)
-        - "paraphrase-MiniLM-L6-v2" (384 dims)
-        - "multi-qa-MiniLM-L6-cos-v1" (384 dims, optimized for QA)
-    device : str, optional
-        Device to run on ("cpu", "cuda", "mps"). Auto-detected if None.
-
-    Examples
-    --------
-    >>> # Basic usage
-    >>> from ragit.providers import SentenceTransformersProvider
-    >>> provider = SentenceTransformersProvider()
-    >>>
-    >>> # With RAGAssistant (retrieval-only)
-    >>> assistant = RAGAssistant(docs, provider=provider)
-    >>> results = assistant.retrieve("query")
-    >>>
-    >>> # Custom model
-    >>> provider = SentenceTransformersProvider(model_name="all-mpnet-base-v2")
-
-    Raises
-    ------
-    ImportError
-        If sentence-transformers is not installed.
-
-    Note
-    ----
-    Install with: pip install ragit[transformers]
-    """
-
-    # Known model dimensions for common models
-    MODEL_DIMENSIONS: dict[str, int] = {
-        "all-MiniLM-L6-v2": 384,
-        "all-mpnet-base-v2": 768,
-        "paraphrase-MiniLM-L6-v2": 384,
-        "multi-qa-MiniLM-L6-cos-v1": 384,
-        "all-distilroberta-v1": 768,
-        "paraphrase-multilingual-MiniLM-L12-v2": 384,
-    }
-
-    def __init__(
-        self,
-        model_name: str = "all-MiniLM-L6-v2",
-        device: str | None = None,
-    ) -> None:
-        if not _check_sentence_transformers():
-            raise ImportError(
-                "sentence-transformers is required for SentenceTransformersProvider. "
-                "Install with: pip install ragit[transformers]"
-            )
-
-        self._model_name = model_name
-        self._device = device
-        self._model: SentenceTransformer | None = None  # Lazy loaded
-        self._dimensions: int | None = self.MODEL_DIMENSIONS.get(model_name)
-
-    def _ensure_model(self) -> "SentenceTransformer":
-        """Ensure model is loaded (lazy loading)."""
-        if self._model is None:
-            model = _get_model(self._model_name, self._device)
-            self._model = model
-            # Update dimensions from actual model
-            self._dimensions = model.get_sentence_embedding_dimension()
-        return self._model
-
-    @property
-    def provider_name(self) -> str:
-        return "sentence_transformers"
-
-    @property
-    def dimensions(self) -> int:
-        if self._dimensions is None:
-            # Load model to get dimensions
-            self._ensure_model()
-        return self._dimensions or 384  # Fallback
-
-    @property
-    def model_name(self) -> str:
-        """Return the model name being used."""
-        return self._model_name
-
-    def is_available(self) -> bool:
-        """Check if sentence-transformers is installed and model can be loaded."""
-        if not _check_sentence_transformers():
-            return False
-        try:
-            self._ensure_model()
-            return True
-        except Exception:
-            return False
-
-    def embed(self, text: str, model: str = "") -> EmbeddingResponse:
-        """
-        Generate embedding for text.
-
-        Parameters
-        ----------
-        text : str
-            Text to embed.
-        model : str
-            Model identifier (ignored, uses model from constructor).
-
-        Returns
-        -------
-        EmbeddingResponse
-            The embedding response.
-        """
-        model_instance = self._ensure_model()
-        embedding = model_instance.encode(text, convert_to_numpy=True)
-
-        # Convert to tuple
-        embedding_tuple = tuple(float(x) for x in embedding)
-
-        return EmbeddingResponse(
-            embedding=embedding_tuple,
-            model=self._model_name,
-            provider=self.provider_name,
-            dimensions=len(embedding_tuple),
-        )
-
-    def embed_batch(self, texts: list[str], model: str = "") -> list[EmbeddingResponse]:
-        """
-        Generate embeddings for multiple texts efficiently.
-
-        Uses batch encoding for better performance.
-
-        Parameters
-        ----------
-        texts : list[str]
-            Texts to embed.
-        model : str
-            Model identifier (ignored).
-
-        Returns
-        -------
-        list[EmbeddingResponse]
-            List of embedding responses.
-        """
-        if not texts:
-            return []
-
-        model_instance = self._ensure_model()
-
-        # Batch encode for efficiency
-        embeddings = model_instance.encode(texts, convert_to_numpy=True, show_progress_bar=False)
-
-        results = []
-        for embedding in embeddings:
-            embedding_tuple = tuple(float(x) for x in embedding)
-            results.append(
-                EmbeddingResponse(
-                    embedding=embedding_tuple,
-                    model=self._model_name,
-                    provider=self.provider_name,
-                    dimensions=len(embedding_tuple),
-                )
-            )
-
-        return results
ragit-0.8.2.dist-info/RECORD DELETED
@@ -1,20 +0,0 @@
-ragit/__init__.py,sha256=JUkL7ivgr4o4nZak-96P1C-pzKdNuN3Tl0X0WvpeXBU,3142
-ragit/assistant.py,sha256=LNof1zJAQWLIfhd7aPmKCpPQDCShpt9ezeM2nQ8ouyQ,18777
-ragit/config.py,sha256=7XnueNO4h22ibeWd1akHnfVoGSD8xE5vuOCMYeQOOU4,1898
-ragit/loaders.py,sha256=1JXgDLorvmtaDaRpbnKEqQjbQ4O5yfZxlb4QRUdGr58,6415
-ragit/version.py,sha256=WCqbf2oV6eXhq3DvqECcVFop-dseJIExoMxZ4fCtkvs,97
-ragit/core/__init__.py,sha256=j53PFfoSMXwSbK1rRHpMbo8mX2i4R1LJ5kvTxBd7-0w,100
-ragit/core/experiment/__init__.py,sha256=4vAPOOYlY5Dcr2gOolyhBSPGIUxZKwEkgQffxS9BodA,452
-ragit/core/experiment/experiment.py,sha256=aANDJ-XlMB0ijT8SBsPkb2U-lM3cChOuRO3oP9u3XxA,19331
-ragit/core/experiment/results.py,sha256=KHpN3YSLJ83_JUfIMccRPS-q7LEt0S9p8ehDRawk_4k,3487
-ragit/providers/__init__.py,sha256=tKWjUV31OZprD8k9aUUidtDMg7C_dWBXN7igtxeB8Ec,1339
-ragit/providers/base.py,sha256=MJ8mVeXuGWhkX2XGTbkWIY3cVoTOPr4h5XBXw8rAX2Q,3434
-ragit/providers/function_adapter.py,sha256=A-TQhBgBWbuO_w1sy795Dxep1FOCBpAlWpXCKVQD8rc,7778
-ragit/providers/ollama.py,sha256=YJH5a9nQHnP0NrIK7G9PqjV5A53f9JxmEJDAJ6d297M,15410
-ragit/providers/sentence_transformers.py,sha256=tTkd4HpE1MyfFJAwur-a7w-GlBxe93HlyM_dRffDrdY,6996
-ragit/utils/__init__.py,sha256=-UsE5oJSnmEnBDswl-ph0A09Iu8yKNbPhd1-_7Lcb8Y,3051
-ragit-0.8.2.dist-info/licenses/LICENSE,sha256=tAkwu8-AdEyGxGoSvJ2gVmQdcicWw3j1ZZueVV74M-E,11357
-ragit-0.8.2.dist-info/METADATA,sha256=wlBpVj_aHxR7ZWy5yzpo2Wt-IoLcVlFGo4oBXGzMajY,4888
-ragit-0.8.2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-ragit-0.8.2.dist-info/top_level.txt,sha256=pkPbG7yrw61wt9_y_xcLE2vq2a55fzockASD0yq0g4s,6
-ragit-0.8.2.dist-info/RECORD,,