ragit 0.8.2__tar.gz → 0.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {ragit-0.8.2/ragit.egg-info → ragit-0.11.0}/PKG-INFO +48 -25
  2. {ragit-0.8.2 → ragit-0.11.0}/README.md +45 -18
  3. {ragit-0.8.2 → ragit-0.11.0}/pyproject.toml +2 -6
  4. {ragit-0.8.2 → ragit-0.11.0}/ragit/__init__.py +27 -15
  5. {ragit-0.8.2 → ragit-0.11.0}/ragit/assistant.py +431 -40
  6. ragit-0.11.0/ragit/config.py +203 -0
  7. {ragit-0.8.2 → ragit-0.11.0}/ragit/core/experiment/experiment.py +7 -1
  8. ragit-0.11.0/ragit/exceptions.py +271 -0
  9. ragit-0.11.0/ragit/loaders.py +401 -0
  10. ragit-0.11.0/ragit/logging.py +194 -0
  11. ragit-0.11.0/ragit/monitor.py +307 -0
  12. {ragit-0.8.2 → ragit-0.11.0}/ragit/providers/__init__.py +1 -13
  13. ragit-0.11.0/ragit/providers/ollama.py +704 -0
  14. {ragit-0.8.2 → ragit-0.11.0}/ragit/utils/__init__.py +0 -22
  15. {ragit-0.8.2 → ragit-0.11.0}/ragit/version.py +1 -1
  16. {ragit-0.8.2 → ragit-0.11.0/ragit.egg-info}/PKG-INFO +48 -25
  17. {ragit-0.8.2 → ragit-0.11.0}/ragit.egg-info/SOURCES.txt +3 -1
  18. {ragit-0.8.2 → ragit-0.11.0}/ragit.egg-info/requires.txt +1 -6
  19. ragit-0.8.2/ragit/config.py +0 -60
  20. ragit-0.8.2/ragit/loaders.py +0 -245
  21. ragit-0.8.2/ragit/providers/ollama.py +0 -446
  22. ragit-0.8.2/ragit/providers/sentence_transformers.py +0 -225
  23. {ragit-0.8.2 → ragit-0.11.0}/LICENSE +0 -0
  24. {ragit-0.8.2 → ragit-0.11.0}/ragit/core/__init__.py +0 -0
  25. {ragit-0.8.2 → ragit-0.11.0}/ragit/core/experiment/__init__.py +0 -0
  26. {ragit-0.8.2 → ragit-0.11.0}/ragit/core/experiment/results.py +0 -0
  27. {ragit-0.8.2 → ragit-0.11.0}/ragit/providers/base.py +0 -0
  28. {ragit-0.8.2 → ragit-0.11.0}/ragit/providers/function_adapter.py +0 -0
  29. {ragit-0.8.2 → ragit-0.11.0}/ragit.egg-info/dependency_links.txt +0 -0
  30. {ragit-0.8.2 → ragit-0.11.0}/ragit.egg-info/top_level.txt +0 -0
  31. {ragit-0.8.2 → ragit-0.11.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ragit
3
- Version: 0.8.2
3
+ Version: 0.11.0
4
4
  Summary: Automatic RAG Pattern Optimization Engine
5
5
  Author: RODMENA LIMITED
6
6
  Maintainer-email: RODMENA LIMITED <info@rodmena.co.uk>
@@ -16,18 +16,16 @@ Classifier: Programming Language :: Python :: 3.13
16
16
  Classifier: Programming Language :: Python :: 3.14
17
17
  Classifier: Operating System :: MacOS :: MacOS X
18
18
  Classifier: Operating System :: POSIX :: Linux
19
- Requires-Python: <3.14,>=3.12
19
+ Requires-Python: >=3.12
20
20
  Description-Content-Type: text/markdown
21
21
  License-File: LICENSE
22
22
  Requires-Dist: requests>=2.31.0
23
23
  Requires-Dist: numpy>=1.26.0
24
- Requires-Dist: pandas>=2.2.0
25
24
  Requires-Dist: pydantic>=2.0.0
26
25
  Requires-Dist: python-dotenv>=1.0.0
27
- Requires-Dist: scikit-learn>=1.5.0
28
26
  Requires-Dist: tqdm>=4.66.0
29
- Requires-Dist: trio>=0.24.0
30
27
  Requires-Dist: httpx>=0.27.0
28
+ Requires-Dist: resilient-circuit>=0.4.7
31
29
  Provides-Extra: dev
32
30
  Requires-Dist: ragit[test]; extra == "dev"
33
31
  Requires-Dist: pytest; extra == "dev"
@@ -39,8 +37,6 @@ Provides-Extra: test
39
37
  Requires-Dist: pytest; extra == "test"
40
38
  Requires-Dist: pytest-cov; extra == "test"
41
39
  Requires-Dist: pytest-mock; extra == "test"
42
- Provides-Extra: transformers
43
- Requires-Dist: sentence-transformers>=2.2.0; extra == "transformers"
44
40
  Provides-Extra: docs
45
41
  Requires-Dist: sphinx>=7.0; extra == "docs"
46
42
  Requires-Dist: sphinx-rtd-theme>=2.0; extra == "docs"
@@ -55,14 +51,11 @@ RAG toolkit for Python. Document loading, chunking, vector search, LLM integrati
55
51
 
56
52
  ```bash
57
53
  pip install ragit
58
-
59
- # For offline embedding
60
- pip install ragit[transformers]
61
54
  ```
62
55
 
63
56
  ## Quick Start
64
57
 
65
- You must provide an embedding source: custom function, SentenceTransformers, or any provider.
58
+ You must provide an embedding source: custom function, Ollama, or any provider.
66
59
 
67
60
  ### Custom Embedding Function
68
61
 
@@ -90,26 +83,17 @@ assistant = RAGAssistant("docs/", embed_fn=my_embed, generate_fn=my_generate)
90
83
  answer = assistant.ask("How does authentication work?")
91
84
  ```
92
85
 
93
- ### Offline Embedding (SentenceTransformers)
94
-
95
- Models are downloaded automatically on first use (~90MB for default model).
86
+ ### With Ollama (nomic-embed-text)
96
87
 
97
88
  ```python
98
89
  from ragit import RAGAssistant
99
- from ragit.providers import SentenceTransformersProvider
90
+ from ragit.providers import OllamaProvider
100
91
 
101
- # Uses all-MiniLM-L6-v2 by default
102
- assistant = RAGAssistant("docs/", provider=SentenceTransformersProvider())
103
-
104
- # Or specify a model
105
- assistant = RAGAssistant(
106
- "docs/",
107
- provider=SentenceTransformersProvider(model_name="all-mpnet-base-v2")
108
- )
92
+ # Uses nomic-embed-text for embeddings (768d)
93
+ assistant = RAGAssistant("docs/", provider=OllamaProvider())
94
+ results = assistant.retrieve("search query")
109
95
  ```
110
96
 
111
- Available models: `all-MiniLM-L6-v2` (384d), `all-mpnet-base-v2` (768d), `paraphrase-MiniLM-L6-v2` (384d)
112
-
113
97
  ## Core API
114
98
 
115
99
  ```python
@@ -128,6 +112,45 @@ answer = assistant.ask(question, top_k=3) # Requires generate_fn/LLM
128
112
  code = assistant.generate_code(request) # Requires generate_fn/LLM
129
113
  ```
130
114
 
115
+ ## Index Persistence
116
+
117
+ Save and load indexes to avoid re-computing embeddings:
118
+
119
+ ```python
120
+ # Save index to disk
121
+ assistant.save_index("./my_index")
122
+
123
+ # Load index later (much faster than re-indexing)
124
+ loaded = RAGAssistant.load_index("./my_index", provider=OllamaProvider())
125
+ results = loaded.retrieve("query")
126
+ ```
127
+
128
+ ## Thread Safety
129
+
130
+ RAGAssistant is thread-safe. Multiple threads can safely read while another writes:
131
+
132
+ ```python
133
+ import threading
134
+
135
+ assistant = RAGAssistant("docs/", provider=OllamaProvider())
136
+
137
+ # Safe: concurrent reads and writes
138
+ threading.Thread(target=lambda: assistant.retrieve("query")).start()
139
+ threading.Thread(target=lambda: assistant.add_documents([new_doc])).start()
140
+ ```
141
+
142
+ ## Resource Management
143
+
144
+ Use context managers for automatic cleanup:
145
+
146
+ ```python
147
+ from ragit.providers import OllamaProvider
148
+
149
+ with OllamaProvider() as provider:
150
+     response = provider.generate("Hello", model="llama3")
151
+ # Session automatically closed
152
+ ```
153
+
131
154
  ## Document Loading
132
155
 
133
156
  ```python
@@ -6,14 +6,11 @@ RAG toolkit for Python. Document loading, chunking, vector search, LLM integrati
6
6
 
7
7
  ```bash
8
8
  pip install ragit
9
-
10
- # For offline embedding
11
- pip install ragit[transformers]
12
9
  ```
13
10
 
14
11
  ## Quick Start
15
12
 
16
- You must provide an embedding source: custom function, SentenceTransformers, or any provider.
13
+ You must provide an embedding source: custom function, Ollama, or any provider.
17
14
 
18
15
  ### Custom Embedding Function
19
16
 
@@ -41,26 +38,17 @@ assistant = RAGAssistant("docs/", embed_fn=my_embed, generate_fn=my_generate)
41
38
  answer = assistant.ask("How does authentication work?")
42
39
  ```
43
40
 
44
- ### Offline Embedding (SentenceTransformers)
45
-
46
- Models are downloaded automatically on first use (~90MB for default model).
41
+ ### With Ollama (nomic-embed-text)
47
42
 
48
43
  ```python
49
44
  from ragit import RAGAssistant
50
- from ragit.providers import SentenceTransformersProvider
45
+ from ragit.providers import OllamaProvider
51
46
 
52
- # Uses all-MiniLM-L6-v2 by default
53
- assistant = RAGAssistant("docs/", provider=SentenceTransformersProvider())
54
-
55
- # Or specify a model
56
- assistant = RAGAssistant(
57
- "docs/",
58
- provider=SentenceTransformersProvider(model_name="all-mpnet-base-v2")
59
- )
47
+ # Uses nomic-embed-text for embeddings (768d)
48
+ assistant = RAGAssistant("docs/", provider=OllamaProvider())
49
+ results = assistant.retrieve("search query")
60
50
  ```
61
51
 
62
- Available models: `all-MiniLM-L6-v2` (384d), `all-mpnet-base-v2` (768d), `paraphrase-MiniLM-L6-v2` (384d)
63
-
64
52
  ## Core API
65
53
 
66
54
  ```python
@@ -79,6 +67,45 @@ answer = assistant.ask(question, top_k=3) # Requires generate_fn/LLM
79
67
  code = assistant.generate_code(request) # Requires generate_fn/LLM
80
68
  ```
81
69
 
70
+ ## Index Persistence
71
+
72
+ Save and load indexes to avoid re-computing embeddings:
73
+
74
+ ```python
75
+ # Save index to disk
76
+ assistant.save_index("./my_index")
77
+
78
+ # Load index later (much faster than re-indexing)
79
+ loaded = RAGAssistant.load_index("./my_index", provider=OllamaProvider())
80
+ results = loaded.retrieve("query")
81
+ ```
82
+
83
+ ## Thread Safety
84
+
85
+ RAGAssistant is thread-safe. Multiple threads can safely read while another writes:
86
+
87
+ ```python
88
+ import threading
89
+
90
+ assistant = RAGAssistant("docs/", provider=OllamaProvider())
91
+
92
+ # Safe: concurrent reads and writes
93
+ threading.Thread(target=lambda: assistant.retrieve("query")).start()
94
+ threading.Thread(target=lambda: assistant.add_documents([new_doc])).start()
95
+ ```
96
+
97
+ ## Resource Management
98
+
99
+ Use context managers for automatic cleanup:
100
+
101
+ ```python
102
+ from ragit.providers import OllamaProvider
103
+
104
+ with OllamaProvider() as provider:
105
+     response = provider.generate("Hello", model="llama3")
106
+ # Session automatically closed
107
+ ```
108
+
82
109
  ## Document Loading
83
110
 
84
111
  ```python
@@ -10,7 +10,7 @@ maintainers = [
10
10
  { name = "RODMENA LIMITED", email = "info@rodmena.co.uk" },
11
11
  ]
12
12
  readme = "README.md"
13
- requires-python = ">=3.12,<3.14"
13
+ requires-python = ">=3.12"
14
14
  classifiers = [
15
15
  "Development Status :: 2 - Pre-Alpha",
16
16
  "Natural Language :: English",
@@ -33,13 +33,11 @@ dynamic = ["version"]
33
33
  dependencies = [
34
34
  "requests>=2.31.0",
35
35
  "numpy>=1.26.0",
36
- "pandas>=2.2.0",
37
36
  "pydantic>=2.0.0",
38
37
  "python-dotenv>=1.0.0",
39
- "scikit-learn>=1.5.0",
40
38
  "tqdm>=4.66.0",
41
- "trio>=0.24.0",
42
39
  "httpx>=0.27.0",
40
+ "resilient-circuit>=0.4.7",
43
41
  ]
44
42
 
45
43
  [project.urls]
@@ -59,8 +57,6 @@ dev = [
59
57
 
60
58
  test = ["pytest", "pytest-cov", "pytest-mock"]
61
59
 
62
- transformers = ["sentence-transformers>=2.2.0"]
63
-
64
60
  docs = [
65
61
  "sphinx>=7.0",
66
62
  "sphinx-rtd-theme>=2.0",
@@ -16,11 +16,7 @@ Quick Start
16
16
  >>> assistant = RAGAssistant("docs/", embed_fn=my_embed)
17
17
  >>> results = assistant.retrieve("How do I create a REST API?")
18
18
  >>>
19
- >>> # With SentenceTransformers (offline, requires ragit[transformers])
20
- >>> from ragit.providers import SentenceTransformersProvider
21
- >>> assistant = RAGAssistant("docs/", provider=SentenceTransformersProvider())
22
- >>>
23
- >>> # With Ollama (explicit)
19
+ >>> # With Ollama
24
20
  >>> from ragit.providers import OllamaProvider
25
21
  >>> assistant = RAGAssistant("docs/", provider=OllamaProvider())
26
22
  >>> answer = assistant.ask("How do I create a REST API?")
@@ -63,14 +59,27 @@ from ragit.core.experiment.experiment import ( # noqa: E402
63
59
  RagitExperiment,
64
60
  )
65
61
  from ragit.core.experiment.results import EvaluationResult, ExperimentResults # noqa: E402
62
+ from ragit.exceptions import ( # noqa: E402
63
+ ConfigurationError,
64
+ EvaluationError,
65
+ ExceptionAggregator,
66
+ GenerationError,
67
+ IndexingError,
68
+ ProviderError,
69
+ RagitError,
70
+ RetrievalError,
71
+ )
66
72
  from ragit.loaders import ( # noqa: E402
67
73
  chunk_by_separator,
68
74
  chunk_document,
69
75
  chunk_rst_sections,
70
76
  chunk_text,
77
+ deduplicate_documents,
78
+ generate_document_id,
71
79
  load_directory,
72
80
  load_text,
73
81
  )
82
+ from ragit.monitor import ExecutionMonitor # noqa: E402
74
83
  from ragit.providers import ( # noqa: E402
75
84
  BaseEmbeddingProvider,
76
85
  BaseLLMProvider,
@@ -89,6 +98,8 @@ __all__ = [
89
98
  "chunk_document",
90
99
  "chunk_by_separator",
91
100
  "chunk_rst_sections",
101
+ "generate_document_id",
102
+ "deduplicate_documents",
92
103
  # Core classes
93
104
  "Document",
94
105
  "Chunk",
@@ -97,6 +108,17 @@ __all__ = [
97
108
  "FunctionProvider",
98
109
  "BaseLLMProvider",
99
110
  "BaseEmbeddingProvider",
111
+ # Exceptions
112
+ "RagitError",
113
+ "ConfigurationError",
114
+ "ProviderError",
115
+ "IndexingError",
116
+ "RetrievalError",
117
+ "GenerationError",
118
+ "EvaluationError",
119
+ "ExceptionAggregator",
120
+ # Monitoring
121
+ "ExecutionMonitor",
100
122
  # Optimization
101
123
  "RagitExperiment",
102
124
  "BenchmarkQuestion",
@@ -104,13 +126,3 @@ __all__ = [
104
126
  "EvaluationResult",
105
127
  "ExperimentResults",
106
128
  ]
107
-
108
- # Conditionally add SentenceTransformersProvider if available
109
- try:
110
- from ragit.providers import ( # noqa: E402
111
- SentenceTransformersProvider as SentenceTransformersProvider,
112
- )
113
-
114
- __all__ += ["SentenceTransformersProvider"]
115
- except ImportError:
116
- pass