ragxo 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragxo/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
- from ragx import Ragxo, Document
2
1
 
2
+ from .client import Ragxo, Document
3
3
  __all__ = ["Ragxo", "Document"]
ragxo/client.py ADDED
@@ -0,0 +1,253 @@
1
+ import time
2
+ from typing import Self, Callable
3
+ from pymilvus import MilvusClient
4
+ from pydantic import BaseModel
5
+ import boto3
6
+ import dill
7
+ import os
8
+ import shutil
9
+ import logging
10
+ import tempfile
11
+ from botocore.exceptions import ClientError
12
+ import openai
13
+ from openai import ChatCompletion
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ class Document(BaseModel):
18
+ text: str
19
+ metadata: dict
20
+ id: int
21
+
22
+ class Ragxo:
23
+ def __init__(self, dimension: int) -> None:
24
+ self.dimension = dimension
25
+ self.collection_name = "ragx"
26
+ os.makedirs("ragx_artifacts", exist_ok=True)
27
+
28
+ self.db_path = f"ragx_artifacts/milvus_{int(time.time())}.db"
29
+ self.client = MilvusClient(self.db_path)
30
+ self.client.create_collection(self.collection_name, dimension=dimension)
31
+ self.processing_fn = []
32
+ self.embedding_fn = None
33
+ self.system_prompt = None
34
+ self.model = "gpt-4o-mini"
35
+
36
+ def add_preprocess(self, fn: Callable) -> Self:
37
+ self.processing_fn.append(fn)
38
+ return self
39
+
40
+ def add_embedding_fn(self, fn: Callable) -> Self:
41
+ if not fn:
42
+ raise ValueError("Embedding function cannot be None")
43
+ self.embedding_fn = fn
44
+ return self
45
+
46
+ def add_system_prompt(self, prompt: str) -> Self:
47
+ self.system_prompt = prompt
48
+ return self
49
+
50
+ def add_model(self, model: str) -> Self:
51
+ self.model = model
52
+ return self
53
+
54
+ def index(self, data: list[Document]) -> Self:
55
+ if not self.embedding_fn:
56
+ raise ValueError("Embedding function not set")
57
+
58
+ processed_text = []
59
+ for item in data:
60
+ current_text = item.text
61
+ for fn in self.processing_fn:
62
+ current_text = fn(current_text)
63
+ processed_text.append(current_text)
64
+
65
+ embeddings = [
66
+ self.embedding_fn(text)
67
+ for text in processed_text
68
+ ]
69
+
70
+ self.client.insert(self.collection_name, [
71
+ {
72
+ "text": item.text,
73
+ "metadata": item.metadata,
74
+ "id": item.id,
75
+ "vector": embedding
76
+ }
77
+ for item, embedding in zip(data, embeddings)
78
+ ])
79
+ return self
80
+
81
+ def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
82
+ if not self.embedding_fn:
83
+ raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
84
+
85
+ preprocessed_query = query
86
+ for fn in self.processing_fn:
87
+ preprocessed_query = fn(preprocessed_query)
88
+
89
+ embedding = self.embedding_fn(preprocessed_query)
90
+
91
+ return self.client.search(
92
+ collection_name=self.collection_name,
93
+ data=[embedding],
94
+ limit=10,
95
+ output_fields=output_fields
96
+ )
97
+
98
+ def export(self, destination: str, s3_bucket: str = None) -> Self:
99
+ """
100
+ Export the Ragx instance to either local filesystem or S3.
101
+
102
+ Args:
103
+ destination: str - Local path or S3 key prefix
104
+ s3_bucket: str, optional - S3 bucket name. If provided, export to S3
105
+ """
106
+ try:
107
+ # If s3_bucket is provided, export to S3
108
+ if s3_bucket:
109
+ return self._export_to_s3(destination, s3_bucket)
110
+
111
+ # Otherwise, export to local filesystem
112
+ os.makedirs(destination, exist_ok=True)
113
+
114
+ # Save using dill
115
+ pickle_path = os.path.join(destination, "ragx.pkl")
116
+ with open(pickle_path, "wb") as f:
117
+ dill.dump(self, f)
118
+
119
+ # Copy database
120
+ db_dest = os.path.join(destination, "milvus.db")
121
+ shutil.copy(self.db_path, db_dest)
122
+
123
+ return self
124
+
125
+ except Exception as e:
126
+ logger.error(f"Error in export: {e}")
127
+ raise
128
+
129
+ def _export_to_s3(self, prefix: str, bucket: str) -> Self:
130
+ """
131
+ Internal method to handle S3 export.
132
+ """
133
+ try:
134
+ s3_client = boto3.client('s3')
135
+
136
+ # Create a temporary directory for the files
137
+ with tempfile.TemporaryDirectory() as temp_dir:
138
+ # Save pickle file
139
+ pickle_path = os.path.join(temp_dir, "ragx.pkl")
140
+ with open(pickle_path, "wb") as f:
141
+ dill.dump(self, f)
142
+
143
+ # Copy database
144
+ db_path = os.path.join(temp_dir, "milvus.db")
145
+ shutil.copy(self.db_path, db_path)
146
+
147
+ # Upload to S3
148
+ s3_client.upload_file(
149
+ pickle_path,
150
+ bucket,
151
+ f"{prefix}/ragx.pkl"
152
+ )
153
+ s3_client.upload_file(
154
+ db_path,
155
+ bucket,
156
+ f"{prefix}/milvus.db"
157
+ )
158
+
159
+ return self
160
+
161
+ except ClientError as e:
162
+ logger.error(f"Error uploading to S3: {e}")
163
+ raise
164
+ except Exception as e:
165
+ logger.error(f"Error in S3 export: {e}")
166
+ raise
167
+
168
+ @classmethod
169
+ def load(cls, source: str, s3_bucket: str = None) -> Self:
170
+ """
171
+ Load a Ragx instance from either local filesystem or S3.
172
+
173
+ Args:
174
+ source: str - Local path or S3 key prefix
175
+ s3_bucket: str, optional - S3 bucket name. If provided, load from S3
176
+ """
177
+ try:
178
+ # If s3_bucket is provided, load from S3
179
+ if s3_bucket:
180
+ return cls._load_from_s3(source, s3_bucket)
181
+
182
+ # Otherwise, load from local filesystem
183
+ pickle_path = os.path.join(source, "ragx.pkl")
184
+
185
+ with open(pickle_path, "rb") as f:
186
+ instance = dill.load(f)
187
+
188
+ # Restore client
189
+ instance.client = MilvusClient(os.path.join(source, "milvus.db"))
190
+
191
+ return instance
192
+
193
+ except Exception as e:
194
+ logger.error(f"Error in load: {e}")
195
+ raise
196
+
197
+ @classmethod
198
+ def _load_from_s3(cls, prefix: str, bucket: str) -> 'Ragx':
199
+ """
200
+ Internal classmethod to handle S3 loading.
201
+ """
202
+ try:
203
+ s3_client = boto3.client('s3')
204
+
205
+ # Create a temporary directory for the files
206
+ with tempfile.TemporaryDirectory() as temp_dir:
207
+ # Download files from S3
208
+ pickle_path = os.path.join(temp_dir, "ragx.pkl")
209
+ db_path = os.path.join(temp_dir, "milvus.db")
210
+
211
+ s3_client.download_file(
212
+ bucket,
213
+ f"{prefix}/ragx.pkl",
214
+ pickle_path
215
+ )
216
+ s3_client.download_file(
217
+ bucket,
218
+ f"{prefix}/milvus.db",
219
+ db_path
220
+ )
221
+
222
+ # Load the pickle file
223
+ with open(pickle_path, "rb") as f:
224
+ instance = dill.load(f)
225
+
226
+ # Restore client with the downloaded database
227
+ instance.client = MilvusClient(db_path)
228
+
229
+ return instance
230
+
231
+ except ClientError as e:
232
+ logger.error(f"Error downloading from S3: {e}")
233
+ raise
234
+ except Exception as e:
235
+ logger.error(f"Error in S3 load: {e}")
236
+ raise
237
+
238
+ def generate_llm_response(self, query: str, data: list[dict] = None) -> ChatCompletion:
239
+ if data is None:
240
+ data = self.query(query)[0]
241
+
242
+ if not self.system_prompt:
243
+ raise ValueError("System prompt not set. Please call add_system_prompt first.")
244
+
245
+ response = openai.chat.completions.create(
246
+ model=self.model,
247
+ messages=[
248
+ {"role": "system", "content": self.system_prompt},
249
+ {"role": "user", "content": "query: {} data: {}".format(query, data)}
250
+ ]
251
+ )
252
+
253
+ return response
@@ -0,0 +1,167 @@
1
+ Metadata-Version: 2.1
2
+ Name: ragxo
3
+ Version: 0.1.5
4
+ Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
+ Home-page: https://github.com/yourusername/ragx
6
+ License: MIT
7
+ Keywords: rag,milvus,nlp,embeddings,openai
8
+ Author: Mohamed Sadek
9
+ Author-email: mohamedfawzydes@gmail.com
10
+ Requires-Python: >=3.11,<4.0
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Dist: boto3 (>=1.36.14,<2.0.0)
21
+ Requires-Dist: dill (>=0.3.9,<0.4.0)
22
+ Requires-Dist: milvus (>=2.3.9,<3.0.0)
23
+ Requires-Dist: openai (>=1.61.1,<2.0.0)
24
+ Requires-Dist: pydantic (>=2.10.6,<3.0.0)
25
+ Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
26
+ Project-URL: Repository, https://github.com/yourusername/ragx
27
+ Description-Content-Type: text/markdown
28
+
29
+ # RagXO 🚀
30
+
31
+ [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
32
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
33
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
34
+
35
+ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
36
+
37
+ ## Features ✨
38
+
39
+ - **Complete RAG Pipeline**: Package your entire RAG system into a versioned artifact
40
+ - **LLM Integration**: Built-in support for OpenAI models
41
+ - **Flexible Embedding**: Compatible with any embedding function (Sentence Transformers, OpenAI, etc.)
42
+ - **Custom Preprocessing**: Chain multiple preprocessing steps
43
+ - **Vector Store Integration**: Built-in Milvus support
44
+ - **System Prompts**: Include and version your system prompts
45
+
46
+ ## Installation 🛠️
47
+
48
+ ```bash
49
+ pip install ragxo
50
+ ```
51
+
52
+ ## Usage Guide 📚
53
+
54
+ ### Import
55
+
56
+ ```python
57
+ from ragxo import Ragxo, Document
58
+
59
+ ragxo_client = Ragxo(dimension=768)
60
+
61
+ ```
62
+
63
+ ### Adding Preprocessing Steps
64
+
65
+ ```python
66
+ import re
67
+
68
+ def remove_special_chars(text: str) -> str:
69
+ return re.sub(r'[^a-zA-Z0-9\s]', '', text)
70
+
71
+ def lowercase(text: str) -> str:
72
+ return text.lower()
73
+
74
+ ragxo_client.add_preprocess(remove_special_chars)
75
+ ragxo_client.add_preprocess(lowercase)
76
+ ```
77
+
78
+ ### Custom Embedding Functions
79
+
80
+ ```python
81
+ # Using SentenceTransformers
82
+ from sentence_transformers import SentenceTransformer
83
+ model = SentenceTransformer('all-MiniLM-L6-v2')
84
+
85
+ def get_embeddings(text: str) -> list[float]:
86
+ return model.encode(text).tolist()
87
+
88
+ ragxo_client.add_embedding_fn(get_embeddings)
89
+
90
+ # Or using OpenAI
91
+ from openai import OpenAI
92
+ client = OpenAI()
93
+
94
+ def get_openai_embeddings(text: str) -> list[float]:
95
+ response = client.embeddings.create(
96
+ input=text,
97
+ model="text-embedding-ada-002"
98
+ )
99
+ return response.data[0].embedding
100
+
101
+ ragxo_client.add_embedding_fn(get_openai_embeddings)
102
+ ```
103
+
104
+
105
+ ### Creating Documents
106
+
107
+ ```python
108
+ from ragxo import Document
109
+
110
+ doc = Document(
111
+ text="Your document content here",
112
+ metadata={"source": "wiki", "category": "science"},
113
+ id=1
114
+ )
115
+
116
+ ragxo_client.index([doc])
117
+
118
+ ```
119
+
120
+ ### LLM Configuration
121
+
122
+ ```python
123
+ # Set system prompt
124
+ ragxo_client.add_system_prompt("""
125
+ You are a helpful assistant. Use the provided context to answer questions accurately.
126
+ If you're unsure about something, please say so.
127
+ """)
128
+
129
+ # Set LLM model
130
+ ragxo_client.add_model("gpt-4")
131
+ ```
132
+
133
+ ### Export and Load
134
+
135
+ ```python
136
+ # Export your RAG pipeline
137
+ ragxo_client.export("rag_pipeline_v1")
138
+
139
+ # Load it elsewhere
140
+ loaded_ragxo_client = Ragxo.load("rag_pipeline_v1")
141
+ ```
142
+
143
+ ## Best Practices 💡
144
+
145
+ 1. **Version Your Exports**: Use semantic versioning for your exports:
146
+ ```python
147
+ ragxo_client.export("my_rag_v1.0.0")
148
+ ```
149
+
150
+ 2. **S3**: Store your exports in an S3 bucket. Set your AWS credentials in the environment, then pass `s3_bucket` to `export`:
151
+
152
+ ```shell
153
+ export AWS_ACCESS_KEY_ID=your_access_key
154
+ export AWS_SECRET_ACCESS_KEY=your_secret_key
155
+ ```
156
+
157
+ ```python
158
+ ragxo_client.export("my_rag_v1.0.0", s3_bucket="my_bucket")
159
+ ```
160
+
161
+ ## License 📝
162
+
163
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
164
+
165
+ ## Contributing 🤝
166
+
167
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -0,0 +1,5 @@
1
+ ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
2
+ ragxo/client.py,sha256=smS3vt7k0k1p1mDOT8Taa4vpHl0fiuvbo3RdRY_D01k,8300
3
+ ragxo-0.1.5.dist-info/METADATA,sha256=0oS9x3tRULcvKZNQQOyIlKpQ_baDzt1pHB0wTubsb7I,4600
4
+ ragxo-0.1.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
5
+ ragxo-0.1.5.dist-info/RECORD,,
ragxo/ragx.py DELETED
@@ -1,141 +0,0 @@
1
- from typing import Self, Callable
2
- from pymilvus import MilvusClient
3
- from pydantic import BaseModel
4
- import dill
5
- import os
6
- import shutil
7
- import logging
8
- import openai
9
- from openai import ChatCompletion
10
-
11
- logging.basicConfig(level=logging.DEBUG)
12
- logger = logging.getLogger(__name__)
13
-
14
- class Document(BaseModel):
15
- text: str
16
- metadata: dict
17
- id: int
18
-
19
- class Ragxo:
20
- def __init__(self, dimension: int) -> None:
21
- self.dimension = dimension
22
- self.collection_name = "ragx"
23
- self.db_path = "milvus.db"
24
- self.client = MilvusClient(self.db_path)
25
- self.client.create_collection(self.collection_name, dimension=dimension)
26
- self.processing_fn = []
27
- self.embedding_fn = None
28
- self.system_prompt = None
29
- self.model = "gpt-4o-mini"
30
-
31
- def add_preprocess(self, fn: Callable) -> Self:
32
- self.processing_fn.append(fn)
33
- return self
34
-
35
- def add_embedding_fn(self, fn: Callable) -> Self:
36
- if not fn:
37
- raise ValueError("Embedding function cannot be None")
38
- self.embedding_fn = fn
39
- return self
40
-
41
- def add_system_prompt(self, prompt: str) -> Self:
42
- self.system_prompt = prompt
43
- return self
44
-
45
- def add_model(self, model: str) -> Self:
46
- self.model = model
47
- return self
48
-
49
- def index(self, data: list[Document]) -> Self:
50
- if not self.embedding_fn:
51
- raise ValueError("Embedding function not set")
52
-
53
- processed_text = []
54
- for item in data:
55
- current_text = item.text
56
- for fn in self.processing_fn:
57
- current_text = fn(current_text)
58
- processed_text.append(current_text)
59
-
60
- embeddings = [
61
- self.embedding_fn(text)
62
- for text in processed_text
63
- ]
64
-
65
- self.client.insert(self.collection_name, [
66
- {
67
- "text": item.text,
68
- "metadata": item.metadata,
69
- "id": item.id,
70
- "vector": embedding
71
- }
72
- for item, embedding in zip(data, embeddings)
73
- ])
74
- return self
75
-
76
- def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
77
- if not self.embedding_fn:
78
- raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
79
-
80
- preprocessed_query = query
81
- for fn in self.processing_fn:
82
- preprocessed_query = fn(preprocessed_query)
83
-
84
- embedding = self.embedding_fn(preprocessed_query)
85
-
86
- return self.client.search(
87
- collection_name=self.collection_name,
88
- data=[embedding],
89
- limit=10,
90
- output_fields=output_fields
91
- )
92
-
93
- def export(self, folder_path: str) -> Self:
94
- try:
95
- os.makedirs(folder_path, exist_ok=True)
96
-
97
- # Save using dill
98
- pickle_path = os.path.join(folder_path, "ragx.pkl")
99
- with open(pickle_path, "wb") as f:
100
- dill.dump(self, f)
101
-
102
- # Copy database
103
- db_dest = os.path.join(folder_path, "milvus.db")
104
- shutil.copy(self.db_path, db_dest)
105
-
106
- return self
107
-
108
- except Exception as e:
109
- logger.error(f"Error in export: {e}")
110
- raise
111
-
112
- @classmethod
113
- def load(cls, folder_path: str) -> 'Ragx':
114
- try:
115
- pickle_path = os.path.join(folder_path, "ragx.pkl")
116
-
117
- with open(pickle_path, "rb") as f:
118
- instance = dill.load(f)
119
-
120
- # Restore client
121
- instance.client = MilvusClient(os.path.join(folder_path, "milvus.db"))
122
-
123
- return instance
124
-
125
- except Exception as e:
126
- logger.error(f"Error in load: {e}")
127
- raise
128
-
129
- def generate_llm_response(self, query: str) -> ChatCompletion:
130
- if not self.system_prompt:
131
- raise ValueError("System prompt not set. Please call add_system_prompt first.")
132
-
133
- response = openai.chat.completions.create(
134
- model=self.model,
135
- messages=[
136
- {"role": "system", "content": self.system_prompt},
137
- {"role": "user", "content": query}
138
- ]
139
- )
140
-
141
- return response
@@ -1,28 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: ragxo
3
- Version: 0.1.2
4
- Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
- Home-page: https://github.com/yourusername/ragx
6
- License: MIT
7
- Keywords: rag,milvus,nlp,embeddings,openai
8
- Author: Mohamed Sadek
9
- Author-email: mohamedfawzydes@gmail.com
10
- Requires-Python: >=3.11,<4.0
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Requires-Dist: dill (>=0.3.9,<0.4.0)
21
- Requires-Dist: milvus (>=2.3.9,<3.0.0)
22
- Requires-Dist: openai (>=1.61.1,<2.0.0)
23
- Requires-Dist: pydantic (>=2.10.6,<3.0.0)
24
- Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
25
- Project-URL: Repository, https://github.com/yourusername/ragx
26
- Description-Content-Type: text/markdown
27
-
28
-
@@ -1,5 +0,0 @@
1
- ragxo/__init__.py,sha256=jI_6iulTUQk9JUDft-jM6NHESpZSmJVPIaVOmd4-jWw,65
2
- ragxo/ragx.py,sha256=KotppZuO9U1aQG0CSbvRTVOXBN0BpaKMlS1IrqBmigk,4394
3
- ragxo-0.1.2.dist-info/METADATA,sha256=xZ3DcX6lsLCBLnwcn13J5pufLilNnvjL8nk8ccGlyGo,1111
4
- ragxo-0.1.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
5
- ragxo-0.1.2.dist-info/RECORD,,
File without changes