ragxo 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragxo-0.1.3/PKG-INFO ADDED
@@ -0,0 +1,212 @@
1
+ Metadata-Version: 2.1
2
+ Name: ragxo
3
+ Version: 0.1.3
4
+ Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
+ Home-page: https://github.com/yourusername/ragx
6
+ License: MIT
7
+ Keywords: rag,milvus,nlp,embeddings,openai
8
+ Author: Mohamed Sadek
9
+ Author-email: mohamedfawzydes@gmail.com
10
+ Requires-Python: >=3.11,<4.0
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Dist: dill (>=0.3.9,<0.4.0)
21
+ Requires-Dist: milvus (>=2.3.9,<3.0.0)
22
+ Requires-Dist: openai (>=1.61.1,<2.0.0)
23
+ Requires-Dist: pydantic (>=2.10.6,<3.0.0)
24
+ Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
25
+ Project-URL: Repository, https://github.com/yourusername/ragx
26
+ Description-Content-Type: text/markdown
27
+
28
+ # RagXO 🚀
29
+
30
+ [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
31
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
32
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
33
+
34
+ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
35
+
36
+ ## Features ✨
37
+
38
+ - **Complete RAG Pipeline**: Package your entire RAG system into a versioned artifact
39
+ - **LLM Integration**: Built-in support for OpenAI models
40
+ - **Flexible Embedding**: Compatible with any embedding function (Sentence Transformers, OpenAI, etc.)
41
+ - **Custom Preprocessing**: Chain multiple preprocessing steps
42
+ - **Vector Store Integration**: Built-in Milvus support
43
+ - **System Prompts**: Include and version your system prompts
44
+
45
+ ## Installation 🛠️
46
+
47
+ ```bash
48
+ pip install ragxo
49
+ ```
50
+
51
+ ## Quick Start 🚀
52
+
53
+ ```python
54
+ from ragxo import Ragxo, Document
55
+ from openai import OpenAI
56
+ client = OpenAI()
57
+
58
+ def get_openai_embeddings(text: str) -> list[float]:
59
+ response = client.embeddings.create(
60
+ input=text,
61
+ model="text-embedding-ada-002"
62
+ )
63
+ return response.data[0].embedding
64
+
65
+ def preprocess_text(text: str) -> str:
66
+ return text.lower()
67
+
68
+ # Initialize and configure RagXO
69
+ ragxo = Ragxo(dimension=1536)  # text-embedding-ada-002 returns 1536-dimensional vectors
70
+ ragxo.add_preprocess(preprocess_text)
71
+ ragxo.add_embedding_fn(get_openai_embeddings)
72
+
73
+ # Add system prompt and model
74
+ ragxo.add_system_prompt("You are a helpful assistant.")
75
+ ragxo.add_model("gpt-4o-mini")
76
+
77
+ # Create and index documents
78
+ documents = [
79
+ Document(
80
+ text="Sample document for indexing",
81
+ metadata={"source": "example"},
82
+ id=1
83
+ )
84
+ ]
85
+ ragxo.index(documents)
86
+
87
+ # Export the pipeline
88
+ ragxo.export("my_rag_v1")
89
+
90
+ # Load and use elsewhere
91
+ loaded_ragxo = Ragxo.load("my_rag_v1")
92
+
93
+ # Query and generate response
94
+ similar_docs = loaded_ragxo.query("sample query")
95
+ llm_response = loaded_ragxo.generate_llm_response("What can you tell me about the sample?")
96
+ ```
97
+
98
+ ## Usage Guide 📚
99
+
100
+ ### Creating Documents
101
+
102
+ ```python
103
+ from ragxo import Document
104
+
105
+ doc = Document(
106
+ text="Your document content here",
107
+ metadata={"source": "wiki", "category": "science"},
108
+ id=1
109
+ )
110
+ ```
111
+
112
+ ### Adding Preprocessing Steps
113
+
114
+ ```python
115
+ import re
116
+
117
+ def remove_special_chars(text: str) -> str:
118
+ return re.sub(r'[^a-zA-Z0-9\s]', '', text)
119
+
120
+ def lowercase(text: str) -> str:
121
+ return text.lower()
122
+
123
+ ragxo.add_preprocess(remove_special_chars)
124
+ ragxo.add_preprocess(lowercase)
125
+ ```
126
+
127
+ ### Custom Embedding Functions
128
+
129
+ ```python
130
+ # Using SentenceTransformers
131
+ from sentence_transformers import SentenceTransformer
132
+ model = SentenceTransformer('all-MiniLM-L6-v2')
133
+
134
+ def get_embeddings(text: str) -> list[float]:
135
+ return model.encode(text).tolist()
136
+
137
+ ragxo.add_embedding_fn(get_embeddings)
138
+
139
+ # Or using OpenAI
140
+ from openai import OpenAI
141
+ client = OpenAI()
142
+
143
+ def get_openai_embeddings(text: str) -> list[float]:
144
+ response = client.embeddings.create(
145
+ input=text,
146
+ model="text-embedding-ada-002"
147
+ )
148
+ return response.data[0].embedding
149
+
150
+ ragxo.add_embedding_fn(get_openai_embeddings)
151
+ ```
152
+
153
+ ### LLM Configuration
154
+
155
+ ```python
156
+ # Set system prompt
157
+ ragxo.add_system_prompt("""
158
+ You are a helpful assistant. Use the provided context to answer questions accurately.
159
+ If you're unsure about something, please say so.
160
+ """)
161
+
162
+ # Set LLM model
163
+ ragxo.add_model("gpt-4")
164
+ ```
165
+
166
+ ### Export and Load
167
+
168
+ ```python
169
+ # Export your RAG pipeline
170
+ ragxo.export("rag_pipeline_v1")
171
+
172
+ # Load it elsewhere
173
+ loaded_ragxo = Ragxo.load("rag_pipeline_v1")
174
+ ```
175
+
176
+ ## Best Practices 💡
177
+
178
+ 1. **Version Your Exports**: Use semantic versioning for your exports:
179
+ ```python
180
+ ragxo.export("my_rag_v1.0.0")
181
+ ```
182
+
183
+ 2. **Validate After Loading**: Always test your loaded pipeline:
184
+ ```python
185
+ loaded_ragxo = Ragxo.load("my_rag")
186
+ try:
187
+ # Test similarity search
188
+ similar_docs = loaded_ragxo.query("test query")
189
+ # Test LLM generation
190
+ llm_response = loaded_ragxo.generate_llm_response("test question")
191
+ print("Pipeline loaded successfully!")
192
+ except Exception as e:
193
+ print(f"Error loading pipeline: {e}")
194
+ ```
195
+
196
+ 3. **Document Your Pipeline Configuration**: Keep track of your setup:
197
+ ```python
198
+ pipeline_config = {
199
+ "preprocessing_steps": ["remove_special_chars", "lowercase"],
200
+ "embedding_model": "all-MiniLM-L6-v2",
201
+ "llm_model": "gpt-4",
202
+ "dimension": 384
203
+ }
204
+ ```
205
+
206
+ ## License 📝
207
+
208
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
209
+
210
+ ## Contributing 🤝
211
+
212
+ Contributions are welcome! Please feel free to submit a Pull Request.
ragxo-0.1.3/README.md ADDED
@@ -0,0 +1,185 @@
1
+ # RagXO 🚀
2
+
3
+ [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
6
+
7
+ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
8
+
9
+ ## Features ✨
10
+
11
+ - **Complete RAG Pipeline**: Package your entire RAG system into a versioned artifact
12
+ - **LLM Integration**: Built-in support for OpenAI models
13
+ - **Flexible Embedding**: Compatible with any embedding function (Sentence Transformers, OpenAI, etc.)
14
+ - **Custom Preprocessing**: Chain multiple preprocessing steps
15
+ - **Vector Store Integration**: Built-in Milvus support
16
+ - **System Prompts**: Include and version your system prompts
17
+
18
+ ## Installation 🛠️
19
+
20
+ ```bash
21
+ pip install ragxo
22
+ ```
23
+
24
+ ## Quick Start 🚀
25
+
26
+ ```python
27
+ from ragxo import Ragxo, Document
28
+ from openai import OpenAI
29
+ client = OpenAI()
30
+
31
+ def get_openai_embeddings(text: str) -> list[float]:
32
+ response = client.embeddings.create(
33
+ input=text,
34
+ model="text-embedding-ada-002"
35
+ )
36
+ return response.data[0].embedding
37
+
38
+ def preprocess_text(text: str) -> str:
39
+ return text.lower()
40
+
41
+ # Initialize and configure RagXO
42
+ ragxo = Ragxo(dimension=1536)  # text-embedding-ada-002 returns 1536-dimensional vectors
43
+ ragxo.add_preprocess(preprocess_text)
44
+ ragxo.add_embedding_fn(get_openai_embeddings)
45
+
46
+ # Add system prompt and model
47
+ ragxo.add_system_prompt("You are a helpful assistant.")
48
+ ragxo.add_model("gpt-4o-mini")
49
+
50
+ # Create and index documents
51
+ documents = [
52
+ Document(
53
+ text="Sample document for indexing",
54
+ metadata={"source": "example"},
55
+ id=1
56
+ )
57
+ ]
58
+ ragxo.index(documents)
59
+
60
+ # Export the pipeline
61
+ ragxo.export("my_rag_v1")
62
+
63
+ # Load and use elsewhere
64
+ loaded_ragxo = Ragxo.load("my_rag_v1")
65
+
66
+ # Query and generate response
67
+ similar_docs = loaded_ragxo.query("sample query")
68
+ llm_response = loaded_ragxo.generate_llm_response("What can you tell me about the sample?")
69
+ ```
70
+
71
+ ## Usage Guide 📚
72
+
73
+ ### Creating Documents
74
+
75
+ ```python
76
+ from ragxo import Document
77
+
78
+ doc = Document(
79
+ text="Your document content here",
80
+ metadata={"source": "wiki", "category": "science"},
81
+ id=1
82
+ )
83
+ ```
84
+
85
+ ### Adding Preprocessing Steps
86
+
87
+ ```python
88
+ import re
89
+
90
+ def remove_special_chars(text: str) -> str:
91
+ return re.sub(r'[^a-zA-Z0-9\s]', '', text)
92
+
93
+ def lowercase(text: str) -> str:
94
+ return text.lower()
95
+
96
+ ragxo.add_preprocess(remove_special_chars)
97
+ ragxo.add_preprocess(lowercase)
98
+ ```
99
+
100
+ ### Custom Embedding Functions
101
+
102
+ ```python
103
+ # Using SentenceTransformers
104
+ from sentence_transformers import SentenceTransformer
105
+ model = SentenceTransformer('all-MiniLM-L6-v2')
106
+
107
+ def get_embeddings(text: str) -> list[float]:
108
+ return model.encode(text).tolist()
109
+
110
+ ragxo.add_embedding_fn(get_embeddings)
111
+
112
+ # Or using OpenAI
113
+ from openai import OpenAI
114
+ client = OpenAI()
115
+
116
+ def get_openai_embeddings(text: str) -> list[float]:
117
+ response = client.embeddings.create(
118
+ input=text,
119
+ model="text-embedding-ada-002"
120
+ )
121
+ return response.data[0].embedding
122
+
123
+ ragxo.add_embedding_fn(get_openai_embeddings)
124
+ ```
125
+
126
+ ### LLM Configuration
127
+
128
+ ```python
129
+ # Set system prompt
130
+ ragxo.add_system_prompt("""
131
+ You are a helpful assistant. Use the provided context to answer questions accurately.
132
+ If you're unsure about something, please say so.
133
+ """)
134
+
135
+ # Set LLM model
136
+ ragxo.add_model("gpt-4")
137
+ ```
138
+
139
+ ### Export and Load
140
+
141
+ ```python
142
+ # Export your RAG pipeline
143
+ ragxo.export("rag_pipeline_v1")
144
+
145
+ # Load it elsewhere
146
+ loaded_ragxo = Ragxo.load("rag_pipeline_v1")
147
+ ```
148
+
149
+ ## Best Practices 💡
150
+
151
+ 1. **Version Your Exports**: Use semantic versioning for your exports:
152
+ ```python
153
+ ragxo.export("my_rag_v1.0.0")
154
+ ```
155
+
156
+ 2. **Validate After Loading**: Always test your loaded pipeline:
157
+ ```python
158
+ loaded_ragxo = Ragxo.load("my_rag")
159
+ try:
160
+ # Test similarity search
161
+ similar_docs = loaded_ragxo.query("test query")
162
+ # Test LLM generation
163
+ llm_response = loaded_ragxo.generate_llm_response("test question")
164
+ print("Pipeline loaded successfully!")
165
+ except Exception as e:
166
+ print(f"Error loading pipeline: {e}")
167
+ ```
168
+
169
+ 3. **Document Your Pipeline Configuration**: Keep track of your setup:
170
+ ```python
171
+ pipeline_config = {
172
+ "preprocessing_steps": ["remove_special_chars", "lowercase"],
173
+ "embedding_model": "all-MiniLM-L6-v2",
174
+ "llm_model": "gpt-4",
175
+ "dimension": 384
176
+ }
177
+ ```
178
+
179
+ ## License 📝
180
+
181
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
182
+
183
+ ## Contributing 🤝
184
+
185
+ Contributions are welcome! Please feel free to submit a Pull Request.
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ragxo"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  description = "A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration"
5
5
  authors = ["Mohamed Sadek <mohamedfawzydes@gmail.com>"]
6
6
  readme = "README.md"
@@ -23,8 +23,6 @@ python = "^3.11"
23
23
  milvus = "^2.3.9"
24
24
  pymilvus = "^2.5.4"
25
25
  pydantic = "^2.10.6"
26
- nltk = "^3.9.1"
27
- sentence-transformers = "^3.4.1"
28
26
  dill = "^0.3.9"
29
27
  openai = "^1.61.1"
30
28
 
@@ -126,7 +126,11 @@ class Ragxo:
126
126
  logger.error(f"Error in load: {e}")
127
127
  raise
128
128
 
129
- def generate_llm_response(self, query: str) -> ChatCompletion:
129
+ def generate_llm_response(self, query: str, data: list[dict] = None) -> ChatCompletion:
130
+
131
+ if data is None:
132
+ data = self.query(query)[0]
133
+
130
134
  if not self.system_prompt:
131
135
  raise ValueError("System prompt not set. Please call add_system_prompt first.")
132
136
 
@@ -134,7 +138,7 @@ class Ragxo:
134
138
  model=self.model,
135
139
  messages=[
136
140
  {"role": "system", "content": self.system_prompt},
137
- {"role": "user", "content": query}
141
+ {"role": "user", "content": "query: {} data: {}".format(query, data)}
138
142
  ]
139
143
  )
140
144
 
ragxo-0.1.1/PKG-INFO DELETED
@@ -1,30 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: ragxo
3
- Version: 0.1.1
4
- Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
- Home-page: https://github.com/yourusername/ragx
6
- License: MIT
7
- Keywords: rag,milvus,nlp,embeddings,openai
8
- Author: Mohamed Sadek
9
- Author-email: mohamedfawzydes@gmail.com
10
- Requires-Python: >=3.11,<4.0
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Requires-Dist: dill (>=0.3.9,<0.4.0)
21
- Requires-Dist: milvus (>=2.3.9,<3.0.0)
22
- Requires-Dist: nltk (>=3.9.1,<4.0.0)
23
- Requires-Dist: openai (>=1.61.1,<2.0.0)
24
- Requires-Dist: pydantic (>=2.10.6,<3.0.0)
25
- Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
26
- Requires-Dist: sentence-transformers (>=3.4.1,<4.0.0)
27
- Project-URL: Repository, https://github.com/yourusername/ragx
28
- Description-Content-Type: text/markdown
29
-
30
-
ragxo-0.1.1/README.md DELETED
File without changes
File without changes