ragxo 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ragxo
|
3
|
-
Version: 0.1.5
|
3
|
+
Version: 0.1.6
|
4
4
|
Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
|
5
5
|
Home-page: https://github.com/yourusername/ragx
|
6
6
|
License: MIT
|
@@ -26,7 +26,9 @@ Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
|
|
26
26
|
Project-URL: Repository, https://github.com/yourusername/ragx
|
27
27
|
Description-Content-Type: text/markdown
|
28
28
|
|
29
|
-
# RagXO
|
29
|
+
# RagXO
|
30
|
+
|
31
|
+
Export, version and reuse your RAG pipeline everywhere 🚀
|
30
32
|
|
31
33
|
[![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
|
32
34
|
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
@@ -49,6 +51,61 @@ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generatio
|
|
49
51
|
pip install ragxo
|
50
52
|
```
|
51
53
|
|
54
|
+
## Quickstart 🚀
|
55
|
+
|
56
|
+
### Build a RAG pipeline
|
57
|
+
|
58
|
+
```python
|
59
|
+
from ragxo import Ragxo, Document
|
60
|
+
|
61
|
+
def preprocess_text_lower(text: str) -> str:
|
62
|
+
return text.lower()
|
63
|
+
|
64
|
+
def preprocess_text_remove_special_chars(text: str) -> str:
|
65
|
+
return re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
66
|
+
|
67
|
+
def get_embeddings(text: str) -> list[float]:
|
68
|
+
return openai.embeddings.create(text=text, model="text-embedding-ada-002").data[0].embedding
|
69
|
+
|
70
|
+
ragxo_client = Ragxo(dimension=768)
|
71
|
+
|
72
|
+
ragxo_client.add_preprocess(preprocess_text_lower)
|
73
|
+
ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
|
74
|
+
ragxo_client.add_embedding_fn(get_embeddings)
|
75
|
+
|
76
|
+
ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
|
77
|
+
ragxo_client.add_model("gpt-4o-mini")
|
78
|
+
|
79
|
+
ragxo_client.index([
|
80
|
+
Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
|
81
|
+
Document(text="Capital of Germany is Berlin", metadata={"source": "example"}, id=2),
|
82
|
+
Document(text="Capital of Italy is Rome", metadata={"source": "example"}, id=3),
|
83
|
+
])
|
84
|
+
|
85
|
+
ragxo_client.export("my_rag_v1.0.0")
|
86
|
+
|
87
|
+
```
|
88
|
+
|
89
|
+
|
90
|
+
### Load a RAG pipeline
|
91
|
+
|
92
|
+
```python
|
93
|
+
loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
|
94
|
+
|
95
|
+
results = loaded_ragxo_client.query("What is the capital of France?")
|
96
|
+
|
97
|
+
llm_response = loaded_ragxo_client.generate_llm_response(
|
98
|
+
"What is the capital of France?",
|
99
|
+
limit=10,
|
100
|
+
temperature=0.5,
|
101
|
+
max_tokens=1000,
|
102
|
+
top_p=1.0,
|
103
|
+
frequency_penalty=0.0,
|
104
|
+
presence_penalty=0.0)
|
105
|
+
|
106
|
+
```
|
107
|
+
|
108
|
+
|
52
109
|
## Usage Guide 📚
|
53
110
|
|
54
111
|
### Import
|
@@ -1,4 +1,6 @@
|
|
1
|
-
# RagXO
|
1
|
+
# RagXO
|
2
|
+
|
3
|
+
Export, version and reuse your RAG pipeline everywhere 🚀
|
2
4
|
|
3
5
|
[![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
|
4
6
|
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
@@ -21,6 +23,61 @@ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generatio
|
|
21
23
|
pip install ragxo
|
22
24
|
```
|
23
25
|
|
26
|
+
## Quickstart 🚀
|
27
|
+
|
28
|
+
### Build a RAG pipeline
|
29
|
+
|
30
|
+
```python
|
31
|
+
from ragxo import Ragxo, Document
|
32
|
+
|
33
|
+
def preprocess_text_lower(text: str) -> str:
|
34
|
+
return text.lower()
|
35
|
+
|
36
|
+
def preprocess_text_remove_special_chars(text: str) -> str:
|
37
|
+
return re.sub(r'[^a-zA-Z0-9\s]', '', text)
|
38
|
+
|
39
|
+
def get_embeddings(text: str) -> list[float]:
|
40
|
+
return openai.embeddings.create(text=text, model="text-embedding-ada-002").data[0].embedding
|
41
|
+
|
42
|
+
ragxo_client = Ragxo(dimension=768)
|
43
|
+
|
44
|
+
ragxo_client.add_preprocess(preprocess_text_lower)
|
45
|
+
ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
|
46
|
+
ragxo_client.add_embedding_fn(get_embeddings)
|
47
|
+
|
48
|
+
ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
|
49
|
+
ragxo_client.add_model("gpt-4o-mini")
|
50
|
+
|
51
|
+
ragxo_client.index([
|
52
|
+
Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
|
53
|
+
Document(text="Capital of Germany is Berlin", metadata={"source": "example"}, id=2),
|
54
|
+
Document(text="Capital of Italy is Rome", metadata={"source": "example"}, id=3),
|
55
|
+
])
|
56
|
+
|
57
|
+
ragxo_client.export("my_rag_v1.0.0")
|
58
|
+
|
59
|
+
```
|
60
|
+
|
61
|
+
|
62
|
+
### Load a RAG pipeline
|
63
|
+
|
64
|
+
```python
|
65
|
+
loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
|
66
|
+
|
67
|
+
results = loaded_ragxo_client.query("What is the capital of France?")
|
68
|
+
|
69
|
+
llm_response = loaded_ragxo_client.generate_llm_response(
|
70
|
+
"What is the capital of France?",
|
71
|
+
limit=10,
|
72
|
+
temperature=0.5,
|
73
|
+
max_tokens=1000,
|
74
|
+
top_p=1.0,
|
75
|
+
frequency_penalty=0.0,
|
76
|
+
presence_penalty=0.0)
|
77
|
+
|
78
|
+
```
|
79
|
+
|
80
|
+
|
24
81
|
## Usage Guide 📚
|
25
82
|
|
26
83
|
### Import
|
@@ -37,6 +37,10 @@ class Ragxo:
|
|
37
37
|
self.processing_fn.append(fn)
|
38
38
|
return self
|
39
39
|
|
40
|
+
def add_llm_response_fn(self, fn: Callable) -> Self:
|
41
|
+
self.llm_response_fn = fn
|
42
|
+
return self
|
43
|
+
|
40
44
|
def add_embedding_fn(self, fn: Callable) -> Self:
|
41
45
|
if not fn:
|
42
46
|
raise ValueError("Embedding function cannot be None")
|
@@ -78,7 +82,7 @@ class Ragxo:
|
|
78
82
|
])
|
79
83
|
return self
|
80
84
|
|
81
|
-
def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
|
85
|
+
def query(self, query: str, output_fields: list[str] = ['text', 'metadata'], limit: int = 10) -> list[list[dict]]:
|
82
86
|
if not self.embedding_fn:
|
83
87
|
raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
|
84
88
|
|
@@ -91,7 +95,7 @@ class Ragxo:
|
|
91
95
|
return self.client.search(
|
92
96
|
collection_name=self.collection_name,
|
93
97
|
data=[embedding],
|
94
|
-
limit=
|
98
|
+
limit=limit,
|
95
99
|
output_fields=output_fields
|
96
100
|
)
|
97
101
|
|
@@ -235,9 +239,18 @@ class Ragxo:
|
|
235
239
|
logger.error(f"Error in S3 load: {e}")
|
236
240
|
raise
|
237
241
|
|
238
|
-
def generate_llm_response(self,
|
242
|
+
def generate_llm_response(self,
|
243
|
+
query: str,
|
244
|
+
limit: int = 10,
|
245
|
+
data: list[dict] = None,
|
246
|
+
temperature: float = 0.5,
|
247
|
+
max_tokens: int = 1000,
|
248
|
+
top_p: float = 1.0,
|
249
|
+
frequency_penalty: float = 0.0,
|
250
|
+
presence_penalty: float = 0.0,
|
251
|
+
) -> ChatCompletion:
|
239
252
|
if data is None:
|
240
|
-
data = self.query(query)[0]
|
253
|
+
data = self.query(query, limit=limit)[0]
|
241
254
|
|
242
255
|
if not self.system_prompt:
|
243
256
|
raise ValueError("System prompt not set. Please call add_system_prompt first.")
|
@@ -247,7 +260,12 @@ class Ragxo:
|
|
247
260
|
messages=[
|
248
261
|
{"role": "system", "content": self.system_prompt},
|
249
262
|
{"role": "user", "content": "query: {} data: {}".format(query, data)}
|
250
|
-
]
|
263
|
+
],
|
264
|
+
temperature=temperature,
|
265
|
+
max_tokens=max_tokens,
|
266
|
+
top_p=top_p,
|
267
|
+
frequency_penalty=frequency_penalty,
|
268
|
+
presence_penalty=presence_penalty,
|
251
269
|
)
|
252
270
|
|
253
271
|
return response
|
File without changes
|