ragxo 0.1.5.tar.gz → 0.1.6.tar.gz

This diff compares the contents of two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragxo
-Version: 0.1.5
+Version: 0.1.6
 Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
 Home-page: https://github.com/yourusername/ragx
 License: MIT
@@ -26,7 +26,9 @@ Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
 Project-URL: Repository, https://github.com/yourusername/ragx
 Description-Content-Type: text/markdown

-# RagXO 🚀
+# RagXO
+
+Export, version and reuse your RAG pipeline everywhere 🚀

 [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -49,6 +51,61 @@ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generatio
 pip install ragxo
 ```

+## Quickstart 🚀
+
+### Build a RAG pipeline
+
+```python
+from ragxo import Ragxo, Document
+
+def preprocess_text_lower(text: str) -> str:
+    return text.lower()
+
+def preprocess_text_remove_special_chars(text: str) -> str:
+    return re.sub(r'[^a-zA-Z0-9\s]', '', text)
+
+def get_embeddings(text: str) -> list[float]:
+    return openai.embeddings.create(text=text, model="text-embedding-ada-002").data[0].embedding
+
+ragxo_client = Ragxo(dimension=768)
+
+ragxo_client.add_preprocess(preprocess_text_lower)
+ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
+ragxo_client.add_embedding_fn(get_embeddings)
+
+ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
+ragxo_client.add_model("gpt-4o-mini")
+
+ragxo_client.index([
+    Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
+    Document(text="Capital of Germany is Berlin", metadata={"source": "example"}, id=2),
+    Document(text="Capital of Italy is Rome", metadata={"source": "example"}, id=3),
+])
+
+ragxo_client.export("my_rag_v1.0.0")
+
+```
+
+
+### Load a RAG pipeline
+
+```python
+loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
+
+results = loaded_ragxo_client.query("What is the capital of France?")
+
+llm_response = loaded_ragxo_client.generate_llm_response(
+    "What is the capital of France?",
+    limit=10,
+    temperature=0.5,
+    max_tokens=1000,
+    top_p=1.0,
+    frequency_penalty=0.0,
+    presence_penalty=0.0)
+
+```
+
+
 ## Usage Guide 📚

 ### Import
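As written, the quickstart added in this hunk assumes a few things it does not show: `re` and an OpenAI client still have to be imported, the OpenAI Python SDK's `embeddings.create` takes `input=` rather than `text=`, and `text-embedding-ada-002` returns 1536-dimensional vectors, so the `dimension` passed to `Ragxo` should match. A minimal self-contained sketch of the helper setup under those assumptions (the `OpenAI()` client and API-key handling are not part of the published snippet):

```python
# Self-contained version of the quickstart helpers.
# Assumes the openai>=1.x SDK and an OPENAI_API_KEY in the environment.
import re

from openai import OpenAI
from ragxo import Ragxo, Document

client = OpenAI()

def preprocess_text_lower(text: str) -> str:
    return text.lower()

def preprocess_text_remove_special_chars(text: str) -> str:
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)

def get_embeddings(text: str) -> list[float]:
    # embeddings.create takes input=; ada-002 vectors are 1536-dimensional
    return client.embeddings.create(input=text, model="text-embedding-ada-002").data[0].embedding

ragxo_client = Ragxo(dimension=1536)  # match the embedding model's output size
```

The remaining pipeline calls (`add_preprocess`, `add_embedding_fn`, `index`, `export`) then proceed exactly as shown in the hunk above.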
@@ -1,4 +1,6 @@
-# RagXO 🚀
+# RagXO
+
+Export, version and reuse your RAG pipeline everywhere 🚀

 [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -21,6 +23,61 @@ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generatio
 pip install ragxo
 ```

+## Quickstart 🚀
+
+### Build a RAG pipeline
+
+```python
+from ragxo import Ragxo, Document
+
+def preprocess_text_lower(text: str) -> str:
+    return text.lower()
+
+def preprocess_text_remove_special_chars(text: str) -> str:
+    return re.sub(r'[^a-zA-Z0-9\s]', '', text)
+
+def get_embeddings(text: str) -> list[float]:
+    return openai.embeddings.create(text=text, model="text-embedding-ada-002").data[0].embedding
+
+ragxo_client = Ragxo(dimension=768)
+
+ragxo_client.add_preprocess(preprocess_text_lower)
+ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
+ragxo_client.add_embedding_fn(get_embeddings)
+
+ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
+ragxo_client.add_model("gpt-4o-mini")
+
+ragxo_client.index([
+    Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
+    Document(text="Capital of Germany is Berlin", metadata={"source": "example"}, id=2),
+    Document(text="Capital of Italy is Rome", metadata={"source": "example"}, id=3),
+])
+
+ragxo_client.export("my_rag_v1.0.0")
+
+```
+
+
+### Load a RAG pipeline
+
+```python
+loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
+
+results = loaded_ragxo_client.query("What is the capital of France?")
+
+llm_response = loaded_ragxo_client.generate_llm_response(
+    "What is the capital of France?",
+    limit=10,
+    temperature=0.5,
+    max_tokens=1000,
+    top_p=1.0,
+    frequency_penalty=0.0,
+    presence_penalty=0.0)
+
+```
+
+
 ## Usage Guide 📚

 ### Import
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ragxo"
-version = "0.1.5"
+version = "0.1.6"
 description = "A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration"
 authors = ["Mohamed Sadek <mohamedfawzydes@gmail.com>"]
 readme = "README.md"
@@ -37,6 +37,10 @@ class Ragxo:
         self.processing_fn.append(fn)
         return self

+    def add_llm_response_fn(self, fn: Callable) -> Self:
+        self.llm_response_fn = fn
+        return self
+
     def add_embedding_fn(self, fn: Callable) -> Self:
         if not fn:
             raise ValueError("Embedding function cannot be None")
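`add_llm_response_fn` follows the same fluent pattern as the other `add_*` builders: it stores a callable and returns `self`. None of the hunks in this diff show where `llm_response_fn` is later invoked, so the sketch below only demonstrates registering one; the callable's shape (taking a ChatCompletion and pulling out the message text) is an assumption, not something the diff confirms:

```python
from ragxo import Ragxo

# Registering a response post-processor. Chaining works because every
# add_* method returns self. What the stored callable eventually receives
# is not visible in this diff, so the lambda below is illustrative only.
ragxo_client = Ragxo(dimension=1536).add_llm_response_fn(
    lambda response: response.choices[0].message.content
)
```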
@@ -78,7 +82,7 @@ class Ragxo:
         ])
         return self

-    def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
+    def query(self, query: str, output_fields: list[str] = ['text', 'metadata'], limit: int = 10) -> list[list[dict]]:
         if not self.embedding_fn:
             raise ValueError("Embedding function not set. Please call add_embedding_fn first.")

@@ -91,7 +95,7 @@ class Ragxo:
         return self.client.search(
             collection_name=self.collection_name,
             data=[embedding],
-            limit=10,
+            limit=limit,
             output_fields=output_fields
         )

@@ -235,9 +239,18 @@ class Ragxo:
             logger.error(f"Error in S3 load: {e}")
             raise

-    def generate_llm_response(self, query: str, data: list[dict] = None) -> ChatCompletion:
+    def generate_llm_response(self,
+                              query: str,
+                              limit: int = 10,
+                              data: list[dict] = None,
+                              temperature: float = 0.5,
+                              max_tokens: int = 1000,
+                              top_p: float = 1.0,
+                              frequency_penalty: float = 0.0,
+                              presence_penalty: float = 0.0,
+                              ) -> ChatCompletion:
         if data is None:
-            data = self.query(query)[0]
+            data = self.query(query, limit=limit)[0]

         if not self.system_prompt:
             raise ValueError("System prompt not set. Please call add_system_prompt first.")
@@ -247,7 +260,12 @@ class Ragxo:
             messages=[
                 {"role": "system", "content": self.system_prompt},
                 {"role": "user", "content": "query: {} data: {}".format(query, data)}
-            ]
+            ],
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            presence_penalty=presence_penalty,
         )

         return response
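Taken together, the last few hunks thread a retrieval `limit` and the usual sampling parameters from `generate_llm_response` down through `query` and into the chat-completion call. A short sketch of how a loaded 0.1.6 pipeline might exercise the new keywords (the export name and question are placeholders):

```python
from ragxo import Ragxo

# Load a previously exported pipeline (placeholder name).
rag = Ragxo.load("my_rag_v1.0.0")

# `limit` now caps how many hits Milvus returns from the vector search.
hits = rag.query("What is the capital of France?", limit=3)

# The same limit plus sampling parameters are forwarded to the LLM call,
# which returns an OpenAI ChatCompletion object.
response = rag.generate_llm_response(
    "What is the capital of France?",
    limit=3,
    temperature=0.2,
    max_tokens=256,
)
print(response.choices[0].message.content)
```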
File without changes