ragxo 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ragxo
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
5
  Home-page: https://github.com/yourusername/ragx
6
6
  License: MIT
@@ -20,13 +20,17 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
20
  Requires-Dist: boto3 (>=1.36.14,<2.0.0)
21
21
  Requires-Dist: dill (>=0.3.9,<0.4.0)
22
22
  Requires-Dist: milvus (>=2.3.9,<3.0.0)
23
+ Requires-Dist: mocker (>=1.1.1,<2.0.0)
23
24
  Requires-Dist: openai (>=1.61.1,<2.0.0)
24
25
  Requires-Dist: pydantic (>=2.10.6,<3.0.0)
25
26
  Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
27
+ Requires-Dist: pytest-mock (>=3.14.0,<4.0.0)
26
28
  Project-URL: Repository, https://github.com/yourusername/ragx
27
29
  Description-Content-Type: text/markdown
28
30
 
29
- # RagXO 🚀
31
+ # RagXO
32
+
33
+ Export, version and reuse your RAG pipeline everywhere 🚀
30
34
 
31
35
  [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
32
36
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -49,6 +53,61 @@ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generatio
49
53
  pip install ragxo
50
54
  ```
51
55
 
56
+ ## Quickstart 🚀
57
+
58
+ ### Build a RAG pipeline
59
+
60
+ ```python
61
+ import re
+ import openai
+ from ragxo import Ragxo, Document
62
+
63
+ def preprocess_text_lower(text: str) -> str:
64
+ return text.lower()
65
+
66
+ def preprocess_text_remove_special_chars(text: str) -> str:
67
+ return re.sub(r'[^a-zA-Z0-9\s]', '', text)
68
+
69
+ def get_embeddings(text: str) -> list[float]:
70
+ return openai.embeddings.create(input=text, model="text-embedding-ada-002").data[0].embedding
71
+
72
+ ragxo_client = Ragxo(dimension=1536)  # text-embedding-ada-002 vectors are 1536-dimensional
73
+
74
+ ragxo_client.add_preprocess(preprocess_text_lower)
75
+ ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
76
+ ragxo_client.add_embedding_fn(get_embeddings)
77
+
78
+ ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
79
+ ragxo_client.add_model("gpt-4o-mini")
80
+
81
+ ragxo_client.index([
82
+ Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
83
+ Document(text="Capital of Germany is Berlin", metadata={"source": "example"}, id=2),
84
+ Document(text="Capital of Italy is Rome", metadata={"source": "example"}, id=3),
85
+ ])
86
+
87
+ ragxo_client.export("my_rag_v1.0.0")
88
+
89
+ ```
90
+
91
+
92
+ ### Load a RAG pipeline
93
+
94
+ ```python
95
+ loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
96
+
97
+ results = loaded_ragxo_client.query("What is the capital of France?")
98
+
99
+ llm_response = loaded_ragxo_client.generate_llm_response(
100
+ "What is the capital of France?",
101
+ limit=10,
102
+ temperature=0.5,
103
+ max_tokens=1000,
104
+ top_p=1.0,
105
+ frequency_penalty=0.0,
106
+ presence_penalty=0.0)
107
+
108
+ ```
109
+
110
+
52
111
  ## Usage Guide 📚
53
112
 
54
113
  ### Import
@@ -1,4 +1,6 @@
1
- # RagXO 🚀
1
+ # RagXO
2
+
3
+ Export, version and reuse your RAG pipeline everywhere 🚀
2
4
 
3
5
  [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
4
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -21,6 +23,61 @@ RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generatio
21
23
  pip install ragxo
22
24
  ```
23
25
 
26
+ ## Quickstart 🚀
27
+
28
+ ### Build a RAG pipeline
29
+
30
+ ```python
31
+ import re
+ import openai
+ from ragxo import Ragxo, Document
32
+
33
+ def preprocess_text_lower(text: str) -> str:
34
+ return text.lower()
35
+
36
+ def preprocess_text_remove_special_chars(text: str) -> str:
37
+ return re.sub(r'[^a-zA-Z0-9\s]', '', text)
38
+
39
+ def get_embeddings(text: str) -> list[float]:
40
+ return openai.embeddings.create(input=text, model="text-embedding-ada-002").data[0].embedding
41
+
42
+ ragxo_client = Ragxo(dimension=1536)  # text-embedding-ada-002 vectors are 1536-dimensional
43
+
44
+ ragxo_client.add_preprocess(preprocess_text_lower)
45
+ ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
46
+ ragxo_client.add_embedding_fn(get_embeddings)
47
+
48
+ ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
49
+ ragxo_client.add_model("gpt-4o-mini")
50
+
51
+ ragxo_client.index([
52
+ Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
53
+ Document(text="Capital of Germany is Berlin", metadata={"source": "example"}, id=2),
54
+ Document(text="Capital of Italy is Rome", metadata={"source": "example"}, id=3),
55
+ ])
56
+
57
+ ragxo_client.export("my_rag_v1.0.0")
58
+
59
+ ```
60
+
61
+
62
+ ### Load a RAG pipeline
63
+
64
+ ```python
65
+ loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
66
+
67
+ results = loaded_ragxo_client.query("What is the capital of France?")
68
+
69
+ llm_response = loaded_ragxo_client.generate_llm_response(
70
+ "What is the capital of France?",
71
+ limit=10,
72
+ temperature=0.5,
73
+ max_tokens=1000,
74
+ top_p=1.0,
75
+ frequency_penalty=0.0,
76
+ presence_penalty=0.0)
77
+
78
+ ```
79
+
80
+
24
81
  ## Usage Guide 📚
25
82
 
26
83
  ### Import
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ragxo"
3
- version = "0.1.5"
3
+ version = "0.1.7"
4
4
  description = "A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration"
5
5
  authors = ["Mohamed Sadek <mohamedfawzydes@gmail.com>"]
6
6
  readme = "README.md"
@@ -26,6 +26,8 @@ pydantic = "^2.10.6"
26
26
  dill = "^0.3.9"
27
27
  openai = "^1.61.1"
28
28
  boto3 = "^1.36.14"
29
+ pytest-mock = "^3.14.0"
30
+ mocker = "^1.1.1"
29
31
 
30
32
  [tool.poetry.group.dev.dependencies]
31
33
  pytest = "^8.3.4"
@@ -37,6 +37,10 @@ class Ragxo:
37
37
  self.processing_fn.append(fn)
38
38
  return self
39
39
 
40
+ def add_llm_response_fn(self, fn: Callable) -> Self:
41
+ self.llm_response_fn = fn
42
+ return self
43
+
40
44
  def add_embedding_fn(self, fn: Callable) -> Self:
41
45
  if not fn:
42
46
  raise ValueError("Embedding function cannot be None")
@@ -47,8 +51,19 @@ class Ragxo:
47
51
  self.system_prompt = prompt
48
52
  return self
49
53
 
50
- def add_model(self, model: str) -> Self:
54
+ def add_model(self, model: str, limit: int = 10,
55
+ temperature: float = 0.5,
56
+ max_tokens: int = 1000,
57
+ top_p: float = 1.0,
58
+ frequency_penalty: float = 0.0,
59
+ presence_penalty: float = 0.0) -> Self:
51
60
  self.model = model
61
+ self.limit = limit
62
+ self.temperature = temperature
63
+ self.max_tokens = max_tokens
64
+ self.top_p = top_p
65
+ self.frequency_penalty = frequency_penalty
66
+ self.presence_penalty = presence_penalty
52
67
  return self
53
68
 
54
69
  def index(self, data: list[Document]) -> Self:
@@ -78,7 +93,7 @@ class Ragxo:
78
93
  ])
79
94
  return self
80
95
 
81
- def query(self, query: str, output_fields: list[str] = ['text', 'metadata']) -> list[list[dict]]:
96
+ def query(self, query: str, output_fields: list[str] = ['text', 'metadata'], limit: int = 10) -> list[list[dict]]:
82
97
  if not self.embedding_fn:
83
98
  raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
84
99
 
@@ -91,7 +106,7 @@ class Ragxo:
91
106
  return self.client.search(
92
107
  collection_name=self.collection_name,
93
108
  data=[embedding],
94
- limit=10,
109
+ limit=limit,
95
110
  output_fields=output_fields
96
111
  )
97
112
 
@@ -235,9 +250,11 @@ class Ragxo:
235
250
  logger.error(f"Error in S3 load: {e}")
236
251
  raise
237
252
 
238
- def generate_llm_response(self, query: str, data: list[dict] = None) -> ChatCompletion:
253
+ def generate_llm_response(self,
254
+ query: str,
255
+ data: list[dict] = None) -> ChatCompletion:
239
256
  if data is None:
240
- data = self.query(query)[0]
257
+ data = self.query(query, limit=self.limit)[0]
241
258
 
242
259
  if not self.system_prompt:
243
260
  raise ValueError("System prompt not set. Please call add_system_prompt first.")
@@ -247,7 +264,12 @@ class Ragxo:
247
264
  messages=[
248
265
  {"role": "system", "content": self.system_prompt},
249
266
  {"role": "user", "content": "query: {} data: {}".format(query, data)}
250
- ]
267
+ ],
268
+ temperature=self.temperature,
269
+ max_tokens=self.max_tokens,
270
+ top_p=self.top_p,
271
+ frequency_penalty=self.frequency_penalty,
272
+ presence_penalty=self.presence_penalty,
251
273
  )
252
274
 
253
275
  return response
File without changes