ragxo 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragxo
-Version: 0.1.7
+Version: 0.1.8
 Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
 Home-page: https://github.com/yourusername/ragx
 License: MIT
@@ -67,7 +67,7 @@ def preprocess_text_remove_special_chars(text: str) -> str:
     return re.sub(r'[^a-zA-Z0-9\s]', '', text)
 
 def get_embeddings(text: str) -> list[float]:
-    return openai.embeddings.create(text=text, model="text-embedding-ada-002").data[0].embedding
+    return openai.embeddings.create(input=text, model="text-embedding-ada-002").data[0].embedding
 
 ragxo_client = Ragxo(dimension=768)
 
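The fix above matters because the openai>=1.0 Python SDK's `embeddings.create` takes `input=`, not `text=`, so the 0.1.7 snippet would raise a `TypeError` at call time. A minimal self-contained sketch of the corrected helper (assuming the module-level client, which reads `OPENAI_API_KEY` from the environment):

```python
# Minimal sketch of the corrected embeddings call (openai>=1.0 SDK assumed).
# The default client reads OPENAI_API_KEY from the environment.
import openai

def get_embeddings(text: str) -> list[float]:
    # `input=` is the keyword the SDK expects; `text=` (the 0.1.7 README) is rejected.
    response = openai.embeddings.create(input=text, model="text-embedding-ada-002")
    return response.data[0].embedding
```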
@@ -76,7 +76,14 @@ ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
 ragxo_client.add_embedding_fn(get_embeddings)
 
 ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
-ragxo_client.add_model("gpt-4o-mini")
+ragxo_client.add_model(
+    "gpt-4o-mini",
+    temperature=0.5,
+    max_tokens=1000,
+    top_p=1.0,
+    frequency_penalty=0.0,
+    presence_penalty=0.0
+)
 
 ragxo_client.index([
     Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
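In 0.1.8 the sampling parameters move from the per-call `generate_llm_response` signature onto `add_model`, so they are configured once per client. Since every setter returns `Self` (see the docstrings added further down), the setup also chains; a sketch reusing the README's helpers:

```python
# Builder-style setup enabled by the Self-returning setters; values mirror
# the README example and the add_model defaults shown in this diff.
ragxo_client = (
    Ragxo(dimension=768)
    .add_preprocess(preprocess_text_remove_special_chars)
    .add_embedding_fn(get_embeddings)
    .add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
    .add_model(
        "gpt-4o-mini",
        temperature=0.5,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
)
```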
@@ -94,16 +101,10 @@ ragxo_client.export("my_rag_v1.0.0")
 ```python
 loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
 
-results = loaded_ragxo_client.query("What is the capital of France?")
+vector_search_results = loaded_ragxo_client.query("What is the capital of France?")
 
 llm_response = loaded_ragxo_client.generate_llm_response(
-    "What is the capital of France?",
-    limit=10,
-    temperature=0.5,
-    max_tokens=1000,
-    top_p=1.0,
-    frequency_penalty=0.0,
-    presence_penalty=0.0)
+    "What is the capital of France?")
 
 ```
 
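`query` returns nested results (`list[list[dict]]` per the docstring added further down): the outer list has one entry per query, the inner list the hits. A sketch of unpacking them; note the exact per-hit layout depends on the underlying Milvus client (with pymilvus's `MilvusClient.search`, requested `output_fields` typically sit under an `entity` key):

```python
# The outer list carries one entry per query vector; index [0] selects the
# hits for this single query (generate_llm_response does the same internally).
vector_search_results = loaded_ragxo_client.query("What is the capital of France?", limit=3)
for hit in vector_search_results[0]:
    # Per-hit layout is Milvus-client-specific; print the raw dict to inspect it.
    print(hit)
```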
@@ -37,7 +37,7 @@ def preprocess_text_remove_special_chars(text: str) -> str:
     return re.sub(r'[^a-zA-Z0-9\s]', '', text)
 
 def get_embeddings(text: str) -> list[float]:
-    return openai.embeddings.create(text=text, model="text-embedding-ada-002").data[0].embedding
+    return openai.embeddings.create(input=text, model="text-embedding-ada-002").data[0].embedding
 
 ragxo_client = Ragxo(dimension=768)
 
@@ -46,7 +46,14 @@ ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
 ragxo_client.add_embedding_fn(get_embeddings)
 
 ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
-ragxo_client.add_model("gpt-4o-mini")
+ragxo_client.add_model(
+    "gpt-4o-mini",
+    temperature=0.5,
+    max_tokens=1000,
+    top_p=1.0,
+    frequency_penalty=0.0,
+    presence_penalty=0.0
+)
 
 ragxo_client.index([
     Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
@@ -64,16 +71,10 @@ ragxo_client.export("my_rag_v1.0.0")
 ```python
 loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
 
-results = loaded_ragxo_client.query("What is the capital of France?")
+vector_search_results = loaded_ragxo_client.query("What is the capital of France?")
 
 llm_response = loaded_ragxo_client.generate_llm_response(
-    "What is the capital of France?",
-    limit=10,
-    temperature=0.5,
-    max_tokens=1000,
-    top_p=1.0,
-    frequency_penalty=0.0,
-    presence_penalty=0.0)
+    "What is the capital of France?")
 
 ```
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ragxo"
-version = "0.1.7"
+version = "0.1.8"
 description = "A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration"
 authors = ["Mohamed Sadek <mohamedfawzydes@gmail.com>"]
 readme = "README.md"
@@ -20,7 +20,26 @@ class Document(BaseModel):
     id: int
 
 class Ragxo:
+    """
+    A RAG (Retrieval-Augmented Generation) system that combines vector search with LLM responses.
+
+    Attributes:
+        dimension (int): Dimension of the embedding vectors
+        collection_name (str): Name of the Milvus collection
+        db_path (str): Path to the Milvus database
+        processing_fn (list): List of preprocessing functions
+        embedding_fn (Callable): Function to generate embeddings
+        system_prompt (str): System prompt for LLM
+        model (str): LLM model name
+    """
+
     def __init__(self, dimension: int) -> None:
+        """
+        Initialize the Ragxo instance.
+
+        Args:
+            dimension (int): Dimension of the embedding vectors
+        """
         self.dimension = dimension
         self.collection_name = "ragx"
         os.makedirs("ragx_artifacts", exist_ok=True)
@@ -34,20 +53,59 @@ class Ragxo:
         self.model = "gpt-4o-mini"
 
     def add_preprocess(self, fn: Callable) -> Self:
+        """
+        Add a preprocessing function to the pipeline.
+
+        Args:
+            fn (Callable): Function that takes and returns a string
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.processing_fn.append(fn)
         return self
 
     def add_llm_response_fn(self, fn: Callable) -> Self:
+        """
+        Add a function to process LLM responses.
+
+        Args:
+            fn (Callable): Function to process LLM responses
+
+        Returns:
+            Self: The current instance for method chaining
+        """
        self.llm_response_fn = fn
        return self
 
     def add_embedding_fn(self, fn: Callable) -> Self:
+        """
+        Set the embedding function for vector generation.
+
+        Args:
+            fn (Callable): Function that converts text to embeddings
+
+        Returns:
+            Self: The current instance for method chaining
+
+        Raises:
+            ValueError: If fn is None
+        """
         if not fn:
             raise ValueError("Embedding function cannot be None")
         self.embedding_fn = fn
         return self
 
     def add_system_prompt(self, prompt: str) -> Self:
+        """
+        Set the system prompt for LLM interactions.
+
+        Args:
+            prompt (str): System prompt text
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.system_prompt = prompt
         return self
 
@@ -57,6 +115,21 @@ class Ragxo:
                   top_p: float = 1.0,
                   frequency_penalty: float = 0.0,
                   presence_penalty: float = 0.0) -> Self:
+        """
+        Configure the LLM model and its parameters.
+
+        Args:
+            model (str): Name of the LLM model
+            limit (int): Maximum number of results to return from vector search
+            temperature (float): Sampling temperature
+            max_tokens (int): Maximum tokens in response
+            top_p (float): Nucleus sampling parameter
+            frequency_penalty (float): Frequency penalty parameter
+            presence_penalty (float): Presence penalty parameter
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.model = model
         self.limit = limit
         self.temperature = temperature
@@ -67,6 +140,18 @@ class Ragxo:
         return self
 
     def index(self, data: list[Document]) -> Self:
+        """
+        Index documents into the vector database.
+
+        Args:
+            data (list[Document]): List of documents to index
+
+        Returns:
+            Self: The current instance for method chaining
+
+        Raises:
+            ValueError: If embedding function is not set
+        """
         if not self.embedding_fn:
             raise ValueError("Embedding function not set")
 
@@ -94,6 +179,20 @@ class Ragxo:
         return self
 
     def query(self, query: str, output_fields: list[str] = ['text', 'metadata'], limit: int = 10) -> list[list[dict]]:
+        """
+        Search the vector database for similar documents.
+
+        Args:
+            query (str): Search query
+            output_fields (list[str]): Fields to return in results
+            limit (int): Maximum number of results
+
+        Returns:
+            list[list[dict]]: Search results
+
+        Raises:
+            ValueError: If embedding function is not set
+        """
         if not self.embedding_fn:
             raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
 
@@ -253,6 +352,19 @@ class Ragxo:
     def generate_llm_response(self,
                               query: str,
                               data: list[dict] = None) -> ChatCompletion:
+        """
+        Generate LLM response based on query and retrieved data.
+
+        Args:
+            query (str): User query
+            data (list[dict], optional): Retrieved documents. If None, performs a new query
+
+        Returns:
+            ChatCompletion: LLM response
+
+        Raises:
+            ValueError: If system prompt is not set
+        """
         if data is None:
             data = self.query(query, limit=self.limit)[0]
 
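The `data=None` default shown above means callers can either hand in pre-retrieved hits or let the client re-query using the `limit` stored by `add_model`. A sketch of both call forms (the accessors on the result are the standard OpenAI `ChatCompletion` ones):

```python
# Form 1: let generate_llm_response fetch context itself; with data=None it
# calls self.query(query, limit=self.limit) internally.
llm_response = loaded_ragxo_client.generate_llm_response("What is the capital of France?")

# Form 2: reuse hits you already fetched, avoiding a second vector search.
hits = loaded_ragxo_client.query("What is the capital of France?")[0]
llm_response = loaded_ragxo_client.generate_llm_response(
    "What is the capital of France?",
    data=hits,
)

print(llm_response.choices[0].message.content)  # standard ChatCompletion access
```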
File without changes