ragxo 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragxo/client.py
CHANGED
@@ -20,7 +20,26 @@ class Document(BaseModel):
     id: int
 
 class Ragxo:
+    """
+    A RAG (Retrieval-Augmented Generation) system that combines vector search with LLM responses.
+
+    Attributes:
+        dimension (int): Dimension of the embedding vectors
+        collection_name (str): Name of the Milvus collection
+        db_path (str): Path to the Milvus database
+        processing_fn (list): List of preprocessing functions
+        embedding_fn (Callable): Function to generate embeddings
+        system_prompt (str): System prompt for LLM
+        model (str): LLM model name
+    """
+
     def __init__(self, dimension: int) -> None:
+        """
+        Initialize the Ragxo instance.
+
+        Args:
+            dimension (int): Dimension of the embedding vectors
+        """
         self.dimension = dimension
         self.collection_name = "ragx"
         os.makedirs("ragx_artifacts", exist_ok=True)
@@ -34,29 +53,84 @@ class Ragxo:
         self.model = "gpt-4o-mini"
 
     def add_preprocess(self, fn: Callable) -> Self:
+        """
+        Add a preprocessing function to the pipeline.
+
+        Args:
+            fn (Callable): Function that takes and returns a string
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.processing_fn.append(fn)
         return self
 
     def add_llm_response_fn(self, fn: Callable) -> Self:
+        """
+        Add a function to process LLM responses.
+
+        Args:
+            fn (Callable): Function to process LLM responses
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.llm_response_fn = fn
         return self
 
     def add_embedding_fn(self, fn: Callable) -> Self:
+        """
+        Set the embedding function for vector generation.
+
+        Args:
+            fn (Callable): Function that converts text to embeddings
+
+        Returns:
+            Self: The current instance for method chaining
+
+        Raises:
+            ValueError: If fn is None
+        """
         if not fn:
             raise ValueError("Embedding function cannot be None")
         self.embedding_fn = fn
         return self
 
     def add_system_prompt(self, prompt: str) -> Self:
+        """
+        Set the system prompt for LLM interactions.
+
+        Args:
+            prompt (str): System prompt text
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.system_prompt = prompt
         return self
 
-    def add_model(self, model: str,
+    def add_model(self, model: str,
+                  limit: int = 10,
                   temperature: float = 0.5,
                   max_tokens: int = 1000,
                   top_p: float = 1.0,
                   frequency_penalty: float = 0.0,
                   presence_penalty: float = 0.0) -> Self:
+        """
+        Configure the LLM model and its parameters.
+
+        Args:
+            model (str): Name of the LLM model
+            limit (int): Maximum number of results to return from vector search
+            temperature (float): Sampling temperature
+            max_tokens (int): Maximum tokens in response
+            top_p (float): Nucleus sampling parameter
+            frequency_penalty (float): Frequency penalty parameter
+            presence_penalty (float): Presence penalty parameter
+
+        Returns:
+            Self: The current instance for method chaining
+        """
         self.model = model
         self.limit = limit
         self.temperature = temperature
@@ -67,6 +141,18 @@ class Ragxo:
         return self
 
     def index(self, data: list[Document]) -> Self:
+        """
+        Index documents into the vector database.
+
+        Args:
+            data (list[Document]): List of documents to index
+
+        Returns:
+            Self: The current instance for method chaining
+
+        Raises:
+            ValueError: If embedding function is not set
+        """
         if not self.embedding_fn:
             raise ValueError("Embedding function not set")
 
@@ -94,6 +180,20 @@ class Ragxo:
         return self
 
     def query(self, query: str, output_fields: list[str] = ['text', 'metadata'], limit: int = 10) -> list[list[dict]]:
+        """
+        Search the vector database for similar documents.
+
+        Args:
+            query (str): Search query
+            output_fields (list[str]): Fields to return in results
+            limit (int): Maximum number of results
+
+        Returns:
+            list[list[dict]]: Search results
+
+        Raises:
+            ValueError: If embedding function is not set
+        """
         if not self.embedding_fn:
             raise ValueError("Embedding function not set. Please call add_embedding_fn first.")
 
@@ -253,6 +353,19 @@ class Ragxo:
     def generate_llm_response(self,
                               query: str,
                               data: list[dict] = None) -> ChatCompletion:
+        """
+        Generate LLM response based on query and retrieved data.
+
+        Args:
+            query (str): User query
+            data (list[dict], optional): Retrieved documents. If None, performs a new query
+
+        Returns:
+            ChatCompletion: LLM response
+
+        Raises:
+            ValueError: If system prompt is not set
+        """
         if data is None:
             data = self.query(query, limit=self.limit)[0]
 
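The docstrings added above document a fluent, builder-style API. As a minimal sketch of how the pieces fit together in 0.1.9 — not code from the package; `client` and `embed` are illustrative placeholders — note that the `limit` passed to `add_model` is the retrieval depth `generate_llm_response` falls back to when called without pre-fetched `data`:

```python
# Illustrative sketch only, based on the method signatures documented above.
# `embed` is a placeholder for any text -> list[float] embedding function.
def embed(text: str) -> list[float]:
    # Placeholder: return a fixed-size vector; swap in a real embedding model.
    return [0.0] * 768

client = (
    Ragxo(dimension=768)
    .add_preprocess(str.lower)           # each preprocessor maps str -> str
    .add_embedding_fn(embed)             # required before index() / query()
    .add_system_prompt("Answer using only the provided context.")
    .add_model("gpt-4o-mini", limit=5)   # limit is stored as self.limit
)

client.index([Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1)])

# With data=None, generate_llm_response retrieves self.limit hits internally
# (data = self.query(query, limit=self.limit)[0]) before calling the LLM.
response = client.generate_llm_response("What is the capital of France?")
```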
ragxo-0.1.7.dist-info/METADATA → ragxo-0.1.9.dist-info/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragxo
-Version: 0.1.7
+Version: 0.1.9
 Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
 Home-page: https://github.com/yourusername/ragx
 License: MIT
@@ -67,7 +67,7 @@ def preprocess_text_remove_special_chars(text: str) -> str:
     return re.sub(r'[^a-zA-Z0-9\s]', '', text)
 
 def get_embeddings(text: str) -> list[float]:
-    return openai.embeddings.create(
+    return openai.embeddings.create(input=text, model="text-embedding-ada-002").data[0].embedding
 
 ragxo_client = Ragxo(dimension=768)
 
@@ -76,7 +76,15 @@ ragxo_client.add_preprocess(preprocess_text_remove_special_chars)
 ragxo_client.add_embedding_fn(get_embeddings)
 
 ragxo_client.add_system_prompt("You are a helpful assistant that can answer questions about the data provided.")
-ragxo_client.add_model(
+ragxo_client.add_model(
+    "gpt-4o-mini",
+    limit=10,
+    temperature=0.5,
+    max_tokens=1000,
+    top_p=1.0,
+    frequency_penalty=0.0,
+    presence_penalty=0.0
+)
 
 ragxo_client.index([
     Document(text="Capital of France is Paris", metadata={"source": "example"}, id=1),
@@ -94,16 +102,10 @@ ragxo_client.export("my_rag_v1.0.0")
 ```python
 loaded_ragxo_client = Ragxo.load("my_rag_v1.0.0")
 
-
+vector_search_results = loaded_ragxo_client.query("What is the capital of France?")
 
 llm_response = loaded_ragxo_client.generate_llm_response(
-    "What is the capital of France?"
-    limit=10,
-    temperature=0.5,
-    max_tokens=1000,
-    top_p=1.0,
-    frequency_penalty=0.0,
-    presence_penalty=0.0)
+    "What is the capital of France?")
 
 ```
 
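As a hedged follow-on to the README snippet above (not part of the package docs): per the new docstrings, `query` returns one list of hits per query string and `generate_llm_response` returns an OpenAI `ChatCompletion`, assuming no `llm_response_fn` has been registered to transform it, so the returned objects might be consumed like this:

```python
# Illustrative only: unpack the objects produced by the README example above.
hits = vector_search_results[0]                    # hits for the single query string
print(len(hits), "retrieved documents")

answer = llm_response.choices[0].message.content   # standard ChatCompletion shape
print(answer)
```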
ragxo-0.1.9.dist-info/RECORD
ADDED
@@ -0,0 +1,5 @@
+ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
+ragxo/client.py,sha256=rmV01TZ3F8McLavVOokjLSUv78fdCA80oDRVWSw-38M,12791
+ragxo-0.1.9.dist-info/METADATA,sha256=br3CDegqsUhhaw-GFC7U0-J7H6fN3qHbvQpFZf8rlfU,6241
+ragxo-0.1.9.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ragxo-0.1.9.dist-info/RECORD,,
ragxo-0.1.7.dist-info/RECORD
DELETED
@@ -1,5 +0,0 @@
-ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
-ragxo/client.py,sha256=kFKLvc1s9JfhO2nTkQd1hgkQ7X0Le47dIs8_3vI916Y,9263
-ragxo-0.1.7.dist-info/METADATA,sha256=g1U7d-b7OtvGwEU01yUxxSSSa8sEVvh6Kl15uCSol_I,6225
-ragxo-0.1.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-ragxo-0.1.7.dist-info/RECORD,,