ragxo 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragxo/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
|
2
|
-
from .client import Ragxo, Document
|
3
|
-
__all__ = ["Ragxo", "Document"]
|
2
|
+
from .client import Ragxo, Document, EvaluationExample
|
3
|
+
__all__ = ["Ragxo", "Document", "EvaluationExample"]
|
ragxo/client.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import time
|
2
|
-
from typing import Self, Callable
|
2
|
+
from typing import Literal, Self, Callable
|
3
3
|
from pymilvus import MilvusClient
|
4
|
-
from pydantic import BaseModel
|
4
|
+
from pydantic import BaseModel, Field
|
5
5
|
import boto3
|
6
6
|
import dill
|
7
7
|
import os
|
@@ -11,6 +11,7 @@ import tempfile
|
|
11
11
|
from botocore.exceptions import ClientError
|
12
12
|
import openai
|
13
13
|
from openai import ChatCompletion
|
14
|
+
from ragxo.utils import with_loading
|
14
15
|
|
15
16
|
logger = logging.getLogger(__name__)
|
16
17
|
|
@@ -19,6 +20,14 @@ class Document(BaseModel):
|
|
19
20
|
metadata: dict
|
20
21
|
id: int
|
21
22
|
|
23
|
+
class EvaluationExample(BaseModel):
    """
    A single test case used by Ragxo.evaluate.

    Both fields are required; the descriptions are only schema metadata and
    do not change validation.
    """

    # The question sent through the RAG pipeline.
    query: str = Field(description="The user query to run through the RAG pipeline")
    # The reference answer the judge model compares the generated answer against.
    expected: str = Field(description="The reference answer the generated answer is judged against")
|
26
|
+
|
27
|
+
class EvaluationResults(BaseModel):
    """
    Structured output schema for the judge model used by Ragxo.evaluate.

    Holds one verdict string per evaluated example, in the same order as the
    examples appear in the judge prompt.
    """

    # Kept as plain strings (not an enum); the consumer compares them
    # case-insensitively against "correct".
    results: list[str] = Field(description="A list of strings, each either 'correct' or 'incorrect'")
|
29
|
+
|
30
|
+
|
22
31
|
class Ragxo:
|
23
32
|
"""
|
24
33
|
A RAG (Retrieval-Augmented Generation) system that combines vector search with LLM responses.
|
@@ -51,6 +60,13 @@ class Ragxo:
|
|
51
60
|
self.embedding_fn = None
|
52
61
|
self.system_prompt = None
|
53
62
|
self.model = "gpt-4o-mini"
|
63
|
+
self.limit = 10
|
64
|
+
self.temperature = 0.5
|
65
|
+
self.max_tokens = 2000
|
66
|
+
self.top_p = 1.0
|
67
|
+
self.frequency_penalty = 0.0
|
68
|
+
self.presence_penalty = 0.0
|
69
|
+
|
54
70
|
|
55
71
|
def add_preprocess(self, fn: Callable) -> Self:
|
56
72
|
"""
|
@@ -140,6 +156,7 @@ class Ragxo:
|
|
140
156
|
self.presence_penalty = presence_penalty
|
141
157
|
return self
|
142
158
|
|
159
|
+
@with_loading("Indexing documents")
|
143
160
|
def index(self, data: list[Document]) -> Self:
|
144
161
|
"""
|
145
162
|
Index documents into the vector database.
|
@@ -210,6 +227,7 @@ class Ragxo:
|
|
210
227
|
output_fields=output_fields
|
211
228
|
)
|
212
229
|
|
230
|
+
@with_loading("Exporting Ragxo instance")
|
213
231
|
def export(self, destination: str, s3_bucket: str = None) -> Self:
|
214
232
|
"""
|
215
233
|
Export the Ragxo instance to either local filesystem or S3.
|
@@ -310,7 +328,7 @@ class Ragxo:
|
|
310
328
|
raise
|
311
329
|
|
312
330
|
@classmethod
|
313
|
-
def _load_from_s3(cls, prefix: str, bucket: str) ->
|
331
|
+
def _load_from_s3(cls, prefix: str, bucket: str) -> Self:
|
314
332
|
"""
|
315
333
|
Internal classmethod to handle S3 loading.
|
316
334
|
"""
|
@@ -352,13 +370,17 @@ class Ragxo:
|
|
352
370
|
|
353
371
|
def generate_llm_response(self,
|
354
372
|
query: str,
|
373
|
+
history: list[dict] = [],
|
374
|
+
messages: list[dict] = None,
|
355
375
|
data: list[dict] = None) -> ChatCompletion:
|
356
376
|
"""
|
357
377
|
Generate LLM response based on query and retrieved data.
|
358
378
|
|
359
379
|
Args:
|
360
|
-
query (str): User query
|
380
|
+
query (str): User query, this is used if messages is None
|
361
381
|
data (list[dict], optional): Retrieved documents. If None, performs a new query
|
382
|
+
history (list[dict], optional): History of messages
|
383
|
+
messages (list[dict], optional): Messages to pass to the LLM: [{"role": "system", "content": system_prompt}, {"role": "user", "content": "Some user message"}, {"role": "assistant", "content": "Some assistant message"}]
|
362
384
|
|
363
385
|
Returns:
|
364
386
|
ChatCompletion: LLM response
|
@@ -375,9 +397,10 @@ class Ragxo:
|
|
375
397
|
response = openai.chat.completions.create(
|
376
398
|
model=self.model,
|
377
399
|
messages=[
|
378
|
-
{"role": "system", "content": self.system_prompt}
|
379
|
-
|
380
|
-
|
400
|
+
{"role": "system", "content": self.system_prompt}
|
401
|
+
] + history + [
|
402
|
+
{"role": "user", "content": f"query: {query} data: {data}"}
|
403
|
+
] if messages is None else messages,
|
381
404
|
temperature=self.temperature,
|
382
405
|
max_tokens=self.max_tokens,
|
383
406
|
top_p=self.top_p,
|
@@ -385,4 +408,85 @@ class Ragxo:
|
|
385
408
|
presence_penalty=self.presence_penalty,
|
386
409
|
)
|
387
410
|
|
388
|
-
return response
|
411
|
+
return response
|
412
|
+
|
413
|
+
|
414
|
+
|
415
|
+
@with_loading("Evaluating test dataset")
def evaluate(self, test_data: list[EvaluationExample], batch_size: int = 10, judge_model: str = "gpt-4o-mini") -> float:
    """
    Evaluate the performance of the RAG system on a test dataset using a single prompt per batch.

    For each batch:
      1. Generates an answer for each query.
      2. Concatenates evaluation details (query, expected, generated answer) into one prompt.
      3. Instructs the judge to output a JSON object strictly adhering to our schema:
         {"results": ["correct", "incorrect", ...]}.
      4. Parses the structured output and computes overall accuracy.

    Args:
        test_data (list[EvaluationExample]): List of evaluation examples.
        batch_size (int): Number of examples to process per batch. Must be positive.
        judge_model (str): Model used to grade the generated answers. It is called
            with a structured ``response_format``, so it must support that feature.

    Returns:
        float: Accuracy as a fraction of correct evaluations (0.0 for an empty dataset).

    Raises:
        ValueError: If batch_size is not a positive integer.
    """
    # range() would raise an opaque error for 0 and silently skip everything
    # for negative steps, so reject bad batch sizes up front.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    total = len(test_data)
    correct_count = 0

    for start in range(0, total, batch_size):
        batch = test_data[start : start + batch_size]

        # Collect prompt pieces and join once instead of repeated string +=.
        prompt_parts = [
            "Evaluate the following examples and output your answer as a JSON object with a single key \"results\" that maps to an array of strings. Each element in the array should be either \"correct\" or \"incorrect\", corresponding to each example in order.\n\n"
        ]

        # For each example in the batch, generate the answer and include details.
        for idx, example in enumerate(batch, start=1):
            # Generate the answer using the RAG system.
            llm_response = self.generate_llm_response(example.query)
            # content can be None (e.g. a refusal); grade that as an empty answer
            # instead of crashing on None.strip().
            generated_answer = (llm_response.choices[0].message.content or "").strip()

            prompt_parts.append(
                f"Example {idx}:\n"
                f"Query: {example.query}\n"
                f"Expected Answer: {example.expected}\n"
                f"Generated Answer: {generated_answer}\n\n"
            )

        # Append clear instructions for the structured output.
        prompt_parts.append(
            "Return your output as a JSON object exactly in this format: "
            "{\"results\": [\"correct\", \"incorrect\", ...]} with no additional text or markdown formatting."
        )
        batch_prompt = "".join(prompt_parts)

        messages = [
            {"role": "system", "content": "You are an expert evaluator. Evaluate whether each generated answer meets the expected answer."},
            {"role": "user", "content": batch_prompt}
        ]

        # Call the OpenAI API with a structured response enforced via a JSON Schema.
        response = openai.beta.chat.completions.parse(
            model=judge_model,
            messages=messages,
            temperature=0,  # Deterministic output.
            response_format=EvaluationResults
        )

        judge_message = response.choices[0].message

        # Prefer the object the SDK already parsed; only fall back to parsing
        # the raw text ourselves when it is missing.
        eval_results = getattr(judge_message, "parsed", None)
        if eval_results is None:
            output_text = (judge_message.content or "").strip()
            try:
                # Parse the JSON output using the Pydantic model.
                eval_results = EvaluationResults.model_validate_json(output_text)
            except ValueError as e:  # pydantic's ValidationError is a ValueError
                print(f"Error parsing JSON: {e}\nReceived output: {output_text}")
                eval_results = None

        # A pydantic model instance is always truthy, so test for None explicitly.
        if eval_results is None:
            print("Skipping batch due to parsing error.")
            continue

        # Never count more verdicts than there are examples in the batch,
        # otherwise accuracy could exceed 1.0 when the judge over-produces.
        verdicts = eval_results.results[:len(batch)]
        correct_count += sum(1 for verdict in verdicts if verdict.strip().lower() == "correct")

    accuracy = correct_count / total if total > 0 else 0.0
    print(f"Accuracy: {accuracy * 100:.2f}% ({correct_count}/{total})")
    return accuracy
|
ragxo/utils.py
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
import functools
|
2
|
+
from alive_progress import alive_bar
|
3
|
+
|
4
|
+
|
5
|
+
def with_loading(title: str):
    """
    Decorator factory that shows a loading animation while the wrapped callable runs.

    Works for both methods and plain functions: all positional and keyword
    arguments are forwarded unchanged to the wrapped callable.

    Args:
        title (str): Title to display during loading

    Returns:
        Callable: A decorator. The decorated callable returns whatever the
        original returns; exceptions raised by the original propagate.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # The bar, stats and monitor widgets are switched off, so only the
            # title and the remaining alive_bar output are displayed.
            with alive_bar(title=title, bar=None, stats=False, monitor=False, stats_end=False) as bar:
                result = func(*args, **kwargs)
                # Register a single tick once the work has finished.
                bar()
            return result

        return wrapper

    return decorator
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: ragxo
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.12
|
4
4
|
Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
|
5
5
|
Home-page: https://github.com/yourusername/ragx
|
6
6
|
License: MIT
|
@@ -17,8 +17,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
18
18
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
19
19
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
|
+
Requires-Dist: alive-progress (>=3.1.1,<4.0.0)
|
20
21
|
Requires-Dist: boto3 (>=1.36.14,<2.0.0)
|
21
|
-
Requires-Dist:
|
22
|
+
Requires-Dist: datasets (>=3.2.0,<4.0.0)
|
23
|
+
Requires-Dist: dill (<0.3.9)
|
22
24
|
Requires-Dist: milvus (>=2.3.9,<3.0.0)
|
23
25
|
Requires-Dist: mocker (>=1.1.1,<2.0.0)
|
24
26
|
Requires-Dist: openai (>=1.61.1,<2.0.0)
|
@@ -30,12 +32,30 @@ Description-Content-Type: text/markdown
|
|
30
32
|
|
31
33
|
# RagXO
|
32
34
|
|
33
|
-
Export, version and reuse your RAG pipeline everywhere 🚀
|
35
|
+
Export, version, and reuse your end-to-end RAG pipeline everywhere 🚀
|
34
36
|
|
35
37
|
[PyPI version](https://badge.fury.io/py/ragxo)
|
36
38
|
[License: MIT](https://opensource.org/licenses/MIT)
|
37
39
|
[Python 3.8](https://www.python.org/downloads/release/python-380/)
|
38
40
|
|
41
|
+
## Table of Contents
|
42
|
+
- [Features](#features-)
|
43
|
+
- [Installation](#installation-️)
|
44
|
+
- [Quickstart](#quickstart-)
|
45
|
+
- [Build a RAG pipeline](#build-a-rag-pipeline)
|
46
|
+
- [Load a RAG pipeline](#load-a-rag-pipeline)
|
47
|
+
- [Usage Guide](#usage-guide-)
|
48
|
+
- [Import](#import)
|
49
|
+
- [Adding Preprocessing Steps](#adding-preprocessing-steps)
|
50
|
+
- [Custom Embedding Functions](#custom-embedding-functions)
|
51
|
+
- [Creating Documents](#creating-documents)
|
52
|
+
- [LLM Configuration](#llm-configuration)
|
53
|
+
- [Export and Load](#export-and-load)
|
54
|
+
- [Evaluation](#evaluation)
|
55
|
+
- [Best Practices](#best-practices-)
|
56
|
+
- [License](#license-)
|
57
|
+
- [Contributing](#contributing-)
|
58
|
+
|
39
59
|
RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
|
40
60
|
|
41
61
|
## Features ✨
|
@@ -57,6 +77,10 @@ pip install ragxo
|
|
57
77
|
|
58
78
|
### Build a RAG pipeline
|
59
79
|
|
80
|
+
```bash
|
81
|
+
export OPENAI_API_KEY=<openai_key>
|
82
|
+
```
|
83
|
+
|
60
84
|
```python
|
61
85
|
from ragxo import Ragxo, Document
|
62
86
|
|
@@ -207,6 +231,46 @@ ragxo_client.export("rag_pipeline_v1")
|
|
207
231
|
loaded_ragxo_client = Ragxo.load("rag_pipeline_v1")
|
208
232
|
```
|
209
233
|
|
234
|
+
### Evaluation
|
235
|
+
|
236
|
+
```python
|
237
|
+
from ragxo import EvaluationExample
|
238
|
+
|
239
|
+
# Create test examples
|
240
|
+
test_data = [
|
241
|
+
EvaluationExample(
|
242
|
+
query="What is the capital of France?",
|
243
|
+
expected="The capital of France is Paris."
|
244
|
+
),
|
245
|
+
EvaluationExample(
|
246
|
+
query="What is the capital of Germany?",
|
247
|
+
expected="The capital of Germany is Berlin."
|
248
|
+
),
|
249
|
+
]
|
250
|
+
|
251
|
+
# Evaluate the RAG system
|
252
|
+
accuracy = ragxo_client.evaluate(
|
253
|
+
test_data=test_data,
|
254
|
+
batch_size=10, # Process 10 examples at a time
|
255
|
+
judge_model="gpt-4o"  # Optional: specify a different model for evaluation (must support structured outputs)
|
256
|
+
)
|
257
|
+
|
258
|
+
print(f"Evaluation accuracy: {accuracy * 100:.2f}%")
|
259
|
+
```
|
260
|
+
|
261
|
+
The evaluation process:
|
262
|
+
1. Processes test examples in batches
|
263
|
+
2. Generates RAG responses for each query
|
264
|
+
3. Uses an LLM to compare generated answers with expected answers
|
265
|
+
4. Returns accuracy score (0.0 to 1.0)
|
266
|
+
|
267
|
+
Best practices for evaluation:
|
268
|
+
- Use diverse test examples
|
269
|
+
- Include edge cases
|
270
|
+
- Keep expected answers consistent in format
|
271
|
+
- Use a more capable model for evaluation (e.g., GPT-4o); the judge model must support structured outputs
|
272
|
+
- Adjust batch size based on your rate limits and needs
|
273
|
+
|
210
274
|
## Best Practices 💡
|
211
275
|
|
212
276
|
1. **Version Your Exports**: Use semantic versioning for your exports:
|
@@ -232,3 +296,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
232
296
|
## Contributing 🤝
|
233
297
|
|
234
298
|
Contributions are welcome! Please feel free to submit a Pull Request.
|
299
|
+
|
@@ -0,0 +1,6 @@
|
|
1
|
+
ragxo/__init__.py,sha256=BAVy_mbqGOaAMmXpIqB94za5WgxuK9DAfd6BtJUsM_s,108
|
2
|
+
ragxo/client.py,sha256=pX7v24Rw_MC6HInrxvSJUxNqFa1YdNjf8_-WySljP0o,17676
|
3
|
+
ragxo/utils.py,sha256=BQ3u1oSi-kRqYTnpnJHq1KebuoVnA15u_5REVlYuM1o,569
|
4
|
+
ragxo-0.1.12.dist-info/METADATA,sha256=3aw_8FCoQ86bl2KCt0CEZt39GCquCFxFoDyHOYJOEuk,8233
|
5
|
+
ragxo-0.1.12.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
6
|
+
ragxo-0.1.12.dist-info/RECORD,,
|
ragxo-0.1.10.dist-info/RECORD
DELETED
@@ -1,5 +0,0 @@
|
|
1
|
-
ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
|
2
|
-
ragxo/client.py,sha256=rmV01TZ3F8McLavVOokjLSUv78fdCA80oDRVWSw-38M,12791
|
3
|
-
ragxo-0.1.10.dist-info/METADATA,sha256=uk-eth9MdnPkG2WeZU7WGr7cVa6HuPtTZFjTkFVSJ_A,6371
|
4
|
-
ragxo-0.1.10.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
5
|
-
ragxo-0.1.10.dist-info/RECORD,,
|
File without changes
|