ragxo 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragxo/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
 
2
- from .client import Ragxo, Document
3
- __all__ = ["Ragxo", "Document"]
2
+ from .client import Ragxo, Document, EvaluationExample
3
+ __all__ = ["Ragxo", "Document", "EvaluationExample"]
ragxo/client.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import time
2
- from typing import Self, Callable
2
+ from typing import Literal, Self, Callable
3
3
  from pymilvus import MilvusClient
4
- from pydantic import BaseModel
4
+ from pydantic import BaseModel, Field
5
5
  import boto3
6
6
  import dill
7
7
  import os
@@ -11,6 +11,7 @@ import tempfile
11
11
  from botocore.exceptions import ClientError
12
12
  import openai
13
13
  from openai import ChatCompletion
14
+ from ragxo.utils import with_loading
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
 
@@ -19,6 +20,14 @@ class Document(BaseModel):
19
20
  metadata: dict
20
21
  id: int
21
22
 
23
class EvaluationExample(BaseModel):
    """A single evaluation case: a query paired with its reference answer."""

    # Query text; `Ragxo.evaluate` passes it to `generate_llm_response`.
    query: str
    # Reference answer; shown to the judge model as the "Expected Answer".
    expected: str
26
+
27
class EvaluationResults(BaseModel):
    """
    Structured judge output for one evaluation batch.

    `Ragxo.evaluate` passes this model as the `response_format` of the judge
    call and uses it to validate the JSON the judge returns.
    """

    # One verdict per example, in the order the examples were presented.
    results: list[str] = Field(description="A list of strings, each either 'correct' or 'incorrect'")
29
+
30
+
22
31
  class Ragxo:
23
32
  """
24
33
  A RAG (Retrieval-Augmented Generation) system that combines vector search with LLM responses.
@@ -51,6 +60,13 @@ class Ragxo:
51
60
  self.embedding_fn = None
52
61
  self.system_prompt = None
53
62
  self.model = "gpt-4o-mini"
63
+ self.limit = 10
64
+ self.temperature = 0.5
65
+ self.max_tokens = 2000
66
+ self.top_p = 1.0
67
+ self.frequency_penalty = 0.0
68
+ self.presence_penalty = 0.0
69
+
54
70
 
55
71
  def add_preprocess(self, fn: Callable) -> Self:
56
72
  """
@@ -140,6 +156,7 @@ class Ragxo:
140
156
  self.presence_penalty = presence_penalty
141
157
  return self
142
158
 
159
+ @with_loading("Indexing documents")
143
160
  def index(self, data: list[Document]) -> Self:
144
161
  """
145
162
  Index documents into the vector database.
@@ -210,6 +227,7 @@ class Ragxo:
210
227
  output_fields=output_fields
211
228
  )
212
229
 
230
+ @with_loading("Exporting Ragxo instance")
213
231
  def export(self, destination: str, s3_bucket: str = None) -> Self:
214
232
  """
215
233
  Export the Ragx instance to either local filesystem or S3.
@@ -310,7 +328,7 @@ class Ragxo:
310
328
  raise
311
329
 
312
330
  @classmethod
313
- def _load_from_s3(cls, prefix: str, bucket: str) -> 'Ragx':
331
+ def _load_from_s3(cls, prefix: str, bucket: str) -> Self:
314
332
  """
315
333
  Internal classmethod to handle S3 loading.
316
334
  """
@@ -352,13 +370,17 @@ class Ragxo:
352
370
 
353
371
  def generate_llm_response(self,
354
372
  query: str,
373
+ history: list[dict] = [],
374
+ messages: list[dict] = None,
355
375
  data: list[dict] = None) -> ChatCompletion:
356
376
  """
357
377
  Generate LLM response based on query and retrieved data.
358
378
 
359
379
  Args:
360
- query (str): User query
380
+ query (str): User query, this is used if messages is None
361
381
  data (list[dict], optional): Retrieved documents. If None, performs a new query
382
+ history (list[dict], optional): History of messages
383
+ messages (list[dict], optional): Messages to pass to the LLM: [{"role": "system", "content": system_prompt}, {"role": "user", "content": "Some user message"}, {"role": "assistant", "content": "Some assistant message"}]
362
384
 
363
385
  Returns:
364
386
  ChatCompletion: LLM response
@@ -375,9 +397,10 @@ class Ragxo:
375
397
  response = openai.chat.completions.create(
376
398
  model=self.model,
377
399
  messages=[
378
- {"role": "system", "content": self.system_prompt},
379
- {"role": "user", "content": "query: {} data: {}".format(query, data)}
380
- ],
400
+ {"role": "system", "content": self.system_prompt}
401
+ ] + history + [
402
+ {"role": "user", "content": f"query: {query} data: {data}"}
403
+ ] if messages is None else messages,
381
404
  temperature=self.temperature,
382
405
  max_tokens=self.max_tokens,
383
406
  top_p=self.top_p,
@@ -385,4 +408,85 @@ class Ragxo:
385
408
  presence_penalty=self.presence_penalty,
386
409
  )
387
410
 
388
- return response
411
+ return response
412
+
413
+
414
+
415
@with_loading("Evaluating test dataset")
def evaluate(self, test_data: list[EvaluationExample], batch_size: int = 10, judge_model: str = "gpt-4o-mini") -> float:
    """
    Evaluate the performance of the RAG system on a test dataset using a single prompt per batch.

    For each batch:
    1. Generates an answer for each query with ``generate_llm_response``.
    2. Concatenates evaluation details (query, expected, generated answer) into one prompt.
    3. Instructs the judge to output a JSON object strictly adhering to our schema:
       {"results": ["correct", "incorrect", ...]}.
    4. Reads the structured output and counts the "correct" verdicts, at most
       one per example in the batch.

    A batch whose judge output is missing or cannot be parsed is skipped: it
    adds no correct answers, but its examples still count towards the total.

    Args:
        test_data (list[EvaluationExample]): List of evaluation examples.
        batch_size (int): Number of examples to process per batch. Must be at least 1.
        judge_model (str): Model used to judge the generated answers. It is
            called with ``response_format=EvaluationResults``.

    Returns:
        float: Accuracy as a fraction of correct evaluations (0.0 for an empty dataset).

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # range() already rejects a step of 0; a negative step would silently
    # produce no batches and report 0% accuracy, so reject it explicitly.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    total = len(test_data)
    correct_count = 0

    # Loop-invariant prompt pieces.
    prompt_header = "Evaluate the following examples and output your answer as a JSON object with a single key \"results\" that maps to an array of strings. Each element in the array should be either \"correct\" or \"incorrect\", corresponding to each example in order.\n\n"
    prompt_footer = (
        "Return your output as a JSON object exactly in this format: "
        "{\"results\": [\"correct\", \"incorrect\", ...]} with no additional text or markdown formatting."
    )
    system_message = {"role": "system", "content": "You are an expert evaluator. Evaluate whether each generated answer meets the expected answer."}

    for start in range(0, total, batch_size):
        batch = test_data[start : start + batch_size]

        # For each example in the batch, generate the answer and include details.
        sections = [prompt_header]
        for idx, example in enumerate(batch, start=1):
            # Generate the answer using the RAG system.
            llm_response = self.generate_llm_response(example.query)
            # The message content can be None; treat that as an empty answer
            # instead of crashing on .strip().
            generated_answer = (llm_response.choices[0].message.content or "").strip()

            sections.append(
                f"Example {idx}:\n"
                f"Query: {example.query}\n"
                f"Expected Answer: {example.expected}\n"
                f"Generated Answer: {generated_answer}\n\n"
            )

        # Append clear instructions for the structured output.
        sections.append(prompt_footer)
        batch_prompt = "".join(sections)

        messages = [
            system_message,
            {"role": "user", "content": batch_prompt},
        ]

        # Call the OpenAI API with a structured response enforced via a JSON Schema.
        response = openai.beta.chat.completions.parse(
            model=judge_model,
            messages=messages,
            temperature=0,  # Deterministic output.
            response_format=EvaluationResults,
        )

        judge_message = response.choices[0].message
        # Prefer the object the SDK already parsed; fall back to validating
        # the raw text ourselves when it is not available.
        eval_results = getattr(judge_message, "parsed", None)
        if eval_results is None:
            output_text = (judge_message.content or "").strip()
            try:
                # Parse the JSON output using the Pydantic model.
                eval_results = EvaluationResults.model_validate_json(output_text)
            except ValueError as e:  # pydantic's ValidationError is a ValueError
                print(f"Error parsing JSON: {e}\nReceived output: {output_text}")
                eval_results = None

        if eval_results is None:
            print("Skipping batch due to parsing error.")
            continue

        verdicts = eval_results.results
        if len(verdicts) != len(batch):
            print(f"Judge returned {len(verdicts)} results for {len(batch)} examples; counting at most {len(batch)}.")
        # Never count more verdicts than examples, so accuracy stays within [0, 1].
        correct_count += sum(1 for verdict in verdicts[: len(batch)] if verdict.lower() == "correct")

    accuracy = correct_count / total if total > 0 else 0.0
    print(f"Accuracy: {accuracy * 100:.2f}% ({correct_count}/{total})")
    return accuracy
ragxo/utils.py ADDED
@@ -0,0 +1,21 @@
1
+ import functools
2
+ from alive_progress import alive_bar
3
+
4
+
5
def with_loading(title: str):
    """
    Decorator to add loading animation to methods and plain functions.

    The wrapped callable runs inside an ``alive_bar`` context; its return
    value is passed through unchanged and any exception it raises propagates
    to the caller.

    Args:
        title (str): Title to display during loading

    Returns:
        A decorator that wraps a callable with the loading animation.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Forward every argument as-is (including `self` for methods) so
            # the decorator also works on callables that take no arguments.
            # NOTE(review): bar=None with stats/monitor disabled presumably
            # renders a title-and-spinner-only display — confirm against alive-progress.
            with alive_bar(title=title, bar=None, stats=False, monitor=False, stats_end=False) as bar:
                result = func(*args, **kwargs)
                # Tick once, only after the wrapped call has succeeded.
                bar()
            return result
        return wrapper
    return decorator
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ragxo
3
- Version: 0.1.10
3
+ Version: 0.1.12
4
4
  Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
5
5
  Home-page: https://github.com/yourusername/ragx
6
6
  License: MIT
@@ -17,8 +17,10 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
19
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Requires-Dist: alive-progress (>=3.1.1,<4.0.0)
20
21
  Requires-Dist: boto3 (>=1.36.14,<2.0.0)
21
- Requires-Dist: dill (>=0.3.9,<0.4.0)
22
+ Requires-Dist: datasets (>=3.2.0,<4.0.0)
23
+ Requires-Dist: dill (<0.3.9)
22
24
  Requires-Dist: milvus (>=2.3.9,<3.0.0)
23
25
  Requires-Dist: mocker (>=1.1.1,<2.0.0)
24
26
  Requires-Dist: openai (>=1.61.1,<2.0.0)
@@ -30,12 +32,30 @@ Description-Content-Type: text/markdown
30
32
 
31
33
  # RagXO
32
34
 
33
- Export, version and reuse your RAG pipeline everywhere 🚀
35
+ Export, version and reuse your E2E RAG pipeline everywhere 🚀
34
36
 
35
37
  [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
36
38
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
37
39
  [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/release/python-3110/)
38
40
 
41
+ ## Table of Contents
42
+ - [Features](#features-)
43
+ - [Installation](#installation-️)
44
+ - [Quickstart](#quickstart-)
45
+ - [Build a RAG pipeline](#build-a-rag-pipeline)
46
+ - [Load a RAG pipeline](#load-a-rag-pipeline)
47
+ - [Usage Guide](#usage-guide-)
48
+ - [Import](#import)
49
+ - [Adding Preprocessing Steps](#adding-preprocessing-steps)
50
+ - [Custom Embedding Functions](#custom-embedding-functions)
51
+ - [Creating Documents](#creating-documents)
52
+ - [LLM Configuration](#llm-configuration)
53
+ - [Export and Load](#export-and-load)
54
+ - [Evaluation](#evaluation)
55
+ - [Best Practices](#best-practices-)
56
+ - [License](#license-)
57
+ - [Contributing](#contributing-)
58
+
39
59
  RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
40
60
 
41
61
  ## Features ✨
@@ -57,6 +77,10 @@ pip install ragxo
57
77
 
58
78
  ### Build a RAG pipeline
59
79
 
80
+ ```bash
81
+ export OPENAI_API_KEY=<openai_key>
82
+ ```
83
+
60
84
  ```python
61
85
  from ragxo import Ragxo, Document
62
86
 
@@ -207,6 +231,46 @@ ragxo_client.export("rag_pipeline_v1")
207
231
  loaded_ragxo_client = Ragxo.load("rag_pipeline_v1")
208
232
  ```
209
233
 
234
+ ### Evaluation
235
+
236
+ ```python
237
+ from ragxo import EvaluationExample
238
+
239
+ # Create test examples
240
+ test_data = [
241
+ EvaluationExample(
242
+ query="What is the capital of France?",
243
+ expected="The capital of France is Paris."
244
+ ),
245
+ EvaluationExample(
246
+ query="What is the capital of Germany?",
247
+ expected="The capital of Germany is Berlin."
248
+ ),
249
+ ]
250
+
251
+ # Evaluate the RAG system
252
+ accuracy = ragxo_client.evaluate(
253
+ test_data=test_data,
254
+ batch_size=10, # Process 10 examples at a time
255
+ judge_model="gpt-4o" # Optional: specify a different model for evaluation (it must support Structured Outputs)
256
+ )
257
+
258
+ print(f"Evaluation accuracy: {accuracy * 100:.2f}%")
259
+ ```
260
+
261
+ The evaluation process:
262
+ 1. Processes test examples in batches
263
+ 2. Generates RAG responses for each query
264
+ 3. Uses an LLM to compare generated answers with expected answers
265
+ 4. Returns accuracy score (0.0 to 1.0)
266
+
267
+ Best practices for evaluation:
268
+ - Use diverse test examples
269
+ - Include edge cases
270
+ - Keep expected answers consistent in format
271
+ - Use a more capable model for evaluation (e.g., GPT-4o); the judge model must support Structured Outputs
272
+ - Adjust batch size based on your rate limits and needs
273
+
210
274
  ## Best Practices 💡
211
275
 
212
276
  1. **Version Your Exports**: Use semantic versioning for your exports:
@@ -232,3 +296,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
232
296
  ## Contributing 🤝
233
297
 
234
298
  Contributions are welcome! Please feel free to submit a Pull Request.
299
+
@@ -0,0 +1,6 @@
1
+ ragxo/__init__.py,sha256=BAVy_mbqGOaAMmXpIqB94za5WgxuK9DAfd6BtJUsM_s,108
2
+ ragxo/client.py,sha256=pX7v24Rw_MC6HInrxvSJUxNqFa1YdNjf8_-WySljP0o,17676
3
+ ragxo/utils.py,sha256=BQ3u1oSi-kRqYTnpnJHq1KebuoVnA15u_5REVlYuM1o,569
4
+ ragxo-0.1.12.dist-info/METADATA,sha256=3aw_8FCoQ86bl2KCt0CEZt39GCquCFxFoDyHOYJOEuk,8233
5
+ ragxo-0.1.12.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
6
+ ragxo-0.1.12.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
2
- ragxo/client.py,sha256=rmV01TZ3F8McLavVOokjLSUv78fdCA80oDRVWSw-38M,12791
3
- ragxo-0.1.10.dist-info/METADATA,sha256=uk-eth9MdnPkG2WeZU7WGr7cVa6HuPtTZFjTkFVSJ_A,6371
4
- ragxo-0.1.10.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
5
- ragxo-0.1.10.dist-info/RECORD,,
File without changes