PyPI - ragxo - Versions diffs - 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl - Mend

ragxo: ragxo-0.1.11-py3-none-any.whl → ragxo-0.1.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

ragxo/__init__.py +2 -2
ragxo/client.py +109 -8
ragxo/utils.py +1 -1
{ragxo-0.1.11.dist-info → ragxo-0.1.12.dist-info}/METADATA +61 -2
ragxo-0.1.12.dist-info/RECORD +6 -0
ragxo-0.1.11.dist-info/RECORD +0 -6
{ragxo-0.1.11.dist-info → ragxo-0.1.12.dist-info}/WHEEL +0 -0

ragxo/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from .client import Ragxo, Document
-__all__ = ["Ragxo", "Document"]
+from .client import Ragxo, Document, EvaluationExample
+__all__ = ["Ragxo", "Document", "EvaluationExample"]

ragxo/client.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import time
-from typing import Self, Callable
+from typing import Literal, Self, Callable
 from pymilvus import MilvusClient
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 import boto3
 import dill
 import os
@@ -20,6 +20,14 @@ class Document(BaseModel):
     metadata: dict
     id: int
+class EvaluationExample(BaseModel):
+    query: str
+    expected: str
+class EvaluationResults(BaseModel):
+    results: list[str] = Field(description="A list of strings, each either 'correct' or 'incorrect'")
 class Ragxo:
     """
     A RAG (Retrieval-Augmented Generation) system that combines vector search with LLM responses.
@@ -52,6 +60,13 @@ class Ragxo:
         self.embedding_fn = None
         self.system_prompt = None
         self.model = "gpt-4o-mini"
+        self.limit = 10
+        self.temperature = 0.5
+        self.max_tokens = 2000
+        self.top_p = 1.0
+        self.frequency_penalty = 0.0
+        self.presence_penalty = 0.0
     def add_preprocess(self, fn: Callable) -> Self:
         """
@@ -313,7 +328,7 @@ class Ragxo:
             raise
     @classmethod
-    def _load_from_s3(cls, prefix: str, bucket: str) -> 'Ragx':
+    def _load_from_s3(cls, prefix: str, bucket: str) -> Self:
         """
         Internal classmethod to handle S3 loading.
         """
@@ -355,13 +370,17 @@ class Ragxo:
     def generate_llm_response(self,
                               query: str,
+                              history: list[dict] = [],
+                              messages: list[dict] = None,
                               data: list[dict] = None) -> ChatCompletion:
         """
         Generate LLM response based on query and retrieved data.
         Args:
-            query (str): User query
+            query (str): User query, this is used if messages is None
             data (list[dict], optional): Retrieved documents. If None, performs a new query
+            history (list[dict], optional): History of messages
+            messages (list[dict], optional): Messages to pass to the LLM: [{"role": "system", "content": system_prompt}, {"role": "user", "content": "Some user message"}, {"role": "assistant", "content": "Some assistant message"}]
         Returns:
             ChatCompletion: LLM response
@@ -378,9 +397,10 @@ class Ragxo:
         response = openai.chat.completions.create(
             model=self.model,
             messages=[
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": "query: {} data: {}".format(query, data)}
-            ],
+                {"role": "system", "content": self.system_prompt}
+            ] + history + [
+                {"role": "user", "content": f"query: {query} data: {data}"}
+            ] if messages is None else messages,
             temperature=self.temperature,
             max_tokens=self.max_tokens,
             top_p=self.top_p,
@@ -388,4 +408,85 @@ class Ragxo:
             presence_penalty=self.presence_penalty,
         )
-        return response
+        return response
+    @with_loading("Evaluating test dataset")
+    def evaluate(self, test_data: list[EvaluationExample], batch_size: int = 10, judge_model: str = "gpt-4o-mini") -> float:
+        """
+        Evaluate the performance of the RAG system on a test dataset using a single prompt per batch.
+        For each batch:
+        1. Generates an answer for each query.
+        2. Concatenates evaluation details (query, expected, generated answer) into one prompt.
+        3. Instructs the judge to output a JSON object strictly adhering to our schema:
+            {"results": ["correct", "incorrect", ...]}.
+        4. Parses the structured output and computes overall accuracy.
+        Args:
+            test_data (list[EvaluationExample]): List of evaluation examples.
+            batch_size (int): Number of examples to process per batch.
+        Returns:
+            float: Accuracy as a fraction of correct evaluations.
+        """
+        total = len(test_data)
+        correct_count = 0
+        for i in range(0, total, batch_size):
+            batch = test_data[i : i + batch_size]
+            batch_prompt = "Evaluate the following examples and output your answer as a JSON object with a single key \"results\" that maps to an array of strings. Each element in the array should be either \"correct\" or \"incorrect\", corresponding to each example in order.\n\n"
+            # For each example in the batch, generate the answer and include details.
+            for idx, example in enumerate(batch):
+                query = example.query
+                expected = example.expected
+                # Generate the answer using the RAG system.
+                llm_response = self.generate_llm_response(query)
+                generated_answer = llm_response.choices[0].message.content.strip()
+                batch_prompt += f"Example {idx+1}:\n"
+                batch_prompt += f"Query: {query}\n"
+                batch_prompt += f"Expected Answer: {expected}\n"
+                batch_prompt += f"Generated Answer: {generated_answer}\n\n"
+            # Append clear instructions for the structured output.
+            batch_prompt += (
+                "Return your output as a JSON object exactly in this format: "
+                "{\"results\": [\"correct\", \"incorrect\", ...]} with no additional text or markdown formatting."
+            )
+            messages = [
+                {"role": "system", "content": "You are an expert evaluator. Evaluate whether each generated answer meets the expected answer."},
+                {"role": "user", "content": batch_prompt}
+            ]
+            # Call the OpenAI API with a structured response enforced via a JSON Schema.
+            response = openai.beta.chat.completions.parse(
+                model=judge_model,
+                messages=messages,
+                temperature=0,  # Deterministic output.
+                response_format=EvaluationResults
+            )
+            output_text = response.choices[0].message.content.strip()
+            try:
+                # Parse the JSON output using the Pydantic model.
+                eval_results = EvaluationResults.model_validate_json(output_text)
+            except Exception as e:
+                print(f"Error parsing JSON: {e}\nReceived output: {output_text}")
+                eval_results = None
+            if eval_results:
+                for result in eval_results.results:
+                    if result.lower() == "correct":
+                        correct_count += 1
+            else:
+                print("Skipping batch due to parsing error.")
+        accuracy = correct_count / total if total > 0 else 0.0
+        print(f"Accuracy: {accuracy * 100:.2f}% ({correct_count}/{total})")
+        return accuracy

ragxo/utils.py CHANGED Viewed

@@ -13,7 +13,7 @@ def with_loading(title: str):
     def decorator(func):
         @functools.wraps(func)
         def wrapper(self, *args, **kwargs):
-            with alive_bar(title=title, bar=None, stats=False) as bar:
+            with alive_bar(title=title, bar=None, stats=False, monitor=False, stats_end=False) as bar:
                 result = func(self, *args, **kwargs)
                 bar()
             return result

{ragxo-0.1.11.dist-info → ragxo-0.1.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragxo
-Version: 0.1.11
+Version: 0.1.12
 Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
 Home-page: https://github.com/yourusername/ragx
 License: MIT
@@ -19,7 +19,8 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Dist: alive-progress (>=3.1.1,<4.0.0)
 Requires-Dist: boto3 (>=1.36.14,<2.0.0)
-Requires-Dist: dill (>=0.3.9,<0.4.0)
+Requires-Dist: datasets (>=3.2.0,<4.0.0)
+Requires-Dist: dill (<0.3.9)
 Requires-Dist: milvus (>=2.3.9,<3.0.0)
 Requires-Dist: mocker (>=1.1.1,<2.0.0)
 Requires-Dist: openai (>=1.61.1,<2.0.0)
@@ -37,6 +38,24 @@ Export, version and reuse your E2E RAG pipeline everywhere 🚀
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/release/python-380/)
+## Table of Contents
+- [Features](#features-)
+- [Installation](#installation-️)
+- [Quickstart](#quickstart-)
+  - [Build a RAG pipeline](#build-a-rag-pipeline)
+  - [Load a RAG pipeline](#load-a-rag-pipeline)
+- [Usage Guide](#usage-guide-)
+  - [Import](#import)
+  - [Adding Preprocessing Steps](#adding-preprocessing-steps)
+  - [Custom Embedding Functions](#custom-embedding-functions)
+  - [Creating Documents](#creating-documents)
+  - [LLM Configuration](#llm-configuration)
+  - [Export and Load](#export-and-load)
+  - [Evaluation](#evaluation)
+- [Best Practices](#best-practices-)
+- [License](#license-)
+- [Contributing](#contributing-)
 RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
 ## Features ✨
@@ -212,6 +231,46 @@ ragxo_client.export("rag_pipeline_v1")
 loaded_ragxo_client = Ragxo.load("rag_pipeline_v1")
 ```
+### Evaluation
+```python
+from ragxo import EvaluationExample
+# Create test examples
+test_data = [
+    EvaluationExample(
+        query="What is the capital of France?",
+        expected="The capital of France is Paris."
+    ),
+    EvaluationExample(
+        query="What is the capital of Germany?",
+        expected="The capital of Germany is Berlin."
+    ),
+]
+# Evaluate the RAG system
+accuracy = ragxo_client.evaluate(
+    test_data=test_data,
+    batch_size=10,  # Process 10 examples at a time
+    judge_model="gpt-4"  # Optional: specify a different model for evaluation
+)
+print(f"Evaluation accuracy: {accuracy * 100:.2f}%")
+```
+The evaluation process:
+1. Processes test examples in batches
+2. Generates RAG responses for each query
+3. Uses an LLM to compare generated answers with expected answers
+4. Returns accuracy score (0.0 to 1.0)
+Best practices for evaluation:
+- Use diverse test examples
+- Include edge cases
+- Keep expected answers consistent in format
+- Use a more capable model for evaluation (e.g., GPT-4)
+- Adjust batch size based on your rate limits and needs
 ## Best Practices 💡
 1. **Version Your Exports**: Use semantic versioning for your exports:

ragxo-0.1.12.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+ragxo/__init__.py,sha256=BAVy_mbqGOaAMmXpIqB94za5WgxuK9DAfd6BtJUsM_s,108
+ragxo/client.py,sha256=pX7v24Rw_MC6HInrxvSJUxNqFa1YdNjf8_-WySljP0o,17676
+ragxo/utils.py,sha256=BQ3u1oSi-kRqYTnpnJHq1KebuoVnA15u_5REVlYuM1o,569
+ragxo-0.1.12.dist-info/METADATA,sha256=3aw_8FCoQ86bl2KCt0CEZt39GCquCFxFoDyHOYJOEuk,8233
+ragxo-0.1.12.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ragxo-0.1.12.dist-info/RECORD,,

ragxo-0.1.11.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
-ragxo/client.py,sha256=6VE9h9XoEaS01irBp70brrWev8tNagtYnHOoyvjBVmo,12914
-ragxo/utils.py,sha256=yy5_ejmxU75mRhfE_XGtKOvKtHo8AoV1QFQlwMDEiFw,537
-ragxo-0.1.11.dist-info/METADATA,sha256=t3vwZl6LHtRgnrGqI8jMRrWGUy2c9tQlkxG45UDjjf4,6472
-ragxo-0.1.11.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-ragxo-0.1.11.dist-info/RECORD,,

{ragxo-0.1.11.dist-info → ragxo-0.1.12.dist-info}/WHEEL RENAMED Viewed

File without changes

ragxo 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl

ragxo: ragxo-0.1.11-py3-none-any.whl → ragxo-0.1.12-py3-none-any.whl