PyPI - ragxo - Versions diffs - 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl - Mend

ragxo 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (7) hide show

ragxo/__init__.py +2 -2
ragxo/client.py +112 -8
ragxo/utils.py +21 -0
{ragxo-0.1.10.dist-info → ragxo-0.1.12.dist-info}/METADATA +68 -3
ragxo-0.1.12.dist-info/RECORD +6 -0
ragxo-0.1.10.dist-info/RECORD +0 -5
{ragxo-0.1.10.dist-info → ragxo-0.1.12.dist-info}/WHEEL +0 -0

ragxo/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from .client import Ragxo, Document
-__all__ = ["Ragxo", "Document"]
+from .client import Ragxo, Document, EvaluationExample
+__all__ = ["Ragxo", "Document", "EvaluationExample"]

ragxo/client.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import time
-from typing import Self, Callable
+from typing import Literal, Self, Callable
 from pymilvus import MilvusClient
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 import boto3
 import dill
 import os
@@ -11,6 +11,7 @@ import tempfile
 from botocore.exceptions import ClientError
 import openai
 from openai import ChatCompletion
+from ragxo.utils import with_loading
 logger = logging.getLogger(__name__)
@@ -19,6 +20,14 @@ class Document(BaseModel):
     metadata: dict
     id: int
+class EvaluationExample(BaseModel):
+    query: str
+    expected: str
+class EvaluationResults(BaseModel):
+    results: list[str] = Field(description="A list of strings, each either 'correct' or 'incorrect'")
 class Ragxo:
     """
     A RAG (Retrieval-Augmented Generation) system that combines vector search with LLM responses.
@@ -51,6 +60,13 @@ class Ragxo:
         self.embedding_fn = None
         self.system_prompt = None
         self.model = "gpt-4o-mini"
+        self.limit = 10
+        self.temperature = 0.5
+        self.max_tokens = 2000
+        self.top_p = 1.0
+        self.frequency_penalty = 0.0
+        self.presence_penalty = 0.0
     def add_preprocess(self, fn: Callable) -> Self:
         """
@@ -140,6 +156,7 @@ class Ragxo:
         self.presence_penalty = presence_penalty
         return self
+    @with_loading("Indexing documents")
     def index(self, data: list[Document]) -> Self:
         """
         Index documents into the vector database.
@@ -210,6 +227,7 @@ class Ragxo:
             output_fields=output_fields
         )
+    @with_loading("Exporting Ragxo instance")
     def export(self, destination: str, s3_bucket: str = None) -> Self:
         """
         Export the Ragx instance to either local filesystem or S3.
@@ -310,7 +328,7 @@ class Ragxo:
             raise
     @classmethod
-    def _load_from_s3(cls, prefix: str, bucket: str) -> 'Ragx':
+    def _load_from_s3(cls, prefix: str, bucket: str) -> Self:
         """
         Internal classmethod to handle S3 loading.
         """
@@ -352,13 +370,17 @@ class Ragxo:
     def generate_llm_response(self,
                               query: str,
+                              history: list[dict] = [],
+                              messages: list[dict] = None,
                               data: list[dict] = None) -> ChatCompletion:
         """
         Generate LLM response based on query and retrieved data.
         Args:
-            query (str): User query
+            query (str): User query, this is used if messages is None
             data (list[dict], optional): Retrieved documents. If None, performs a new query
+            history (list[dict], optional): History of messages
+            messages (list[dict], optional): Messages to pass to the LLM: [{"role": "system", "content": system_prompt}, {"role": "user", "content": "Some user message"}, {"role": "assistant", "content": "Some assistant message"}]
         Returns:
             ChatCompletion: LLM response
@@ -375,9 +397,10 @@ class Ragxo:
         response = openai.chat.completions.create(
             model=self.model,
             messages=[
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": "query: {} data: {}".format(query, data)}
-            ],
+                {"role": "system", "content": self.system_prompt}
+            ] + history + [
+                {"role": "user", "content": f"query: {query} data: {data}"}
+            ] if messages is None else messages,
             temperature=self.temperature,
             max_tokens=self.max_tokens,
             top_p=self.top_p,
@@ -385,4 +408,85 @@ class Ragxo:
             presence_penalty=self.presence_penalty,
         )
-        return response
+        return response
+    @with_loading("Evaluating test dataset")
+    def evaluate(self, test_data: list[EvaluationExample], batch_size: int = 10, judge_model: str = "gpt-4o-mini") -> float:
+        """
+        Evaluate the performance of the RAG system on a test dataset using a single prompt per batch.
+        For each batch:
+        1. Generates an answer for each query.
+        2. Concatenates evaluation details (query, expected, generated answer) into one prompt.
+        3. Instructs the judge to output a JSON object strictly adhering to our schema:
+            {"results": ["correct", "incorrect", ...]}.
+        4. Parses the structured output and computes overall accuracy.
+        Args:
+            test_data (list[EvaluationExample]): List of evaluation examples.
+            batch_size (int): Number of examples to process per batch.
+        Returns:
+            float: Accuracy as a fraction of correct evaluations.
+        """
+        total = len(test_data)
+        correct_count = 0
+        for i in range(0, total, batch_size):
+            batch = test_data[i : i + batch_size]
+            batch_prompt = "Evaluate the following examples and output your answer as a JSON object with a single key \"results\" that maps to an array of strings. Each element in the array should be either \"correct\" or \"incorrect\", corresponding to each example in order.\n\n"
+            # For each example in the batch, generate the answer and include details.
+            for idx, example in enumerate(batch):
+                query = example.query
+                expected = example.expected
+                # Generate the answer using the RAG system.
+                llm_response = self.generate_llm_response(query)
+                generated_answer = llm_response.choices[0].message.content.strip()
+                batch_prompt += f"Example {idx+1}:\n"
+                batch_prompt += f"Query: {query}\n"
+                batch_prompt += f"Expected Answer: {expected}\n"
+                batch_prompt += f"Generated Answer: {generated_answer}\n\n"
+            # Append clear instructions for the structured output.
+            batch_prompt += (
+                "Return your output as a JSON object exactly in this format: "
+                "{\"results\": [\"correct\", \"incorrect\", ...]} with no additional text or markdown formatting."
+            )
+            messages = [
+                {"role": "system", "content": "You are an expert evaluator. Evaluate whether each generated answer meets the expected answer."},
+                {"role": "user", "content": batch_prompt}
+            ]
+            # Call the OpenAI API with a structured response enforced via a JSON Schema.
+            response = openai.beta.chat.completions.parse(
+                model=judge_model,
+                messages=messages,
+                temperature=0,  # Deterministic output.
+                response_format=EvaluationResults
+            )
+            output_text = response.choices[0].message.content.strip()
+            try:
+                # Parse the JSON output using the Pydantic model.
+                eval_results = EvaluationResults.model_validate_json(output_text)
+            except Exception as e:
+                print(f"Error parsing JSON: {e}\nReceived output: {output_text}")
+                eval_results = None
+            if eval_results:
+                for result in eval_results.results:
+                    if result.lower() == "correct":
+                        correct_count += 1
+            else:
+                print("Skipping batch due to parsing error.")
+        accuracy = correct_count / total if total > 0 else 0.0
+        print(f"Accuracy: {accuracy * 100:.2f}% ({correct_count}/{total})")
+        return accuracy

ragxo/utils.py ADDED Viewed

@@ -0,0 +1,21 @@
+import functools
+from alive_progress import alive_bar
+def with_loading(title: str):
+    """
+    Decorator to add loading animation to methods.
+    Args:
+        title (str): Title to display during loading
+    """
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(self, *args, **kwargs):
+            with alive_bar(title=title, bar=None, stats=False, monitor=False, stats_end=False) as bar:
+                result = func(self, *args, **kwargs)
+                bar()
+            return result
+        return wrapper
+    return decorator

{ragxo-0.1.10.dist-info → ragxo-0.1.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ragxo
-Version: 0.1.10
+Version: 0.1.12
 Summary: A RAG (Retrieval-Augmented Generation) toolkit with Milvus integration
 Home-page: https://github.com/yourusername/ragx
 License: MIT
@@ -17,8 +17,10 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Dist: alive-progress (>=3.1.1,<4.0.0)
 Requires-Dist: boto3 (>=1.36.14,<2.0.0)
-Requires-Dist: dill (>=0.3.9,<0.4.0)
+Requires-Dist: datasets (>=3.2.0,<4.0.0)
+Requires-Dist: dill (<0.3.9)
 Requires-Dist: milvus (>=2.3.9,<3.0.0)
 Requires-Dist: mocker (>=1.1.1,<2.0.0)
 Requires-Dist: openai (>=1.61.1,<2.0.0)
@@ -30,12 +32,30 @@ Description-Content-Type: text/markdown
 # RagXO
-Export, version and reuse your RAG pipeline everywhere 🚀
+Export, version and reuse your E2E RAG pipeline everywhere 🚀
 [![PyPI version](https://badge.fury.io/py/ragxo.svg)](https://badge.fury.io/py/ragxo)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/release/python-380/)
+## Table of Contents
+- [Features](#features-)
+- [Installation](#installation-️)
+- [Quickstart](#quickstart-)
+  - [Build a RAG pipeline](#build-a-rag-pipeline)
+  - [Load a RAG pipeline](#load-a-rag-pipeline)
+- [Usage Guide](#usage-guide-)
+  - [Import](#import)
+  - [Adding Preprocessing Steps](#adding-preprocessing-steps)
+  - [Custom Embedding Functions](#custom-embedding-functions)
+  - [Creating Documents](#creating-documents)
+  - [LLM Configuration](#llm-configuration)
+  - [Export and Load](#export-and-load)
+  - [Evaluation](#evaluation)
+- [Best Practices](#best-practices-)
+- [License](#license-)
+- [Contributing](#contributing-)
 RagXO extends the capabilities of traditional RAG (Retrieval-Augmented Generation) systems by providing a unified way to package, version, and deploy your entire RAG pipeline with LLM integration. Export your complete system—including embedding functions, preprocessing steps, vector store, and LLM configurations—into a single, portable artifact.
 ## Features ✨
@@ -57,6 +77,10 @@ pip install ragxo
 ### Build a RAG pipeline
+```bash
+export OPENAI_API_KEY=<openai_key>
+```
 ```python
 from ragxo import Ragxo, Document
@@ -207,6 +231,46 @@ ragxo_client.export("rag_pipeline_v1")
 loaded_ragxo_client = Ragxo.load("rag_pipeline_v1")
 ```
+### Evaluation
+```python
+from ragxo import EvaluationExample
+# Create test examples
+test_data = [
+    EvaluationExample(
+        query="What is the capital of France?",
+        expected="The capital of France is Paris."
+    ),
+    EvaluationExample(
+        query="What is the capital of Germany?",
+        expected="The capital of Germany is Berlin."
+    ),
+]
+# Evaluate the RAG system
+accuracy = ragxo_client.evaluate(
+    test_data=test_data,
+    batch_size=10,  # Process 10 examples at a time
+    judge_model="gpt-4"  # Optional: specify a different model for evaluation
+)
+print(f"Evaluation accuracy: {accuracy * 100:.2f}%")
+```
+The evaluation process:
+1. Processes test examples in batches
+2. Generates RAG responses for each query
+3. Uses an LLM to compare generated answers with expected answers
+4. Returns accuracy score (0.0 to 1.0)
+Best practices for evaluation:
+- Use diverse test examples
+- Include edge cases
+- Keep expected answers consistent in format
+- Use a more capable model for evaluation (e.g., GPT-4)
+- Adjust batch size based on your rate limits and needs
 ## Best Practices 💡
 1. **Version Your Exports**: Use semantic versioning for your exports:
@@ -232,3 +296,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## Contributing 🤝
 Contributions are welcome! Please feel free to submit a Pull Request.

ragxo-0.1.12.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+ragxo/__init__.py,sha256=BAVy_mbqGOaAMmXpIqB94za5WgxuK9DAfd6BtJUsM_s,108
+ragxo/client.py,sha256=pX7v24Rw_MC6HInrxvSJUxNqFa1YdNjf8_-WySljP0o,17676
+ragxo/utils.py,sha256=BQ3u1oSi-kRqYTnpnJHq1KebuoVnA15u_5REVlYuM1o,569
+ragxo-0.1.12.dist-info/METADATA,sha256=3aw_8FCoQ86bl2KCt0CEZt39GCquCFxFoDyHOYJOEuk,8233
+ragxo-0.1.12.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ragxo-0.1.12.dist-info/RECORD,,

ragxo-0.1.10.dist-info/RECORD DELETED Viewed

@@ -1,5 +0,0 @@
-ragxo/__init__.py,sha256=0VVe-z4XkkGQLQIG0hF0Hyf87_RgX0E4T9TRwwTkbmE,68
-ragxo/client.py,sha256=rmV01TZ3F8McLavVOokjLSUv78fdCA80oDRVWSw-38M,12791
-ragxo-0.1.10.dist-info/METADATA,sha256=uk-eth9MdnPkG2WeZU7WGr7cVa6HuPtTZFjTkFVSJ_A,6371
-ragxo-0.1.10.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-ragxo-0.1.10.dist-info/RECORD,,

{ragxo-0.1.10.dist-info → ragxo-0.1.12.dist-info}/WHEEL RENAMED Viewed

File without changes

ragxo 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

ragxo 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl