rakam-eval-sdk 0.1.15__py3-none-any.whl

File without changes
rakam_eval_sdk/client.py
@@ -0,0 +1,202 @@
+ import os
+ import random
+ from typing import Any, List, Optional, cast
+
+ import requests
+
+ from .schema import (
+     EvalConfig,
+     MetricConfig,
+     SchemaEvalConfig,
+     SchemaInputItem,
+     SchemaMetricConfig,
+     TextInputItem,
+ )
+
+
+ class DeepEvalClient:
+     """
+     Client for interacting with the DeepEval API.
+     Provides synchronous and background evaluation with optional probability-based execution.
+     """
+
+     def __init__(
+         self,
+         base_url: Optional[str] = None,
+         api_token: Optional[str] = None,
+         settings_module: Optional[Any] = None,  # optional external settings
+         timeout: int = 30,
+     ):
+         settings_url = getattr(settings_module, "EVALFRAMWORK_URL", None)
+         settings_token = getattr(settings_module, "EVALFRAMWORK_API_KEY", None)
+
+         raw_url = (
+             base_url
+             or settings_url
+             or os.getenv("EVALFRAMWORK_URL")
+             or "http://localhost:8080"
+         )
+         self.base_url = raw_url.rstrip("/")
+         self.api_token = (
+             api_token or settings_token or os.getenv("EVALFRAMWORK_API_KEY", "")
+         )
+         self.timeout = timeout
+
+     def _request(
+         self,
+         endpoint: str,
+         payload: dict,
+         raise_exception: bool = False,
+     ) -> Optional[dict]:
+         """Internal helper to send POST requests with standard headers and error handling."""
+         url = f"{self.base_url}{endpoint}"
+         headers = {
+             "accept": "application/json",
+             "Content-Type": "application/json",
+             "X-API-Token": self.api_token,
+         }
+
+         try:
+             resp = requests.post(
+                 url, headers=headers, json=payload, timeout=self.timeout
+             )
+             if raise_exception:
+                 resp.raise_for_status()
+         except requests.RequestException as e:
+             if raise_exception:
+                 raise
+             return {"error": str(e)}
+
+         try:
+             return cast(dict, resp.json())
+         except ValueError:
+             if raise_exception:
+                 raise
+             return {"error": "Invalid JSON response", "raw": resp.text}
+
+     def text_eval(
+         self,
+         data: List[TextInputItem],
+         metrics: List[MetricConfig],
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Run synchronous text evaluation."""
+         payload = EvalConfig.model_construct(
+             data=data, metrics=metrics, component=component
+         ).model_dump()
+         return self._request("/deepeval/text-eval", payload, raise_exception)
+
+     def text_eval_background(
+         self,
+         data: List[TextInputItem],
+         metrics: List[MetricConfig],
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Run background text evaluation (async job)."""
+         payload = EvalConfig.model_construct(
+             data=data, metrics=metrics, component=component
+         ).model_dump()
+         return self._request("/deepeval/text-eval/background", payload, raise_exception)
+
+     def schema_eval(
+         self,
+         data: List[SchemaInputItem],
+         metrics: List[SchemaMetricConfig],
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Run synchronous schema evaluation."""
+         payload = SchemaEvalConfig.model_construct(
+             data=data, metrics=metrics, component=component
+         ).model_dump()
+         return self._request("/deepeval/schema-eval", payload, raise_exception)
+
+     def schema_eval_background(
+         self,
+         data: List[SchemaInputItem],
+         metrics: List[SchemaMetricConfig],
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Run background schema evaluation (async job)."""
+         payload = SchemaEvalConfig.model_construct(
+             data=data, metrics=metrics, component=component
+         ).model_dump()
+         return self._request(
+             "/deepeval/schema-eval/background", payload, raise_exception
+         )
+
+     def maybe_text_eval(
+         self,
+         data: List[TextInputItem],
+         metrics: List[MetricConfig],
+         chance: float,
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Randomly run text_eval based on a probability between 0 and 1."""
+         self._validate_chance(chance)
+         return (
+             self.text_eval(data, metrics, raise_exception, component=component)
+             if random.random() <= chance
+             else None
+         )
+
+     def maybe_text_eval_background(
+         self,
+         data: List[TextInputItem],
+         metrics: List[MetricConfig],
+         chance: float,
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Randomly run text_eval_background based on a probability between 0 and 1."""
+         self._validate_chance(chance)
+         return (
+             self.text_eval_background(
+                 data, metrics, raise_exception, component=component
+             )
+             if random.random() <= chance
+             else None
+         )
+
+     def maybe_schema_eval(
+         self,
+         data: List[SchemaInputItem],
+         metrics: List[SchemaMetricConfig],
+         chance: float,
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Randomly run schema_eval based on a probability between 0 and 1."""
+         self._validate_chance(chance)
+         return (
+             self.schema_eval(data, metrics, raise_exception, component=component)
+             if random.random() <= chance
+             else None
+         )
+
+     def maybe_schema_eval_background(
+         self,
+         data: List[SchemaInputItem],
+         metrics: List[SchemaMetricConfig],
+         chance: float,
+         raise_exception: bool = False,
+         component: str = "unknown",
+     ) -> Optional[dict]:
+         """Randomly run schema_eval_background based on a probability between 0 and 1."""
+         self._validate_chance(chance)
+         return (
+             self.schema_eval_background(
+                 data, metrics, raise_exception, component=component
+             )
+             if random.random() <= chance
+             else None
+         )
+
+     @staticmethod
+     def _validate_chance(chance: float) -> None:
+         """Ensure chance is a valid probability between 0 and 1."""
+         if not (0 <= chance <= 1):
+             raise ValueError("chance must be between 0 and 1.")
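
As `_request` above shows, failures come back as a dict of the form `{"error": ...}` when `raise_exception` is False, and the `maybe_*` wrappers return `None` whenever the random draw exceeds `chance`. A minimal calling sketch, assuming the module layout listed in the RECORD hunk below (`rakam_eval_sdk.client`, `rakam_eval_sdk.schema`) and a reachable server:

```python
# Sketch: distinguishing the three possible outcomes of a maybe_* call.
from rakam_eval_sdk.client import DeepEvalClient
from rakam_eval_sdk.schema import AnswerRelevancyConfig, TextInputItem

client = DeepEvalClient(base_url="http://localhost:8080", api_token="your-api-key")

result = client.maybe_text_eval(
    data=[
        TextInputItem(
            id="smoke-test",
            input="What is OCR?",
            output="Optical character recognition.",
        )
    ],
    metrics=[AnswerRelevancyConfig(threshold=0.7)],
    chance=0.1,  # only ~10% of calls actually hit the API
)

if result is None:
    pass  # skipped by the probability gate
elif "error" in result:
    print("evaluation failed:", result["error"])  # network/JSON error, surfaced instead of raised
else:
    print("evaluation response:", result)
```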
rakam_eval_sdk/schema.py
@@ -0,0 +1,128 @@
+ # Common base class for all metric configs
+ import sys
+ from typing import Any, Dict, List, Literal, Optional, Union
+
+ # Base class (you can keep this abstract)
+ from pydantic import BaseModel, Field
+
+ if sys.version_info < (3, 9):
+     from typing_extensions import Annotated
+ else:
+     from typing import Annotated
+
+
+ class MetricConfigBase(BaseModel):
+     type: str
+     name: Optional[str] = None
+
+
+ class ClientSideMetricConfig(BaseModel):
+     name: str
+     score: float
+     success: Optional[int] = 1
+     evaluation_cost: Optional[float] = 0
+     reason: Optional[str] = None
+     threshold: Optional[float] = 0
+
+
+ class OCRSimilarityConfig(MetricConfigBase):
+     type: Literal["ocr_similarity"] = "ocr_similarity"
+     threshold: float = 0.5
+
+
+ class CorrectnessConfig(MetricConfigBase):
+     type: Literal["correctness"] = "correctness"
+     model: str = "gpt-4.1"
+     steps: List[str] = Field(
+         default=[
+             "Check if the OCR model extracted the important information correctly. "
+             "Minor formatting differences like '$1,250.00' vs '$1250.00' are acceptable."
+         ]
+     )
+     criteria: Optional[str] = None
+     params: List[Literal["actual_output", "expected_output"]] = Field(
+         default=["actual_output", "expected_output"]
+     )
+
+
+ class AnswerRelevancyConfig(MetricConfigBase):
+     type: Literal["answer_relevancy"] = "answer_relevancy"
+     threshold: float = 0.7
+     model: str = "gpt-4.1"
+     include_reason: bool = True
+
+
+ class FaithfulnessConfig(MetricConfigBase):
+     type: Literal["faithfulness"] = "faithfulness"
+     threshold: float = 0.7
+     model: str = "gpt-4.1"
+     include_reason: bool = True
+
+
+ class ToxicityConfig(MetricConfigBase):
+     type: Literal["toxicity"] = "toxicity"
+     threshold: float = 0.5
+     model: str = "gpt-4.1"
+     include_reason: bool = True
+
+
+ class JsonCorrectnessConfig(MetricConfigBase):
+     type: Literal["json_correctness"] = "json_correctness"
+     threshold: float = 0.5
+     model: str = "gpt-4.1"
+     include_reason: bool = True
+     excpected_schema: Dict[str, Any]
+
+
+ class FieldsPresenceConfig(MetricConfigBase):
+     type: Literal["fields_presence"] = "fields_presence"
+     excpected_schema: Dict[str, Any]
+     threshold: float = 0.5
+     include_reason: bool = True
+     strict_mode: bool = True
+
+
+ MetricConfig = Annotated[
+     Union[
+         OCRSimilarityConfig,
+         CorrectnessConfig,
+         AnswerRelevancyConfig,
+         FaithfulnessConfig,
+         ToxicityConfig,
+     ],
+     Field(discriminator="type"),
+ ]
+
+ SchemaMetricConfig = Annotated[
+     Union[JsonCorrectnessConfig, FieldsPresenceConfig],
+     Field(discriminator="type"),
+ ]
+
+
+ class InputItem(BaseModel):
+     id: Optional[str] = None  # set to optional to keep backward compatibility
+     input: str
+     output: str
+     metrics: Optional[List[ClientSideMetricConfig]] = []
+
+
+ class TextInputItem(InputItem):
+     expected_output: Optional[str] = None
+     retrieval_context: Optional[List[str]] = None
+
+
+ class SchemaInputItem(InputItem):
+     expected_output: Optional[str] = None
+     # retrieval_context: list[Json[Any]] = None
+
+
+ class EvalConfig(BaseModel):
+     component: str = "unknown"
+     data: List[TextInputItem]
+     metrics: List[MetricConfig] = Field(default_factory=list)
+
+
+ class SchemaEvalConfig(BaseModel):
+     component: str = "unknown"
+     data: List[SchemaInputItem]
+     metrics: List[SchemaMetricConfig] = Field(default_factory=list)
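
`MetricConfig` and `SchemaMetricConfig` above are pydantic discriminated unions keyed on the `type` field, so metric configs arriving as plain dicts (for example, deserialized JSON) can be validated into the matching config class. A small sketch using pydantic v2's `TypeAdapter` (the package pins pydantic>=2.10.6):

```python
# Sketch: resolving raw dicts against the discriminated unions defined above.
from pydantic import TypeAdapter

from rakam_eval_sdk.schema import MetricConfig, SchemaMetricConfig, ToxicityConfig

metric = TypeAdapter(MetricConfig).validate_python(
    {"type": "toxicity", "threshold": 0.3, "include_reason": False}
)
assert isinstance(metric, ToxicityConfig)  # selected via the "type" discriminator

schema_metric = TypeAdapter(SchemaMetricConfig).validate_python(
    {"type": "fields_presence", "excpected_schema": {"name": "str"}}
)
print(type(schema_metric).__name__)  # FieldsPresenceConfig
```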
rakam_eval_sdk-0.1.15.dist-info/METADATA
@@ -0,0 +1,165 @@
+ Metadata-Version: 2.3
+ Name: rakam-eval-sdk
+ Version: 0.1.15
+ Summary: Evaluation Framework SDK
+ Author: Mohamed Bachar Touil
+ License: MIT
+ Requires-Dist: pydantic>=2.10.6
+ Requires-Dist: requests
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+
+ # DeepEvalClient
+
+ A lightweight Python client for interacting with the **Evaluation API**.
+ It provides convenient wrappers for the text and schema evaluation endpoints, with support for background jobs and probabilistic execution.
+
+ ---
+
+ ## Features
+
+ - 🔹 **Text Evaluation** – Run evaluations on plain text inputs.
+ - 🔹 **Schema Evaluation** – Evaluate structured outputs against schema-based metrics.
+ - 🔹 **Background Jobs** – Submit evaluation jobs asynchronously and process the results later.
+ - 🔹 **Probabilistic Execution** – Run evaluations with a configurable chance (e.g., to sample production traffic or for A/B testing scenarios).
+ - 🔹 **Robust Error Handling** – Handles network errors and invalid JSON responses gracefully.
+ - 🔹 **Configurable** – Configure via constructor arguments, environment variables, or an external settings module.
+
+ ---
+
+ ## Installation
+
+ ```bash
+ pip install rakam-eval-sdk
+ ```
+
+ ## Usage
+
+ ### 1. Basic Setup
+
+ ```python
+ from rakam_eval_sdk.client import DeepEvalClient
+ from rakam_eval_sdk.schema import CorrectnessConfig, TextInputItem, ToxicityConfig
+
+ client = DeepEvalClient(
+     base_url="http://localhost:8080",
+     api_token="your-api-key",
+ )
+ ```
+
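The examples below use the background, probability-gated variants; for a quick check you can also call the synchronous `text_eval`, which blocks until the server responds. A minimal sketch with placeholder values, reusing the names imported above:

```python
result = client.text_eval(
    component="ocr",
    data=[
        TextInputItem(
            id="smoke-test",
            input="...",   # input given to the AI component
            output="...",  # output of the AI component
        )
    ],
    metrics=[ToxicityConfig(threshold=0.5)],
)
print(result)  # parsed JSON response, or {"error": ...} when raise_exception is False
```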
+ ### 2. Text Evaluation
+
+ ```python
+ client.maybe_text_eval_background(
+     component="ocr",
+     data=[
+         TextInputItem(
+             id="runtime evaluation",  # identifier (can be unique); reuse the same id to track performance over time
+             input="...",   # input given to the AI component
+             output="...",  # output of the AI component
+             # optional arguments, required conditionally depending on the metrics passed
+             expected_output="...",
+             retrieval_context=["..."],
+         )
+     ],
+     metrics=[
+         ToxicityConfig(
+             # model="gpt-4.1",
+             threshold=0.2,
+             include_reason=False,
+         ),
+         CorrectnessConfig(
+             steps=[
+                 "You are evaluating text extracted from resumes and job descriptions using OCR.",
+                 "1. Verify that the extracted text is coherent and free of major corruption (e.g., broken words, random characters).",
+                 "2. Check whether key resume/job-related fields are preserved correctly (e.g., name, job title, skills, education, experience, company name, job requirements).",
+                 "3. Ensure that important details are not missing or replaced with irrelevant content.",
+                 "4. Ignore minor formatting issues (line breaks, spacing) as long as the information is readable and accurate.",
+                 "5. Consider the output correct if it faithfully represents the resume or job description’s main information.",
+             ],
+             params=["actual_output"],
+         ),
+     ],
+     chance=0.3,
+ )
+ ```
+
+ ### 3. Schema Evaluation
+
+ ```python
+ from rakam_eval_sdk.schema import FieldsPresenceConfig, JsonCorrectnessConfig, SchemaInputItem
+
+ client.maybe_schema_eval_background(
+     component="ocr",
+     data=[
+         SchemaInputItem(
+             id="runtime evaluation",  # identifier (can be unique); reuse the same id to track performance over time
+             input="...",            # input given to the AI component
+             output="...",           # structured (JSON) output of the AI component
+             expected_output="...",  # optional, conditional on the metrics passed
+         )
+     ],
+     metrics=[
+         FieldsPresenceConfig(
+             excpected_schema={"...": "..."},  # schema the output is checked against
+             threshold=0.5,
+         ),
+         JsonCorrectnessConfig(
+             excpected_schema={"...": "..."},
+             # model="gpt-4.1",
+         ),
+     ],
+     chance=0.3,
+ )
+ ```
+
+ ## Configuration
+
+ The client can be configured in multiple ways. Values are resolved in order: constructor arguments, then the settings module, then environment variables, falling back to `http://localhost:8080` as the default base URL.
+
+ ### Directly via constructor arguments
+
+ ```python
+ DeepEvalClient(base_url="http://api", api_token="123")
+ ```
+
+ ### Environment variables
+
+ ```bash
+ export EVALFRAMWORK_URL=http://api
+ export EVALFRAMWORK_API_KEY=123
+ ```
+
+ ### Settings module
+
+ ```python
+ import settings  # can also be Django settings, e.g.: from django.conf import settings
+
+ client = DeepEvalClient(settings_module=settings)
+ ```
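The settings module only needs to expose the two attributes that `DeepEvalClient.__init__` reads via `getattr`; any object carrying them works. A hypothetical `settings.py` for illustration:

```python
# hypothetical settings.py; the attribute names mirror what DeepEvalClient.__init__ reads
EVALFRAMWORK_URL = "http://api"
EVALFRAMWORK_API_KEY = "123"
```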
+
+ <!-- uv publish --index testpypi
+ twine upload --repository testpypi dist/\*
+ uv add twine build --dev
+
+ uv build -->
rakam_eval_sdk-0.1.15.dist-info/RECORD
@@ -0,0 +1,6 @@
+ rakam_eval_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ rakam_eval_sdk/client.py,sha256=EdYA8SFoq6PhO6JNxu_j2eJSd3g4I0rtUtGJmGgvfzA,6583
+ rakam_eval_sdk/schema.py,sha256=FaY7nlcbzlFhH7lZl9iFfJ6T0wGVte7TYbt-w_wpFuI,3400
+ rakam_eval_sdk-0.1.15.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
+ rakam_eval_sdk-0.1.15.dist-info/METADATA,sha256=PhyFhXFiTeCt2KK_kBjGGXDI69q8qFmyg-aEiKh16OQ,5930
+ rakam_eval_sdk-0.1.15.dist-info/RECORD,,
rakam_eval_sdk-0.1.15.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: uv 0.8.24
+ Root-Is-Purelib: true
+ Tag: py3-none-any