PyPI - fiddler-evals - Versions diffs - 0.1.1.dev14__tar.gz → 0.2.0__tar.gz - Mend

fiddler-evals 0.1.1.dev14__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

{fiddler_evals-0.1.1.dev14/fiddler_evals.egg-info → fiddler_evals-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fiddler-evals
-Version: 0.1.1.dev14
+Version: 0.2.0
 Summary: Python SDK for evaluating LLM Applications
 Author-email: Fiddler AI <support@fiddler.ai>
 Maintainer-email: Fiddler AI <support@fiddler.ai>
@@ -15,7 +15,6 @@ Requires-Dist: requests<3
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: tqdm
 Requires-Dist: typing-extensions<5,>=4.6.0
-Requires-Dist: pandas>=1.2.5
 Requires-Dist: python-decouple
 Provides-Extra: pandas
 Requires-Dist: pandas>=1.2.5; extra == "pandas"
@@ -60,7 +59,7 @@ pip install --upgrade --pre fiddler-evals
 from fiddler_evals import init
 # Initialize connection
-init(url='https://your-org.fiddler.ai', token='your-api-token')
+init(url='https://your-instance.fiddler.ai', token='your-api-token')
 ```
 ### 2. Create Project Structure
@@ -101,19 +100,32 @@ dataset.insert(test_cases)
 ### 4. Use Built-in Evaluators
+**Configure LLM Gateway provider:**
+Add an LLM provider via the UI (**Settings > LLM Gateway**) to use Fiddler's pre-built LLM-as-a-Judge evaluators.
+LLM-as-a-Judge evaluators require a `model` parameter in the format `{provider}/{model}` (e.g., `openai/gpt-4o`) and an optional `credential` parameter for LLM Gateway authentication.
 ```python
 from fiddler_evals.evaluators import (
     AnswerRelevance, Coherence, Conciseness,
-    Toxicity, Sentiment, RegexSearch
+    Sentiment, RegexSearch
 )
-# Test individual evaluators
-relevance_evaluator = AnswerRelevance()
+# Test LLM-as-a-Judge evaluators (require model parameter)
+relevance_evaluator = AnswerRelevance(
+    model="openai/gpt-4o",           # Required: LLM Gateway model in {provider}/{model} format
+    credential="my-openai-cred"      # Optional: LLM Gateway credential name
+)
 score = relevance_evaluator.score(
     prompt="What is the capital of France?",
     response="Paris is the capital of France."
 )
 print(f"Score: {score.value} - {score.reasoning}")
+# Test other evaluators (no model parameter needed)
+sentiment_evaluator = Sentiment()
+scores = sentiment_evaluator.score(text="This is a helpful response.")
+print("Sentiments:", [f'{score.name}: {score.value}' for score in scores])
 ```
 ### 5. Create Custom Evaluators
@@ -199,8 +211,8 @@ def contains_number_evaluator(output: str) -> float:
 # Use functions directly in evaluators list
 evaluators = [
-    AnswerRelevance(),
-    Conciseness(),
+    AnswerRelevance(model="openai/gpt-4o", credential="my-openai-cred"),
+    Conciseness(model="openai/gpt-4o", credential="my-openai-cred"),
     word_count_evaluator,        # Function evaluator
     contains_number_evaluator,   # Function evaluator
 ]
@@ -231,9 +243,19 @@ def my_llm_task(inputs: dict, extras: dict, metadata: dict) -> dict:
 # Set up evaluators with different configurations
 evaluators = [
-    # Primary evaluation metrics
-    AnswerRelevance(score_name_prefix="primary"),
-    Conciseness(score_name_prefix="primary"),
+    # LLM-as-a-Judge evaluators (require model parameter)
+    AnswerRelevance(
+        model="openai/gpt-4o",
+        credential="my-openai-cred",
+        score_name_prefix="primary"
+    ),
+    Conciseness(
+        model="openai/gpt-4o",
+        credential="my-openai-cred",
+        score_name_prefix="primary"
+    ),
+    # Other evaluators
     Sentiment(score_name_prefix="primary"),
     # Custom evaluators with specific mappings
@@ -245,13 +267,13 @@ evaluators = [
     # Multiple instances of same evaluator for different fields
     RegexSearch(
         pattern=r"\d+",
-        score_name_prefix="validation",
+        score_name_prefix="question",
         score_name="has_number",
         score_fn_kwargs_mapping={"output": "question"}
     ),
     RegexSearch(
         pattern=r"\d+",
-        score_name_prefix="validation",
+        score_name_prefix="answer",
         score_name="has_number",
         score_fn_kwargs_mapping={"output": "answer"}
     ),
@@ -277,21 +299,22 @@ print(f"Generated {sum(len(result.scores) for result in experiment_result.result
 # Results in organized score names:
 # "primary_answer_relevance", "primary_conciseness", "primary_sentiment",
-# "quality_politeness", "validation_has_number" (for question), "validation_has_number" (for answer)
+# "quality_politeness", "question_has_number", "answer_has_number"
 ```
 ## Built-in Evaluators
-| Evaluator | Purpose | Key Parameters |
-|-----------|---------|----------------|
-| `AnswerRelevance` | Checks if response addresses the question | `prompt`, `response` |
-| `Coherence` | Evaluates logical flow and consistency | `response`, `prompt` |
-| `Conciseness` | Measures response brevity and clarity | `response` |
-| `Toxicity` | Detects harmful or toxic content | `text` |
-| `Sentiment` | Analyzes emotional tone | `text` |
-| `RegexSearch` | Pattern matching for specific formats | `output`, `pattern` |
-| `FTLPromptSafety` | Compute safety scores for prompts | `text` |
-| `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | `response`, `context` |
+| Evaluator | Purpose | Constructor Parameters | Score Parameters |
+|-----------|---------|------------------------|------------------|
+| `AnswerRelevance` | Checks if response addresses the question | `model` (required), `credential` (optional) | `prompt`, `response` |
+| `Coherence` | Evaluates logical flow and consistency | `model` (required), `credential` (optional) | `response`, `prompt` (optional) |
+| `Conciseness` | Measures response brevity and clarity | `model` (required), `credential` (optional) | `response` |
+| `Sentiment` | Analyzes emotional tone | - | `text` |
+| `RegexSearch` | Pattern matching for specific formats | `pattern` (required) | `output` |
+| `FTLPromptSafety` | Compute safety scores for prompts | - | `text` |
+| `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | - | `response`, `context` |
+**Note:** Evaluators marked with `model` and `credential` parameters are LLM-as-a-Judge evaluators that require an LLM Gateway model. The `model` parameter should be in `{provider}/{model}` format (e.g., `openai/gpt-4o`). The `credential` parameter is the name of the LLM Gateway credential for authentication.
 ## Data Import Options

{fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/PUBLIC.md RENAMED Viewed

@@ -38,7 +38,7 @@ pip install --upgrade --pre fiddler-evals
 from fiddler_evals import init
 # Initialize connection
-init(url='https://your-org.fiddler.ai', token='your-api-token')
+init(url='https://your-instance.fiddler.ai', token='your-api-token')
 ```
 ### 2. Create Project Structure
@@ -79,19 +79,32 @@ dataset.insert(test_cases)
 ### 4. Use Built-in Evaluators
+**Configure LLM Gateway provider:**
+Add an LLM provider via the UI (**Settings > LLM Gateway**) to use Fiddler's pre-built LLM-as-a-Judge evaluators.
+LLM-as-a-Judge evaluators require a `model` parameter in the format `{provider}/{model}` (e.g., `openai/gpt-4o`) and an optional `credential` parameter for LLM Gateway authentication.
 ```python
 from fiddler_evals.evaluators import (
     AnswerRelevance, Coherence, Conciseness,
-    Toxicity, Sentiment, RegexSearch
+    Sentiment, RegexSearch
 )
-# Test individual evaluators
-relevance_evaluator = AnswerRelevance()
+# Test LLM-as-a-Judge evaluators (require model parameter)
+relevance_evaluator = AnswerRelevance(
+    model="openai/gpt-4o",           # Required: LLM Gateway model in {provider}/{model} format
+    credential="my-openai-cred"      # Optional: LLM Gateway credential name
+)
 score = relevance_evaluator.score(
     prompt="What is the capital of France?",
     response="Paris is the capital of France."
 )
 print(f"Score: {score.value} - {score.reasoning}")
+# Test other evaluators (no model parameter needed)
+sentiment_evaluator = Sentiment()
+scores = sentiment_evaluator.score(text="This is a helpful response.")
+print("Sentiments:", [f'{score.name}: {score.value}' for score in scores])
 ```
 ### 5. Create Custom Evaluators
@@ -177,8 +190,8 @@ def contains_number_evaluator(output: str) -> float:
 # Use functions directly in evaluators list
 evaluators = [
-    AnswerRelevance(),
-    Conciseness(),
+    AnswerRelevance(model="openai/gpt-4o", credential="my-openai-cred"),
+    Conciseness(model="openai/gpt-4o", credential="my-openai-cred"),
     word_count_evaluator,        # Function evaluator
     contains_number_evaluator,   # Function evaluator
 ]
@@ -209,9 +222,19 @@ def my_llm_task(inputs: dict, extras: dict, metadata: dict) -> dict:
 # Set up evaluators with different configurations
 evaluators = [
-    # Primary evaluation metrics
-    AnswerRelevance(score_name_prefix="primary"),
-    Conciseness(score_name_prefix="primary"),
+    # LLM-as-a-Judge evaluators (require model parameter)
+    AnswerRelevance(
+        model="openai/gpt-4o",
+        credential="my-openai-cred",
+        score_name_prefix="primary"
+    ),
+    Conciseness(
+        model="openai/gpt-4o",
+        credential="my-openai-cred",
+        score_name_prefix="primary"
+    ),
+    # Other evaluators
     Sentiment(score_name_prefix="primary"),
     # Custom evaluators with specific mappings
@@ -223,13 +246,13 @@ evaluators = [
     # Multiple instances of same evaluator for different fields
     RegexSearch(
         pattern=r"\d+",
-        score_name_prefix="validation",
+        score_name_prefix="question",
         score_name="has_number",
         score_fn_kwargs_mapping={"output": "question"}
     ),
     RegexSearch(
         pattern=r"\d+",
-        score_name_prefix="validation",
+        score_name_prefix="answer",
         score_name="has_number",
         score_fn_kwargs_mapping={"output": "answer"}
     ),
@@ -255,21 +278,22 @@ print(f"Generated {sum(len(result.scores) for result in experiment_result.result
 # Results in organized score names:
 # "primary_answer_relevance", "primary_conciseness", "primary_sentiment",
-# "quality_politeness", "validation_has_number" (for question), "validation_has_number" (for answer)
+# "quality_politeness", "question_has_number", "answer_has_number"
 ```
 ## Built-in Evaluators
-| Evaluator | Purpose | Key Parameters |
-|-----------|---------|----------------|
-| `AnswerRelevance` | Checks if response addresses the question | `prompt`, `response` |
-| `Coherence` | Evaluates logical flow and consistency | `response`, `prompt` |
-| `Conciseness` | Measures response brevity and clarity | `response` |
-| `Toxicity` | Detects harmful or toxic content | `text` |
-| `Sentiment` | Analyzes emotional tone | `text` |
-| `RegexSearch` | Pattern matching for specific formats | `output`, `pattern` |
-| `FTLPromptSafety` | Compute safety scores for prompts | `text` |
-| `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | `response`, `context` |
+| Evaluator | Purpose | Constructor Parameters | Score Parameters |
+|-----------|---------|------------------------|------------------|
+| `AnswerRelevance` | Checks if response addresses the question | `model` (required), `credential` (optional) | `prompt`, `response` |
+| `Coherence` | Evaluates logical flow and consistency | `model` (required), `credential` (optional) | `response`, `prompt` (optional) |
+| `Conciseness` | Measures response brevity and clarity | `model` (required), `credential` (optional) | `response` |
+| `Sentiment` | Analyzes emotional tone | - | `text` |
+| `RegexSearch` | Pattern matching for specific formats | `pattern` (required) | `output` |
+| `FTLPromptSafety` | Compute safety scores for prompts | - | `text` |
+| `FTLResponseFaithfulness` | Evaluate faithfulness of LLM responses | - | `response`, `context` |
+**Note:** Evaluators marked with `model` and `credential` parameters are LLM-as-a-Judge evaluators that require an LLM Gateway model. The `model` parameter should be in `{provider}/{model}` format (e.g., `openai/gpt-4o`). The `credential` parameter is the name of the LLM Gateway credential for authentication.
 ## Data Import Options

fiddler_evals-0.2.0/fiddler_evals/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.2.0

{fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/__init__.py RENAMED Viewed

@@ -20,7 +20,6 @@ from fiddler_evals.evaluators import (
     RegexSearch,
     Sentiment,
     TopicClassification,
-    Toxicity,
 )
 from fiddler_evals.evaluators.base import Evaluator
 from fiddler_evals.evaluators.eval_fn import EvalFn
@@ -55,7 +54,6 @@ __all__ = [
     "AnswerRelevance",
     "Coherence",
     "Conciseness",
-    "Toxicity",
     "Sentiment",
     "RegexSearch",
     "RegexMatch",

{fiddler_evals-0.1.1.dev14 → fiddler_evals-0.2.0}/fiddler_evals/connection.py RENAMED Viewed

@@ -29,63 +29,30 @@ class Connection:
     managing connection parameters, authentication tokens, and ensuring proper
     communication protocols are established.
-    Attributes
-    ----------
-    url : str
-        Base URL of the Fiddler platform instance
-    token : str
-        Authentication token for API access
-    proxies : dict, optional
-        Optional proxy configuration for HTTP requests
-    timeout : float or tuple, optional
-        HTTP request timeout settings
-    verify : bool
-        Whether to verify SSL/TLS certificates
-    request_headers : dict
-        HTTP headers including authentication and client info
-    client : RequestClient
-        Cached HTTP client instance for making requests
-    server_info : ServerInfo
-        Cached server information and metadata
-    server_version : VersionInfo
-        Version of the connected Fiddler server
-    organization_name : str
-        Name of the connected organization
-    organization_id : UUID
-        UUID of the connected organization
-    Examples
-    --------
-    Creating a basic connection:
-    .. code-block:: python
-        connection = Connection(
-            url="https://your-fiddler-instance.com",
-            token="your-auth-token"
-        )
-    Creating a connection with custom timeout and proxy:
-    .. code-block:: python
-        connection = Connection(
-            url="https://your-fiddler-instance.com",
-            token="your-auth-token",
-            timeout=(5.0, 30.0),  # (connect_timeout, read_timeout)
-            proxies={"https": "https://proxy.company.com:8080"}
-        )
-    Creating a connection without SSL verification:
-    .. code-block:: python
-        connection = Connection(
-            url="https://your-fiddler-instance.com",
-            token="your-auth-token",
-            verify=False,  # Not recommended for production
-            validate=False  # Skip version compatibility check
-        )
+    Example:
+        .. code-block:: python
+            # Creating a basic connection
+            connection = Connection(
+                url="https://your-instance.fiddler.ai",
+                token="your-auth-token"
+            )
+            # Creating a connection with custom timeout and proxy
+            connection = Connection(
+                url="https://your-instance.fiddler.ai",
+                token="your-auth-token",
+                timeout=(5.0, 30.0),  # (connect_timeout, read_timeout)
+                proxies={"https": "https://proxy.company.com:8080"}
+            )
+            # Creating a connection without SSL verification
+            connection = Connection(
+                url="https://your-instance.fiddler.ai",
+                token="your-auth-token",
+                verify=False,  # Not recommended for production
+                validate=False  # Skip version compatibility check
+            )
     """
     def __init__(  # pylint: disable=too-many-arguments
@@ -99,27 +66,17 @@ class Connection:
     ) -> None:
         """Initialize a connection to the Fiddler platform.
-        Parameters
-        ----------
-        url : str
-            The base URL to your Fiddler platform instance
-        token : str
-            Authentication token obtained from the Fiddler UI
-        proxies : dict, optional
-            Dictionary mapping protocol to proxy URL for HTTP requests
-        timeout : float or tuple, optional
-            HTTP request timeout settings (float or tuple of connect/read timeouts)
-        verify : bool, default True
-            Whether to verify server's TLS certificate
-        validate : bool, default True
-            Whether to validate server/client version compatibility
-        Raises
-        ------
-        ValueError
-            If url or token parameters are empty
-        IncompatibleClient
-            If server version is incompatible with client version
+        Args:
+            url: The base URL to your Fiddler platform instance
+            token: Authentication token obtained from the Fiddler UI
+            proxies: Dictionary mapping protocol to proxy URL for HTTP requests
+            timeout: HTTP request timeout settings (float or tuple of connect/read timeouts)
+            verify: Whether to verify server's TLS certificate (default: True)
+            validate: Whether to validate server/client version compatibility (default: True)
+        Raises:
+            ValueError: If url or token parameters are empty
+            IncompatibleClient: If server version is incompatible with client version
         """
         self.url = url
@@ -363,30 +320,36 @@ def init(  # pylint: disable=too-many-arguments
     Examples:
         Basic initialization:
-        import fiddler as fdl
+        .. code-block:: python
-        fdl.init(
-            url="https://your-fiddler-instance.com",
-            token="your-auth-token"
-        )
+            import fiddler_evals as fdl
+            fdl.init(
+                url="https://your-instance.fiddler.ai",
+                token="your-auth-token"
+            )
         Initialization with custom timeout and proxy:
-        fdl.init(
-            url="https://your-fiddler-instance.com",
-            token="your-auth-token",
-            timeout=(10.0, 60.0),  # 10s connect, 60s read timeout
-            proxies={"https": "https://proxy.company.com:8080"}
-        )
+        .. code-block:: python
+            fdl.init(
+                url="https://your-instance.fiddler.ai",
+                token="your-auth-token",
+                timeout=(10.0, 60.0),  # 10s connect, 60s read timeout
+                proxies={"https": "https://proxy.company.com:8080"}
+            )
         Initialization for development with relaxed settings:
-        fdl.init(
-            url="https://dev-fiddler-instance.com",
-            token="dev-token",
-            verify=False,  # Skip SSL verification
-            validate=False,  # Skip version compatibility check
-        )
+        .. code-block:: python
+            fdl.init(
+                url="https://your-instance.fiddler.ai",
+                token="dev-token",
+                verify=False,  # Skip SSL verification
+                validate=False,  # Skip version compatibility check
+            )

fiddler-evals 0.1.1.dev14__tar.gz → 0.2.0__tar.gz

fiddler-evals 0.1.1.dev14__tar.gz → 0.2.0__tar.gz