PyPI - eval-protocol - Versions diffs - 0.2.11.dev1__tar.gz → 0.2.98.dev1__tar.gz - Mend

eval-protocol 0.2.11.dev1__tar.gz → 0.2.98.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (508)

{eval_protocol-0.2.11.dev1 → eval_protocol-0.2.98.dev1}/LICENSE RENAMED Viewed

@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+SOFTWARE.

eval_protocol-0.2.98.dev1/PKG-INFO ADDED Viewed

@@ -0,0 +1,156 @@
+Metadata-Version: 2.4
+Name: eval-protocol
+Version: 0.2.98.dev1
+Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
+Author-email: Fireworks AI <info@fireworks.ai>
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/fireworks-ai/eval-protocol
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: requests>=2.25.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: dataclasses-json>=0.5.7
+Requires-Dist: uvicorn>=0.15.0
+Requires-Dist: python-dotenv>=0.19.0
+Requires-Dist: openai>=1.78.1
+Requires-Dist: aiosqlite
+Requires-Dist: aiohttp
+Requires-Dist: mcp>=1.9.2
+Requires-Dist: PyYAML>=5.0
+Requires-Dist: hydra-core>=1.3.2
+Requires-Dist: omegaconf>=2.3.0
+Requires-Dist: httpx>=0.24.0
+Requires-Dist: anthropic>=0.59.0
+Requires-Dist: litellm<1.75.0
+Requires-Dist: pytest>=6.0.0
+Requires-Dist: pytest-asyncio>=0.21.0
+Requires-Dist: peewee>=3.18.2
+Requires-Dist: backoff>=2.2.0
+Requires-Dist: questionary>=2.0.0
+Requires-Dist: toml>=0.10.0
+Requires-Dist: loguru>=0.6.0
+Requires-Dist: docstring-parser>=0.15
+Requires-Dist: rich>=12.0.0
+Requires-Dist: psutil>=6.0.0
+Requires-Dist: addict>=2.4.0
+Requires-Dist: deepdiff>=6.0.0
+Requires-Dist: websockets>=15.0.1
+Requires-Dist: fastapi>=0.116.1
+Provides-Extra: dev
+Requires-Dist: build; extra == "dev"
+Requires-Dist: twine; extra == "dev"
+Requires-Dist: pytest-httpserver; extra == "dev"
+Requires-Dist: werkzeug>=2.0.0; extra == "dev"
+Requires-Dist: ruff>=0.5.0; extra == "dev"
+Requires-Dist: transformers>=4.0.0; extra == "dev"
+Requires-Dist: pandas>=1.5.0; extra == "dev"
+Requires-Dist: types-setuptools; extra == "dev"
+Requires-Dist: types-requests; extra == "dev"
+Requires-Dist: types-PyYAML; extra == "dev"
+Requires-Dist: types-docker; extra == "dev"
+Requires-Dist: versioneer>=0.20; extra == "dev"
+Requires-Dist: openai>=1.78.1; extra == "dev"
+Requires-Dist: pre-commit; extra == "dev"
+Requires-Dist: e2b; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: pytest-xdist; extra == "dev"
+Requires-Dist: docker==7.1.0; extra == "dev"
+Requires-Dist: ipykernel>=6.30.0; extra == "dev"
+Requires-Dist: jupyter>=1.1.1; extra == "dev"
+Requires-Dist: pip>=25.1.1; extra == "dev"
+Requires-Dist: haikus==0.3.8; extra == "dev"
+Requires-Dist: syrupy>=4.0.0; extra == "dev"
+Requires-Dist: gymnasium>=1.2.0; extra == "dev"
+Provides-Extra: trl
+Requires-Dist: torch>=1.9; extra == "trl"
+Requires-Dist: trl>=0.7.0; extra == "trl"
+Requires-Dist: peft>=0.7.0; extra == "trl"
+Requires-Dist: transformers>=4.0.0; extra == "trl"
+Requires-Dist: accelerate>=0.28.0; extra == "trl"
+Provides-Extra: openevals
+Requires-Dist: openevals>=0.1.0; extra == "openevals"
+Provides-Extra: fireworks
+Requires-Dist: fireworks-ai>=0.19.19; extra == "fireworks"
+Provides-Extra: box2d
+Requires-Dist: swig; extra == "box2d"
+Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
+Requires-Dist: Pillow; extra == "box2d"
+Provides-Extra: langfuse
+Requires-Dist: langfuse>=2.0.0; extra == "langfuse"
+Provides-Extra: huggingface
+Requires-Dist: datasets>=3.0.0; extra == "huggingface"
+Requires-Dist: transformers>=4.0.0; extra == "huggingface"
+Provides-Extra: langsmith
+Requires-Dist: langsmith>=0.1.86; extra == "langsmith"
+Provides-Extra: bigquery
+Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "bigquery"
+Requires-Dist: google-auth>=2.0.0; extra == "bigquery"
+Provides-Extra: svgbench
+Requires-Dist: selenium>=4.0.0; extra == "svgbench"
+Provides-Extra: pydantic
+Requires-Dist: pydantic-ai>=1.0.2; extra == "pydantic"
+Provides-Extra: supabase
+Requires-Dist: supabase>=2.18.1; extra == "supabase"
+Provides-Extra: chinook
+Requires-Dist: psycopg2-binary>=2.9.10; extra == "chinook"
+Provides-Extra: langchain
+Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
+Provides-Extra: braintrust
+Requires-Dist: braintrust[otel]; extra == "braintrust"
+Provides-Extra: openenv
+Requires-Dist: openenv-core; extra == "openenv"
+Provides-Extra: langgraph
+Requires-Dist: langgraph>=0.6.7; extra == "langgraph"
+Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
+Provides-Extra: langgraph-tools
+Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
+Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
+Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
+Provides-Extra: proxy
+Requires-Dist: redis>=5.0.0; extra == "proxy"
+Requires-Dist: langfuse>=2.0.0; extra == "proxy"
+Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
+Dynamic: license-file
+# Eval Protocol
+[![PyPI - Version](https://img.shields.io/pypi/v/eval-protocol)](https://pypi.org/project/eval-protocol/)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/eval-protocol/python-sdk)
+**Eval Protocol (EP) is an open solution for doing reinforcement learning fine-tuning on existing agents — across any language, container, or framework.**
+![Eval Protocol overview](https://github.com/eval-protocol/python-sdk/raw/main/docs/intro.png)
+Most teams already have complex agents running in production — often across remote services with heavy dependencies, Docker containers, or TypeScript backends deployed on Vercel. When they try to train or fine-tune these agents with reinforcement learning, connecting them to a trainer quickly becomes painful.
+Eval Protocol makes this possible in two ways:
+1. **Expose your agent through a simple API**
+   Wrap your existing agent (Python, TypeScript, Docker, etc.) in a simple HTTP service using EP’s rollout interface. EP handles the rollout orchestration, metadata passing, and trace storage automatically.
+2. **Connect with any trainer**
+   Once your agent speaks the EP standard, it can be fine-tuned or evaluated with any supported trainer — Fireworks RFT, TRL, Unsloth, or your own — with no environment rewrites.
+The result: RL that works out-of-the-box for existing production agents.
+## Who This Is For
+- **Applied AI teams** adding RL to existing production agents.
+- **Research engineers** experimenting with fine-tuning complex, multi-turn or tool-using agents.
+- **MLOps teams** building reproducible, language-agnostic rollout pipelines.
+## Quickstart
+- See the Quickstart repository: [eval-protocol/quickstart](https://github.com/eval-protocol/quickstart/tree/main)
+## Resources
+- **[Documentation](https://evalprotocol.io)** – Guides and API reference
+- **[Discord](https://discord.com/channels/1137072072808472616/1400975572405850155)** – Community
+- **[GitHub](https://github.com/eval-protocol/python-sdk)** – Source and examples
+## License
+[MIT](LICENSE)

eval_protocol-0.2.98.dev1/README.md ADDED Viewed

@@ -0,0 +1,39 @@
+# Eval Protocol
+[![PyPI - Version](https://img.shields.io/pypi/v/eval-protocol)](https://pypi.org/project/eval-protocol/)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/eval-protocol/python-sdk)
+**Eval Protocol (EP) is an open solution for doing reinforcement learning fine-tuning on existing agents — across any language, container, or framework.**
+![Eval Protocol overview](https://github.com/eval-protocol/python-sdk/raw/main/docs/intro.png)
+Most teams already have complex agents running in production — often across remote services with heavy dependencies, Docker containers, or TypeScript backends deployed on Vercel. When they try to train or fine-tune these agents with reinforcement learning, connecting them to a trainer quickly becomes painful.
+Eval Protocol makes this possible in two ways:
+1. **Expose your agent through a simple API**
+   Wrap your existing agent (Python, TypeScript, Docker, etc.) in a simple HTTP service using EP’s rollout interface. EP handles the rollout orchestration, metadata passing, and trace storage automatically.
+2. **Connect with any trainer**
+   Once your agent speaks the EP standard, it can be fine-tuned or evaluated with any supported trainer — Fireworks RFT, TRL, Unsloth, or your own — with no environment rewrites.
+The result: RL that works out-of-the-box for existing production agents.
+## Who This Is For
+- **Applied AI teams** adding RL to existing production agents.
+- **Research engineers** experimenting with fine-tuning complex, multi-turn or tool-using agents.
+- **MLOps teams** building reproducible, language-agnostic rollout pipelines.
+## Quickstart
+- See the Quickstart repository: [eval-protocol/quickstart](https://github.com/eval-protocol/quickstart/tree/main)
+## Resources
+- **[Documentation](https://evalprotocol.io)** – Guides and API reference
+- **[Discord](https://discord.com/channels/1137072072808472616/1400975572405850155)** – Community
+- **[GitHub](https://github.com/eval-protocol/python-sdk)** – Source and examples
+## License
+[MIT](LICENSE)

{eval_protocol-0.2.11.dev1 → eval_protocol-0.2.98.dev1}/development/normalize_sandbox_fusion.py RENAMED Viewed

@@ -56,7 +56,7 @@ OUTPUT_JSONL_FILE = "./development/CODING_DATASET.jsonl"
 try:
     repobench_p_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 except OSError:
-    print("Warning: Could not load gpt2 tokenizer for Repobench-P. " "Falling back to basic split for token counting.")
+    print("Warning: Could not load gpt2 tokenizer for Repobench-P. Falling back to basic split for token counting.")
     repobench_p_tokenizer = None
@@ -108,8 +108,7 @@ def format_aider_prompt(problem_json: dict) -> str:
     """Format the prompt for Aider benchmark style problems."""
     question = problem_json.get("content", "")
     return (
-        f"{question}\n\nPlease generate the code in the following format:\n"
-        "```python\n# Your code response here\n```"
+        f"{question}\n\nPlease generate the code in the following format:\n```python\n# Your code response here\n```"
     )
@@ -327,7 +326,7 @@ def normalize_problem_to_openai_format(
             try:
                 labels = json.loads(labels_data)
             except json.JSONDecodeError:
-                print(f"Warning: Skipping ID {problem_id_str} in {filename} " "- malformed JSON in labels.")
+                print(f"Warning: Skipping ID {problem_id_str} in {filename} - malformed JSON in labels.")
                 return None
         elif isinstance(labels_data, dict):
             labels = labels_data
@@ -426,10 +425,10 @@ def normalize_problem_to_openai_format(
             )
             return None
         if not final_user_content.strip() or not final_assistant_content.strip():
-            print(f"Warning: Skipping ID {problem_id_str} in {filename} - " "empty processed content.")
+            print(f"Warning: Skipping ID {problem_id_str} in {filename} - empty processed content.")
             return None
         if final_assistant_content.strip() == "import sys; sys.exit(0)":
-            print(f"Warning: Skipping ID {problem_id_str} in {filename} - " "placeholder solution.")
+            print(f"Warning: Skipping ID {problem_id_str} in {filename} - placeholder solution.")
             return None
         return {
@@ -439,7 +438,7 @@ def normalize_problem_to_openai_format(
             ]
         }
     except Exception as e:
-        print(f"Warning: Skipping ID {problem_id_str} in {filename} - " f"error ({type(e).__name__}: {e}).")
+        print(f"Warning: Skipping ID {problem_id_str} in {filename} - error ({type(e).__name__}: {e}).")
         import traceback
         traceback.print_exc()
@@ -474,7 +473,7 @@ def main():
                 file_error_count += 1
                 continue
-            print(f"Processing file {filename_idx + 1}/{len(ALL_SOURCE_JSONL_FILES)}: " f"{filename}...")
+            print(f"Processing file {filename_idx + 1}/{len(ALL_SOURCE_JSONL_FILES)}: {filename}...")
             lines_in_file = 0
             processed_in_file = 0
             skipped_in_file = 0
@@ -488,7 +487,7 @@ def main():
                         try:
                             problem_data = json.loads(stripped_line)
                         except json.JSONDecodeError:
-                            print(f"Warning: Malformed JSON on line {line_number} " f"in {filepath}. Skipping line.")
+                            print(f"Warning: Malformed JSON on line {line_number} in {filepath}. Skipping line.")
                             skipped_in_file += 1
                             continue
@@ -507,7 +506,7 @@ def main():
                 processed_count += processed_in_file
                 skipped_count += skipped_in_file
             except Exception as e:
-                print(f"Error processing file {filepath}: {type(e).__name__}: {e}. " "Skipping rest of file.")
+                print(f"Error processing file {filepath}: {type(e).__name__}: {e}. Skipping rest of file.")
                 import traceback
                 traceback.print_exc()

{eval_protocol-0.2.11.dev1 → eval_protocol-0.2.98.dev1}/development/utils/subprocess_manager.py RENAMED Viewed

@@ -139,7 +139,7 @@ def start_ngrok_and_get_url(
         # Or by setting NGROK_AUTHTOKEN environment variable.
         # Forcing it via command line is also an option but less common for persistent setup.
         print(
-            f"Note: Ngrok authtoken should be pre-configured by the user (e.g., 'ngrok config add-authtoken <token>') or via NGROK_AUTHTOKEN env var."
+            "Note: Ngrok authtoken should be pre-configured by the user (e.g., 'ngrok config add-authtoken <token>') or via NGROK_AUTHTOKEN env var."
         )
         # Example if passing via env for the subprocess:
         # ngrok_env = os.environ.copy()

eval_protocol-0.2.98.dev1/eval_protocol/__init__.py ADDED Viewed

@@ -0,0 +1,178 @@
+"""
+Fireworks Eval Protocol - Simplify reward modeling and evaluation for LLM RL fine-tuning.
+A Python library for defining, testing, deploying, and using reward functions
+for LLM fine-tuning, including launching full RL jobs on the Fireworks platform.
+The library also provides an agent evaluation framework for testing and evaluating
+tool-augmented models using self-contained task bundles.
+"""
+import warnings
+from .auth import get_fireworks_account_id, get_fireworks_api_key
+from .common_utils import load_jsonl
+from .config import RewardKitConfig, get_config, load_config
+from .mcp_env import (
+    AnthropicPolicy,
+    FireworksPolicy,
+    LiteLLMPolicy,
+    OpenAIPolicy,
+    make,
+    rollout,
+    test_mcp,
+)
+from .data_loader import DynamicDataLoader, InlineDataLoader
+from . import mcp, rewards
+from .models import EvaluateResult, Message, MetricResult, EvaluationRow, InputMetadata, Status
+from .playback_policy import PlaybackPolicyBase
+from .resources import create_llm_resource
+from .reward_function import RewardFunction
+from .typed_interface import reward_function
+from .quickstart.aha_judge import aha_judge
+from .utils.evaluation_row_utils import (
+    multi_turn_assistant_to_ground_truth,
+    assistant_to_ground_truth,
+    filter_longest_conversation,
+)
+from .pytest import evaluation_test, SingleTurnRolloutProcessor, RemoteRolloutProcessor, GithubActionRolloutProcessor
+from .pytest.parameterize import DefaultParameterIdGenerator
+from .log_utils.elasticsearch_direct_http_handler import ElasticsearchDirectHttpHandler
+from .log_utils.rollout_id_filter import RolloutIdFilter
+from .log_utils.util import setup_rollout_logging_for_elasticsearch_handler
+from .log_utils.fireworks_tracing_http_handler import FireworksTracingHttpHandler
+from .log_utils.elasticsearch_client import ElasticsearchConfig
+from .types.remote_rollout_processor import (
+    InitRequest,
+    RolloutMetadata,
+    StatusResponse,
+    create_langfuse_config_tags,
+    DataLoaderConfig,
+)
+try:
+    from .adapters import OpenAIResponsesAdapter
+except ImportError:
+    OpenAIResponsesAdapter = None
+try:
+    from .adapters import LangfuseAdapter, create_langfuse_adapter
+except ImportError:
+    LangfuseAdapter = None
+try:
+    from .adapters import BraintrustAdapter, create_braintrust_adapter
+except ImportError:
+    BraintrustAdapter = None
+try:
+    from .adapters import LangSmithAdapter
+except ImportError:
+    LangSmithAdapter = None
+try:
+    from .adapters import WeaveAdapter
+except ImportError:
+    WeaveAdapter = None
+try:
+    from .proxy import create_app, AuthProvider, AccountInfo  # pyright: ignore[reportAssignmentType]
+except ImportError:
+    def create_app(*args, **kwargs):
+        raise ImportError(
+            "Proxy functionality requires additional dependencies. "
+            "Please install with: pip install eval-protocol[proxy]"
+        )
+    class AuthProvider:
+        def __init__(self, *args, **kwargs):
+            raise ImportError(
+                "Proxy functionality requires additional dependencies. "
+                "Please install with: pip install eval-protocol[proxy]"
+            )
+    class AccountInfo:
+        def __init__(self, *args, **kwargs):
+            raise ImportError(
+                "Proxy functionality requires additional dependencies. "
+                "Please install with: pip install eval-protocol[proxy]"
+            )
+warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
+__all__ = [
+    "ElasticsearchConfig",
+    "ElasticsearchDirectHttpHandler",
+    "RolloutIdFilter",
+    "setup_rollout_logging_for_elasticsearch_handler",
+    "DataLoaderConfig",
+    "Status",
+    "RemoteRolloutProcessor",
+    "GithubActionRolloutProcessor",
+    "InputMetadata",
+    "EvaluationRow",
+    "DefaultParameterIdGenerator",
+    "DynamicDataLoader",
+    "InlineDataLoader",
+    "aha_judge",
+    "multi_turn_assistant_to_ground_truth",
+    "assistant_to_ground_truth",
+    "filter_longest_conversation",
+    "evaluation_test",
+    "SingleTurnRolloutProcessor",
+    "OpenAIResponsesAdapter",
+    "LangfuseAdapter",
+    "create_langfuse_adapter",
+    "BraintrustAdapter",
+    "create_braintrust_adapter",
+    "LangSmithAdapter",
+    "FireworksTracingHttpHandler",
+    # Core interfaces
+    "Message",
+    "MetricResult",
+    "EvaluateResult",
+    "reward_function",
+    "RewardFunction",
+    # Authentication
+    "get_fireworks_api_key",
+    "get_fireworks_account_id",
+    # Configuration
+    "load_config",
+    "get_config",
+    "RewardKitConfig",
+    # Utilities
+    "load_jsonl",
+    # MCP Environment API
+    "make",
+    "rollout",
+    "LiteLLMPolicy",
+    "AnthropicPolicy",
+    "FireworksPolicy",
+    "OpenAIPolicy",
+    "test_mcp",
+    # Playback functionality
+    "PlaybackPolicyBase",
+    # Resource management
+    "create_llm_resource",
+    # Submodules
+    "rewards",
+    "mcp",
+    # Remote server types
+    "InitRequest",
+    "RolloutMetadata",
+    "StatusResponse",
+    "create_langfuse_config_tags",
+    # Proxy
+    "create_app",
+    "AuthProvider",
+    "AccountInfo",
+]
+from . import _version
+__version__ = _version.get_versions()["version"]

{eval_protocol-0.2.11.dev1 → eval_protocol-0.2.98.dev1}/eval_protocol/_version.py RENAMED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2025-08-15T00:15:02-0700",
+ "date": "2025-12-15T16:40:32-0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "58d840995e6ca925da6fa17dc48b0b0d9ad9d2e8",
- "version": "0.2.11-dev1"
+ "full-revisionid": "438a49431d16626a8e883cfb04afecfb188eb9dc",
+ "version": "0.2.98.dev.1"
 }
 '''  # END VERSION_JSON

eval_protocol-0.2.98.dev1/eval_protocol/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,101 @@
+"""Data source adapters for Eval Protocol.
+This package provides adapters for integrating with various data sources
+and converting them to EvaluationRow format for use in evaluation pipelines.
+Available adapters:
+- BaseAdapter: Abstract base class for all adapters
+- LangfuseAdapter: Pull data from Langfuse deployments
+- FireworksTracingAdapter: Pull data from Langfuse via Fireworks tracing proxy
+- HuggingFaceAdapter: Load datasets from HuggingFace Hub
+- BigQueryAdapter: Query data from Google BigQuery
+- TRL integration (legacy)
+"""
+# Always available
+from .base import BaseAdapter
+__all__ = ["BaseAdapter"]
+# Conditional imports based on available dependencies
+try:
+    from .langfuse import LangfuseAdapter, create_langfuse_adapter
+    __all__.extend(["LangfuseAdapter", "create_langfuse_adapter"])
+except ImportError:
+    pass
+from .fireworks_tracing import FireworksTracingAdapter
+__all__.extend(["FireworksTracingAdapter"])
+try:
+    from .huggingface import (
+        HuggingFaceAdapter,
+        create_gsm8k_adapter,
+        create_huggingface_adapter,
+        create_math_adapter,
+    )
+    __all__.extend(
+        [
+            "HuggingFaceAdapter",
+            "create_huggingface_adapter",
+            "create_gsm8k_adapter",
+            "create_math_adapter",
+        ]
+    )
+except ImportError:
+    pass
+try:
+    from .bigquery import (
+        BigQueryAdapter,
+        create_bigquery_adapter,
+    )
+    __all__.extend(
+        [
+            "BigQueryAdapter",
+            "create_bigquery_adapter",
+        ]
+    )
+except ImportError:
+    pass
+try:
+    from .braintrust import BraintrustAdapter, create_braintrust_adapter
+    __all__.extend(["BraintrustAdapter", "create_braintrust_adapter"])
+except ImportError:
+    pass
+# Legacy adapters (always available)
+try:
+    from .trl import create_trl_adapter
+    __all__.extend(["create_trl_adapter"])
+except ImportError:
+    pass
+try:
+    from .openai_responses import OpenAIResponsesAdapter
+    __all__.extend(["OpenAIResponsesAdapter"])
+except ImportError:
+    pass
+try:
+    from .langsmith import LangSmithAdapter
+    __all__.extend(["LangSmithAdapter"])
+except ImportError:
+    pass
+try:
+    from .weave import WeaveAdapter
+    __all__.extend(["WeaveAdapter"])
+except ImportError:
+    pass

eval_protocol-0.2.98.dev1/eval_protocol/adapters/base.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""
+Base adapter interface for Eval Protocol.
+"""
+from abc import ABC, abstractmethod
+from typing import List
+from eval_protocol.models import EvaluationRow
+class BaseAdapter(ABC):
+    """Abstract base class for all Eval Protocol adapters."""
+    @abstractmethod
+    def get_evaluation_rows(self, *args, **kwargs) -> List[EvaluationRow]:
+        """Get evaluation rows from the data source."""
+        pass
+    def upload_scores(self, rows: List[EvaluationRow], model_name: str, mean_score: float) -> None:
+        """Upload evaluation scores back to the data source for tracking and analysis."""
+        pass
+    def upload_score(self, row: EvaluationRow, model_name: str) -> None:
+        """Upload evaluation score for a single row back to the data source."""
+        pass

eval-protocol 0.2.11.dev1__tar.gz → 0.2.98.dev1__tar.gz

eval-protocol 0.2.11.dev1__tar.gz → 0.2.98.dev1__tar.gz