PyPI - scorebook - Versions diffs - 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl - Mend

scorebook 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

scorebook/__init__.py +14 -6
scorebook/cli/auth.py +1 -1
scorebook/eval_datasets/__init__.py +5 -0
scorebook/eval_datasets/eval_dataset.py +719 -0
scorebook/evaluate/__init__.py +15 -0
scorebook/evaluate/_async/__init__.py +0 -0
scorebook/evaluate/_async/evaluate_async.py +443 -0
scorebook/evaluate/_sync/__init__.py +0 -0
scorebook/evaluate/_sync/evaluate.py +443 -0
scorebook/evaluate/evaluate_helpers.py +388 -0
scorebook/exceptions.py +48 -0
scorebook/inference/__init__.py +4 -0
scorebook/inference/clients/__init__.py +8 -0
scorebook/inference/{bedrock.py → clients/bedrock.py} +1 -1
scorebook/inference/{openai.py → clients/openai.py} +35 -23
scorebook/inference/{portkey.py → clients/portkey.py} +1 -1
scorebook/inference/{vertex.py → clients/vertex.py} +1 -1
scorebook/{inference_pipeline.py → inference/inference_pipeline.py} +66 -4
scorebook/settings.py +21 -0
scorebook/trismik/__init__.py +10 -0
scorebook/types.py +8 -5
scorebook/utils/__init__.py +11 -4
scorebook/utils/async_utils.py +20 -1
scorebook/utils/io_helpers.py +18 -5
scorebook/utils/progress_bars.py +739 -96
scorebook/utils/{build_prompt.py → render_template.py} +13 -12
{scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/METADATA +4 -4
scorebook-0.0.11.dist-info/RECORD +42 -0
scorebook/eval_dataset.py +0 -404
scorebook/evaluate.py +0 -623
scorebook/trismik_services/__init__.py +0 -6
scorebook/trismik_services/adaptive_testing_service.py +0 -141
scorebook/trismik_services/upload_classic_eval_run.py +0 -102
scorebook-0.0.9.dist-info/RECORD +0 -36
scorebook/{trismik_services/login.py → trismik/credentials.py} +0 -0
{scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/WHEEL +0 -0
{scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/entry_points.txt +0 -0
{scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/licenses/LICENSE +0 -0

scorebook/{inference_pipeline.py → inference/inference_pipeline.py} RENAMED Viewed

@@ -9,6 +9,8 @@ configurable way.
 import asyncio
 from typing import Any, Callable, Dict, List, Optional, cast
+from scorebook.utils import is_awaitable
 class InferencePipeline:
     """A pipeline for processing items through model inference.
@@ -18,6 +20,8 @@ class InferencePipeline:
     2. Model inference
     3. Postprocessing of model outputs
+    The pipeline automatically adapts to sync or async execution based on the
+    inference function provided during initialization.
     Attributes:
         model: Name or identifier of the model being used
@@ -35,6 +39,9 @@ class InferencePipeline:
     ) -> None:
         """Initialize the inference pipeline.
+        The pipeline will automatically become sync or async based on the
+        inference_function provided.
         Args:
             model: Name or identifier of the model to use
             inference_function: Function that performs model inference
@@ -46,8 +53,59 @@ class InferencePipeline:
         self.preprocessor: Optional[Callable] = preprocessor
         self.postprocessor: Optional[Callable] = postprocessor
+        # Dynamically change the class to provide appropriate sync/async interface
+        self.__class__ = (
+            _AsyncInferencePipeline if is_awaitable(inference_function) else _SyncInferencePipeline
+        )
+class _SyncInferencePipeline(InferencePipeline):
+    """Synchronous version of InferencePipeline."""
+    def run(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
+        """Execute the complete inference pipeline synchronously.
+        Args:
+            items: List of items to process through the pipeline
+            **hyperparameters: Model-specific parameters for inference
+        Returns:
+            List of processed outputs after running through the complete pipeline
+        """
+        if self.preprocessor:
+            input_items = [self.preprocessor(item, **hyperparameters) for item in items]
+        else:
+            input_items = items
+        # Sync inference function - call directly
+        inference_outputs = self.inference_function(input_items, **hyperparameters)
+        if self.postprocessor:
+            return [
+                self.postprocessor(inference_output, **hyperparameters)
+                for inference_output in inference_outputs
+            ]
+        else:
+            return cast(List[Any], inference_outputs)
+    def __call__(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
+        """Make the pipeline instance callable synchronously.
+        Args:
+            items: List of items to process through the pipeline
+            **hyperparameters: Model-specific parameters for inference
+        Returns:
+            List of processed outputs after running through the complete pipeline
+        """
+        return self.run(items, **hyperparameters)
+class _AsyncInferencePipeline(InferencePipeline):
+    """Asynchronous version of InferencePipeline."""
     async def run(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
-        """Execute the complete inference pipeline on a list of items.
+        """Execute the complete inference pipeline asynchronously.
         Args:
             items: List of items to process through the pipeline
@@ -61,10 +119,14 @@ class InferencePipeline:
         else:
             input_items = items
-        if asyncio.iscoroutinefunction(self.inference_function):
+        # Handle both sync and async inference functions
+        if is_awaitable(self.inference_function):
             inference_outputs = await self.inference_function(input_items, **hyperparameters)
         else:
-            inference_outputs = self.inference_function(input_items, **hyperparameters)
+            # Run sync function in thread pool to avoid blocking
+            inference_outputs = await asyncio.to_thread(
+                self.inference_function, input_items, **hyperparameters
+            )
         if self.postprocessor:
             return [
@@ -75,7 +137,7 @@ class InferencePipeline:
             return cast(List[Any], inference_outputs)
     async def __call__(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
-        """Make the pipeline instance callable by wrapping the run method.
+        """Make the pipeline instance callable asynchronously.
         Args:
             items: List of items to process through the pipeline

scorebook/settings.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Configuration settings for Scorebook."""
+import os
+# Optional: Load environment variables from .env file if python-dotenv is available
+try:
+    from dotenv import load_dotenv
+    load_dotenv(verbose=False)
+except ImportError:  # pragma: no cover
+    pass  # python-dotenv not installed, skip .env file loading
+# Trismik API settings
+TRISMIK_API_BASE_URL = "https://api.trismik.com"
+TRISMIK_ADAPTIVE_TESTING_URL = f"{TRISMIK_API_BASE_URL}/adaptive-testing"
+# Allow override via environment variable
+TRISMIK_SERVICE_URL = os.environ.get("TRISMIK_SERVICE_URL", TRISMIK_ADAPTIVE_TESTING_URL)
+# Progress bar configuration
+SHOW_PROGRESS_BARS = os.environ.get("SCOREBOOK_SHOW_PROGRESS_BARS", "true").lower() == "true"

scorebook/trismik/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Trismik authentication and API integration.
+Note: Trismik evaluation functionality has been moved to scorebook.evaluate module.
+This module now only provides authentication functions.
+"""
+# Import shared credential functions
+from .credentials import get_stored_token, get_token, login, logout, whoami
+__all__ = ["login", "logout", "whoami", "get_stored_token", "get_token"]

scorebook/types.py CHANGED Viewed

@@ -3,7 +3,7 @@
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Union
-from scorebook.eval_dataset import EvalDataset
+from scorebook.eval_datasets import EvalDataset
 @dataclass
@@ -21,7 +21,7 @@ class EvalRunSpec:
     dataset_index: int
     hyperparameter_config: Dict[str, Any]
     hyperparameters_index: int
-    items: List[Dict[str, Any]]
+    inputs: List[Any]
     labels: List[Any]
     def __str__(self) -> str:
@@ -64,13 +64,15 @@ class ClassicEvalRunResult:
         if self.outputs:
             for idx, output in enumerate(self.outputs):
-                if idx >= len(self.run_spec.items):
+                if idx >= len(self.run_spec.inputs):
                     break
                 result = {
-                    "item_id": idx,
+                    "id": idx,
                     "dataset_name": self.run_spec.dataset.name,
-                    "inference_output": output,
+                    "input": self.run_spec.inputs[idx],
+                    "label": self.run_spec.labels[idx] if idx < len(self.run_spec.labels) else None,
+                    "output": output,
                     **self.run_spec.hyperparameter_config,
                 }
@@ -125,6 +127,7 @@ class AdaptiveEvalRunResult:
     """Results from executing an adaptive evaluation run."""
     run_spec: AdaptiveEvalRunSpec
+    run_completed: bool
     scores: Dict[str, Any]
     @property

scorebook/utils/__init__.py CHANGED Viewed

@@ -1,9 +1,16 @@
 """Utility functions and common helpers for the Scorebook framework."""
-from scorebook.utils.async_utils import is_awaitable
-from scorebook.utils.build_prompt import build_prompt
+from scorebook.utils.async_utils import async_nullcontext, is_awaitable
 from scorebook.utils.io_helpers import validate_path
-from scorebook.utils.progress_bars import evaluation_progress
+from scorebook.utils.progress_bars import evaluation_progress_context
+from scorebook.utils.render_template import render_template
 from scorebook.utils.transform_helpers import expand_dict
-__all__ = ["is_awaitable", "validate_path", "expand_dict", "evaluation_progress", "build_prompt"]
+__all__ = [
+    "async_nullcontext",
+    "is_awaitable",
+    "validate_path",
+    "expand_dict",
+    "evaluation_progress_context",
+    "render_template",
+]

scorebook/utils/async_utils.py CHANGED Viewed

@@ -1,7 +1,10 @@
 """Async utilities for handling callable objects and coroutines."""
 import asyncio
-from typing import Callable
+from contextlib import asynccontextmanager
+from typing import AsyncIterator, Callable, Optional, TypeVar
+T = TypeVar("T")
 def is_awaitable(obj: Callable) -> bool:
@@ -25,3 +28,19 @@ def is_awaitable(obj: Callable) -> bool:
         return True
     return False
+@asynccontextmanager
+async def async_nullcontext(value: Optional[T] = None) -> AsyncIterator[Optional[T]]:
+    """Async version of contextlib.nullcontext for Python 3.9 compatibility.
+    contextlib.nullcontext() is sync-only and cannot be used with async with on Python 3.9.
+    This provides an async equivalent that can be used with async context managers.
+    Args:
+        value: Optional value to yield from the context manager
+    Yields:
+        The provided value
+    """
+    yield value

scorebook/utils/io_helpers.py CHANGED Viewed

@@ -1,15 +1,18 @@
 """Input/output helper functions for Scorebook."""
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Tuple, Union
-def validate_path(file_path: str, expected_suffix: Optional[str] = None) -> Path:
+def validate_path(
+    file_path: Union[str, Path], expected_suffix: Optional[Union[str, Tuple[str, ...]]] = None
+) -> Path:
     """Validate that a file path exists and optionally check its suffix.
     Args:
         file_path: Path to the file as string or Path object
-        expected_suffix: Optional file extension to validate (e.g. ".json", ".csv")
+        expected_suffix: Optional file extension(s) to validate.
+            Can be a single string (e.g. ".json") or tuple of strings (e.g. (".yaml", ".yml"))
     Returns:
         Path object for the validated file path
@@ -22,7 +25,17 @@ def validate_path(file_path: str, expected_suffix: Optional[str] = None) -> Path
     if not path.exists():
         raise FileNotFoundError(f"File not found: {file_path}")
-    if expected_suffix and path.suffix.lower() != expected_suffix.lower():
-        raise ValueError(f"File must have {expected_suffix} extension, got: {path.suffix}")
+    if expected_suffix:
+        # Convert single suffix to tuple for uniform handling
+        allowed_suffixes = (
+            (expected_suffix,) if isinstance(expected_suffix, str) else expected_suffix
+        )
+        allowed_suffixes_lower = tuple(s.lower() for s in allowed_suffixes)
+        if path.suffix.lower() not in allowed_suffixes_lower:
+            suffix_list = ", ".join(f"'{s}'" for s in allowed_suffixes)
+            raise ValueError(
+                f"File must have one of ({suffix_list}) extensions, got: '{path.suffix}'"
+            )
     return path

scorebook 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

scorebook 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl