scorebook 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. scorebook/__init__.py +14 -6
  2. scorebook/cli/auth.py +1 -1
  3. scorebook/eval_datasets/__init__.py +5 -0
  4. scorebook/eval_datasets/eval_dataset.py +719 -0
  5. scorebook/evaluate/__init__.py +15 -0
  6. scorebook/evaluate/_async/__init__.py +0 -0
  7. scorebook/evaluate/_async/evaluate_async.py +443 -0
  8. scorebook/evaluate/_sync/__init__.py +0 -0
  9. scorebook/evaluate/_sync/evaluate.py +443 -0
  10. scorebook/evaluate/evaluate_helpers.py +388 -0
  11. scorebook/exceptions.py +48 -0
  12. scorebook/inference/__init__.py +4 -0
  13. scorebook/inference/clients/__init__.py +8 -0
  14. scorebook/inference/{bedrock.py → clients/bedrock.py} +1 -1
  15. scorebook/inference/{openai.py → clients/openai.py} +35 -23
  16. scorebook/inference/{portkey.py → clients/portkey.py} +1 -1
  17. scorebook/inference/{vertex.py → clients/vertex.py} +1 -1
  18. scorebook/{inference_pipeline.py → inference/inference_pipeline.py} +66 -4
  19. scorebook/settings.py +21 -0
  20. scorebook/trismik/__init__.py +10 -0
  21. scorebook/types.py +8 -5
  22. scorebook/utils/__init__.py +11 -4
  23. scorebook/utils/async_utils.py +20 -1
  24. scorebook/utils/io_helpers.py +18 -5
  25. scorebook/utils/progress_bars.py +739 -96
  26. scorebook/utils/{build_prompt.py → render_template.py} +13 -12
  27. {scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/METADATA +4 -4
  28. scorebook-0.0.11.dist-info/RECORD +42 -0
  29. scorebook/eval_dataset.py +0 -404
  30. scorebook/evaluate.py +0 -623
  31. scorebook/trismik_services/__init__.py +0 -6
  32. scorebook/trismik_services/adaptive_testing_service.py +0 -141
  33. scorebook/trismik_services/upload_classic_eval_run.py +0 -102
  34. scorebook-0.0.9.dist-info/RECORD +0 -36
  35. scorebook/{trismik_services/login.py → trismik/credentials.py} +0 -0
  36. {scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/WHEEL +0 -0
  37. {scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/entry_points.txt +0 -0
  38. {scorebook-0.0.9.dist-info → scorebook-0.0.11.dist-info}/licenses/LICENSE +0 -0
@@ -9,6 +9,8 @@ configurable way.
9
9
  import asyncio
10
10
  from typing import Any, Callable, Dict, List, Optional, cast
11
11
 
12
+ from scorebook.utils import is_awaitable
13
+
12
14
 
13
15
  class InferencePipeline:
14
16
  """A pipeline for processing items through model inference.
@@ -18,6 +20,8 @@ class InferencePipeline:
18
20
  2. Model inference
19
21
  3. Postprocessing of model outputs
20
22
 
23
+ The pipeline automatically adapts to sync or async execution based on the
24
+ inference function provided during initialization.
21
25
 
22
26
  Attributes:
23
27
  model: Name or identifier of the model being used
@@ -35,6 +39,9 @@ class InferencePipeline:
35
39
  ) -> None:
36
40
  """Initialize the inference pipeline.
37
41
 
42
+ The pipeline will automatically become sync or async based on the
43
+ inference_function provided.
44
+
38
45
  Args:
39
46
  model: Name or identifier of the model to use
40
47
  inference_function: Function that performs model inference
@@ -46,8 +53,59 @@ class InferencePipeline:
46
53
  self.preprocessor: Optional[Callable] = preprocessor
47
54
  self.postprocessor: Optional[Callable] = postprocessor
48
55
 
56
+ # Dynamically change the class to provide appropriate sync/async interface
57
+ self.__class__ = (
58
+ _AsyncInferencePipeline if is_awaitable(inference_function) else _SyncInferencePipeline
59
+ )
60
+
61
+
62
+ class _SyncInferencePipeline(InferencePipeline):
63
+ """Synchronous version of InferencePipeline."""
64
+
65
+ def run(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
66
+ """Execute the complete inference pipeline synchronously.
67
+
68
+ Args:
69
+ items: List of items to process through the pipeline
70
+ **hyperparameters: Model-specific parameters for inference
71
+
72
+ Returns:
73
+ List of processed outputs after running through the complete pipeline
74
+ """
75
+ if self.preprocessor:
76
+ input_items = [self.preprocessor(item, **hyperparameters) for item in items]
77
+ else:
78
+ input_items = items
79
+
80
+ # Sync inference function - call directly
81
+ inference_outputs = self.inference_function(input_items, **hyperparameters)
82
+
83
+ if self.postprocessor:
84
+ return [
85
+ self.postprocessor(inference_output, **hyperparameters)
86
+ for inference_output in inference_outputs
87
+ ]
88
+ else:
89
+ return cast(List[Any], inference_outputs)
90
+
91
+ def __call__(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
92
+ """Make the pipeline instance callable synchronously.
93
+
94
+ Args:
95
+ items: List of items to process through the pipeline
96
+ **hyperparameters: Model-specific parameters for inference
97
+
98
+ Returns:
99
+ List of processed outputs after running through the complete pipeline
100
+ """
101
+ return self.run(items, **hyperparameters)
102
+
103
+
104
+ class _AsyncInferencePipeline(InferencePipeline):
105
+ """Asynchronous version of InferencePipeline."""
106
+
49
107
  async def run(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
50
- """Execute the complete inference pipeline on a list of items.
108
+ """Execute the complete inference pipeline asynchronously.
51
109
 
52
110
  Args:
53
111
  items: List of items to process through the pipeline
@@ -61,10 +119,14 @@ class InferencePipeline:
61
119
  else:
62
120
  input_items = items
63
121
 
64
- if asyncio.iscoroutinefunction(self.inference_function):
122
+ # Handle both sync and async inference functions
123
+ if is_awaitable(self.inference_function):
65
124
  inference_outputs = await self.inference_function(input_items, **hyperparameters)
66
125
  else:
67
- inference_outputs = self.inference_function(input_items, **hyperparameters)
126
+ # Run sync function in thread pool to avoid blocking
127
+ inference_outputs = await asyncio.to_thread(
128
+ self.inference_function, input_items, **hyperparameters
129
+ )
68
130
 
69
131
  if self.postprocessor:
70
132
  return [
@@ -75,7 +137,7 @@ class InferencePipeline:
75
137
  return cast(List[Any], inference_outputs)
76
138
 
77
139
  async def __call__(self, items: List[Dict[str, Any]], **hyperparameters: Any) -> List[Any]:
78
- """Make the pipeline instance callable by wrapping the run method.
140
+ """Make the pipeline instance callable asynchronously.
79
141
 
80
142
  Args:
81
143
  items: List of items to process through the pipeline
scorebook/settings.py ADDED
@@ -0,0 +1,21 @@
1
+ """Configuration settings for Scorebook."""
2
+
3
+ import os
4
+
5
+ # Optional: Load environment variables from .env file if python-dotenv is available
6
+ try:
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv(verbose=False)
10
+ except ImportError: # pragma: no cover
11
+ pass # python-dotenv not installed, skip .env file loading
12
+
13
+ # Trismik API settings
14
+ TRISMIK_API_BASE_URL = "https://api.trismik.com"
15
+ TRISMIK_ADAPTIVE_TESTING_URL = f"{TRISMIK_API_BASE_URL}/adaptive-testing"
16
+
17
+ # Allow override via environment variable
18
+ TRISMIK_SERVICE_URL = os.environ.get("TRISMIK_SERVICE_URL", TRISMIK_ADAPTIVE_TESTING_URL)
19
+
20
+ # Progress bar configuration
21
+ SHOW_PROGRESS_BARS = os.environ.get("SCOREBOOK_SHOW_PROGRESS_BARS", "true").lower() == "true"
scorebook/trismik/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """Trismik authentication and API integration.
2
+
3
+ Note: Trismik evaluation functionality has been moved to scorebook.evaluate module.
4
+ This module now only provides authentication functions.
5
+ """
6
+
7
+ # Import shared credential functions
8
+ from .credentials import get_stored_token, get_token, login, logout, whoami
9
+
10
+ __all__ = ["login", "logout", "whoami", "get_stored_token", "get_token"]
scorebook/types.py CHANGED
@@ -3,7 +3,7 @@
3
3
  from dataclasses import dataclass
4
4
  from typing import Any, Dict, List, Optional, Union
5
5
 
6
- from scorebook.eval_dataset import EvalDataset
6
+ from scorebook.eval_datasets import EvalDataset
7
7
 
8
8
 
9
9
  @dataclass
@@ -21,7 +21,7 @@ class EvalRunSpec:
21
21
  dataset_index: int
22
22
  hyperparameter_config: Dict[str, Any]
23
23
  hyperparameters_index: int
24
- items: List[Dict[str, Any]]
24
+ inputs: List[Any]
25
25
  labels: List[Any]
26
26
 
27
27
  def __str__(self) -> str:
@@ -64,13 +64,15 @@ class ClassicEvalRunResult:
64
64
 
65
65
  if self.outputs:
66
66
  for idx, output in enumerate(self.outputs):
67
- if idx >= len(self.run_spec.items):
67
+ if idx >= len(self.run_spec.inputs):
68
68
  break
69
69
 
70
70
  result = {
71
- "item_id": idx,
71
+ "id": idx,
72
72
  "dataset_name": self.run_spec.dataset.name,
73
- "inference_output": output,
73
+ "input": self.run_spec.inputs[idx],
74
+ "label": self.run_spec.labels[idx] if idx < len(self.run_spec.labels) else None,
75
+ "output": output,
74
76
  **self.run_spec.hyperparameter_config,
75
77
  }
76
78
 
@@ -125,6 +127,7 @@ class AdaptiveEvalRunResult:
125
127
  """Results from executing an adaptive evaluation run."""
126
128
 
127
129
  run_spec: AdaptiveEvalRunSpec
130
+ run_completed: bool
128
131
  scores: Dict[str, Any]
129
132
 
130
133
  @property
scorebook/utils/__init__.py CHANGED
@@ -1,9 +1,16 @@
1
1
  """Utility functions and common helpers for the Scorebook framework."""
2
2
 
3
- from scorebook.utils.async_utils import is_awaitable
4
- from scorebook.utils.build_prompt import build_prompt
3
+ from scorebook.utils.async_utils import async_nullcontext, is_awaitable
5
4
  from scorebook.utils.io_helpers import validate_path
6
- from scorebook.utils.progress_bars import evaluation_progress
5
+ from scorebook.utils.progress_bars import evaluation_progress_context
6
+ from scorebook.utils.render_template import render_template
7
7
  from scorebook.utils.transform_helpers import expand_dict
8
8
 
9
- __all__ = ["is_awaitable", "validate_path", "expand_dict", "evaluation_progress", "build_prompt"]
9
+ __all__ = [
10
+ "async_nullcontext",
11
+ "is_awaitable",
12
+ "validate_path",
13
+ "expand_dict",
14
+ "evaluation_progress_context",
15
+ "render_template",
16
+ ]
scorebook/utils/async_utils.py CHANGED
@@ -1,7 +1,10 @@
1
1
  """Async utilities for handling callable objects and coroutines."""
2
2
 
3
3
  import asyncio
4
- from typing import Callable
4
+ from contextlib import asynccontextmanager
5
+ from typing import AsyncIterator, Callable, Optional, TypeVar
6
+
7
+ T = TypeVar("T")
5
8
 
6
9
 
7
10
  def is_awaitable(obj: Callable) -> bool:
@@ -25,3 +28,19 @@ def is_awaitable(obj: Callable) -> bool:
25
28
  return True
26
29
 
27
30
  return False
31
+
32
+
33
+ @asynccontextmanager
34
+ async def async_nullcontext(value: Optional[T] = None) -> AsyncIterator[Optional[T]]:
35
+ """Async version of contextlib.nullcontext for Python 3.9 compatibility.
36
+
37
+ contextlib.nullcontext() is sync-only and cannot be used with async with on Python 3.9.
38
+ This provides an async equivalent that can be used with async context managers.
39
+
40
+ Args:
41
+ value: Optional value to yield from the context manager
42
+
43
+ Yields:
44
+ The provided value
45
+ """
46
+ yield value
scorebook/utils/io_helpers.py CHANGED
@@ -1,15 +1,18 @@
1
1
  """Input/output helper functions for Scorebook."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Optional
4
+ from typing import Optional, Tuple, Union
5
5
 
6
6
 
7
- def validate_path(file_path: str, expected_suffix: Optional[str] = None) -> Path:
7
+ def validate_path(
8
+ file_path: Union[str, Path], expected_suffix: Optional[Union[str, Tuple[str, ...]]] = None
9
+ ) -> Path:
8
10
  """Validate that a file path exists and optionally check its suffix.
9
11
 
10
12
  Args:
11
13
  file_path: Path to the file as string or Path object
12
- expected_suffix: Optional file extension to validate (e.g. ".json", ".csv")
14
+ expected_suffix: Optional file extension(s) to validate.
15
+ Can be a single string (e.g. ".json") or tuple of strings (e.g. (".yaml", ".yml"))
13
16
 
14
17
  Returns:
15
18
  Path object for the validated file path
@@ -22,7 +25,17 @@ def validate_path(file_path: str, expected_suffix: Optional[str] = None) -> Path
22
25
  if not path.exists():
23
26
  raise FileNotFoundError(f"File not found: {file_path}")
24
27
 
25
- if expected_suffix and path.suffix.lower() != expected_suffix.lower():
26
- raise ValueError(f"File must have {expected_suffix} extension, got: {path.suffix}")
28
+ if expected_suffix:
29
+ # Convert single suffix to tuple for uniform handling
30
+ allowed_suffixes = (
31
+ (expected_suffix,) if isinstance(expected_suffix, str) else expected_suffix
32
+ )
33
+ allowed_suffixes_lower = tuple(s.lower() for s in allowed_suffixes)
34
+
35
+ if path.suffix.lower() not in allowed_suffixes_lower:
36
+ suffix_list = ", ".join(f"'{s}'" for s in allowed_suffixes)
37
+ raise ValueError(
38
+ f"File must have one of ({suffix_list}) extensions, got: '{path.suffix}'"
39
+ )
27
40
 
28
41
  return path