scorebook 0.0.9__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {scorebook-0.0.9 → scorebook-0.0.11}/PKG-INFO +4 -4
  2. scorebook-0.0.11/pyproject.toml +134 -0
  3. scorebook-0.0.11/src/scorebook/__init__.py +26 -0
  4. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/cli/auth.py +1 -1
  5. scorebook-0.0.11/src/scorebook/eval_datasets/__init__.py +5 -0
  6. scorebook-0.0.11/src/scorebook/eval_datasets/eval_dataset.py +719 -0
  7. scorebook-0.0.11/src/scorebook/evaluate/__init__.py +15 -0
  8. scorebook-0.0.11/src/scorebook/evaluate/_async/__init__.py +0 -0
  9. scorebook-0.0.11/src/scorebook/evaluate/_async/evaluate_async.py +443 -0
  10. scorebook-0.0.11/src/scorebook/evaluate/_sync/__init__.py +0 -0
  11. scorebook-0.0.11/src/scorebook/evaluate/_sync/evaluate.py +443 -0
  12. scorebook-0.0.11/src/scorebook/evaluate/evaluate_helpers.py +388 -0
  13. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/exceptions.py +48 -0
  14. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/inference/__init__.py +4 -0
  15. scorebook-0.0.11/src/scorebook/inference/clients/__init__.py +8 -0
  16. {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/bedrock.py +1 -1
  17. {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/openai.py +35 -23
  18. {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/portkey.py +1 -1
  19. {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/vertex.py +1 -1
  20. {scorebook-0.0.9/src/scorebook → scorebook-0.0.11/src/scorebook/inference}/inference_pipeline.py +66 -4
  21. scorebook-0.0.11/src/scorebook/settings.py +21 -0
  22. scorebook-0.0.11/src/scorebook/trismik/__init__.py +10 -0
  23. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/types.py +8 -5
  24. scorebook-0.0.11/src/scorebook/utils/__init__.py +16 -0
  25. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/async_utils.py +20 -1
  26. scorebook-0.0.11/src/scorebook/utils/io_helpers.py +41 -0
  27. scorebook-0.0.11/src/scorebook/utils/progress_bars.py +789 -0
  28. scorebook-0.0.9/src/scorebook/utils/build_prompt.py → scorebook-0.0.11/src/scorebook/utils/render_template.py +13 -12
  29. scorebook-0.0.9/pyproject.toml +0 -83
  30. scorebook-0.0.9/src/scorebook/__init__.py +0 -18
  31. scorebook-0.0.9/src/scorebook/eval_dataset.py +0 -404
  32. scorebook-0.0.9/src/scorebook/evaluate.py +0 -623
  33. scorebook-0.0.9/src/scorebook/trismik_services/__init__.py +0 -6
  34. scorebook-0.0.9/src/scorebook/trismik_services/adaptive_testing_service.py +0 -141
  35. scorebook-0.0.9/src/scorebook/trismik_services/upload_classic_eval_run.py +0 -102
  36. scorebook-0.0.9/src/scorebook/utils/__init__.py +0 -9
  37. scorebook-0.0.9/src/scorebook/utils/io_helpers.py +0 -28
  38. scorebook-0.0.9/src/scorebook/utils/progress_bars.py +0 -146
  39. {scorebook-0.0.9 → scorebook-0.0.11}/LICENSE +0 -0
  40. {scorebook-0.0.9 → scorebook-0.0.11}/README.md +0 -0
  41. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/cli/__init__.py +0 -0
  42. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/cli/main.py +0 -0
  43. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/__init__.py +0 -0
  44. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/accuracy.py +0 -0
  45. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/metric_base.py +0 -0
  46. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/metric_registry.py +0 -0
  47. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/precision.py +0 -0
  48. /scorebook-0.0.9/src/scorebook/trismik_services/login.py → /scorebook-0.0.11/src/scorebook/trismik/credentials.py +0 -0
  49. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/jinja_helpers.py +0 -0
  50. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/mappers.py +0 -0
  51. {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/transform_helpers.py +0 -0
@@ -1,18 +1,17 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scorebook
3
- Version: 0.0.9
3
+ Version: 0.0.11
4
4
  Summary: A Python project for LLM evaluation.
5
5
  License-File: LICENSE
6
6
  Author: Euan Campbell
7
7
  Author-email: euan@trismik.com
8
- Requires-Python: >=3.9
8
+ Requires-Python: >=3.9, <3.14
9
9
  Classifier: Programming Language :: Python :: 3
10
10
  Classifier: Programming Language :: Python :: 3.9
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
14
  Classifier: Programming Language :: Python :: 3.13
15
- Classifier: Programming Language :: Python :: 3.14
16
15
  Provides-Extra: bedrock
17
16
  Provides-Extra: examples
18
17
  Provides-Extra: openai
@@ -24,6 +23,7 @@ Requires-Dist: datasets (>=3.6.0)
24
23
  Requires-Dist: fsspec[gcs] ; extra == "vertex"
25
24
  Requires-Dist: google-cloud-storage ; extra == "vertex"
26
25
  Requires-Dist: google-genai ; extra == "vertex"
26
+ Requires-Dist: ipywidgets (>=8.0.0)
27
27
  Requires-Dist: notebook (>=7.4.5,<8.0.0)
28
28
  Requires-Dist: notebook ; extra == "examples"
29
29
  Requires-Dist: openai ; extra == "openai"
@@ -37,7 +37,7 @@ Requires-Dist: torch ; extra == "examples"
37
37
  Requires-Dist: torchaudio ; extra == "examples"
38
38
  Requires-Dist: torchvision ; extra == "examples"
39
39
  Requires-Dist: transformers ; extra == "examples"
40
- Requires-Dist: trismik (>=0.9.12)
40
+ Requires-Dist: trismik (>=1.0.1,<2.0.0)
41
41
  Description-Content-Type: text/markdown
42
42
 
43
43
  # Scorebook
@@ -0,0 +1,134 @@
1
+ [project]
2
+ name = "scorebook"
3
+ dynamic = ["version"]
4
+ description = "A Python project for LLM evaluation."
5
+ authors = [
6
+ { name = "Euan Campbell", email = "euan@trismik.com"},
7
+ { name = "Marco Basaldella", email = "marco@trismik.com" }
8
+ ]
9
+ readme = "README.md"
10
+ requires-python = ">=3.9, <3.14"
11
+ dependencies = [
12
+ "datasets>=3.6.0",
13
+ "notebook (>=7.4.5,<8.0.0)",
14
+ "trismik (>=1.0.1, <2.0.0)",
15
+ "ipywidgets>=8.0.0",
16
+ ]
17
+
18
+ [project.scripts]
19
+ scorebook = "scorebook.cli.main:main"
20
+
21
+ [tool.poetry]
22
+ version = "0.0.11" # base version
23
+ packages = [{ include = "scorebook", from = "src" }]
24
+
25
+ [tool.poetry.dependencies]
26
+ python = ">=3.9,<3.14"
27
+ datasets = ">=3.6.0"
28
+ notebook = ">=7.4.5,<8.0.0"
29
+ trismik = ">=1.0.1,<2.0.0"
30
+ ipywidgets = ">=8.0.0"
31
+
32
+ # Optional dependencies
33
+ openai = {version = "*", optional = true}
34
+ python-dotenv = {version = "*", optional = true}
35
+ portkey-ai = {version = "*", optional = true}
36
+ boto3 = {version = "1.40.0", optional = true}
37
+ google-genai = {version = "*", optional = true}
38
+ pandas = {version = "*", optional = true}
39
+ google-cloud-storage = {version = "*", optional = true}
40
+ fsspec = {version = "*", extras = ["gcs"], optional = true}
41
+ transformers = {version = "*", optional = true}
42
+ torch = {version = "*", optional = true}
43
+ torchvision = {version = "*", optional = true}
44
+ torchaudio = {version = "*", optional = true}
45
+ accelerate = {version = "*", optional = true}
46
+
47
+ [tool.poetry.extras]
48
+ openai = ["openai", "python-dotenv"]
49
+ portkey = ["portkey-ai", "python-dotenv"]
50
+ bedrock = ["boto3", "python-dotenv"]
51
+ vertex = ["google-genai", "pandas", "google-cloud-storage", "fsspec", "python-dotenv"]
52
+ examples = ["transformers", "torch", "torchvision", "torchaudio", "accelerate", "notebook"]
53
+
54
+ [[tool.poetry.source]]
55
+ name = "testpypi"
56
+ url = "https://test.pypi.org/simple/"
57
+ priority = "supplemental"
58
+
59
+ [tool.poetry.group.dev.dependencies]
60
+ pytest = "^8.3.2"
61
+ pytest-asyncio = "^0.24.0"
62
+ pre-commit = "^3.6.2"
63
+ black = "^24.3.0"
64
+ isort = "^5.13.2"
65
+ Flake8-pyproject = "^1.2.3"
66
+ flake8 = "^7.0.0"
67
+ mypy = "^1.15.0"
68
+ autoflake = "^2.3.1"
69
+ toml = "^0.10.2"
70
+ types-pyyaml = "^6.0.12.20250822"
71
+ unasync = {version = "^0.5.0", python = ">=3.9,<4"}
72
+ tomlkit = "^0.13.2"
73
+ detect-secrets = "^1.5.0"
74
+
75
+ [project.optional-dependencies]
76
+ openai = ["openai", "python-dotenv"]
77
+ portkey = ["portkey-ai", "python-dotenv"]
78
+ bedrock = ["boto3==1.40.0", "python-dotenv"]
79
+ vertex = ["google-genai", "pandas", "google-cloud-storage", "fsspec[gcs]", "python-dotenv"]
80
+ examples = ["transformers", "torch", "torchvision", "torchaudio", "accelerate", "notebook"]
81
+
82
+
83
+ [build-system]
84
+ requires = ["poetry-core"]
85
+ build-backend = "poetry.core.masonry.api"
86
+
87
+ [tool.pytest.ini_options]
88
+ asyncio_default_fixture_loop_scope = "class"
89
+ markers = [
90
+ "unit: Unit tests that use mocks and don't require external dependencies",
91
+ "integration: Integration tests that may require network access or external services",
92
+ ]
93
+
94
+ [tool.black]
95
+ line-length = 100
96
+ target-version = ['py39']
97
+ include = '\.pyi?$'
98
+
99
+ [tool.isort]
100
+ profile = "black"
101
+ line_length = 100
102
+ multi_line_output = 3
103
+
104
+ [tool.mypy]
105
+ python_version = "3.9"
106
+ warn_return_any = true
107
+ warn_unused_configs = true
108
+ disallow_untyped_defs = true
109
+ disallow_incomplete_defs = true
110
+ check_untyped_defs = true
111
+ disallow_untyped_decorators = true
112
+ no_implicit_optional = true
113
+ warn_redundant_casts = true
114
+ warn_unused_ignores = true
115
+ warn_no_return = true
116
+ warn_unreachable = true
117
+ install_types = true
118
+
119
+ [tool.flake8] # note that this depends on Flake8-pyproject
120
+ ignore = ["D202", "W503", "W504"]
121
+
122
+ [tool.unasync]
123
+ [[tool.unasync.rules]]
124
+ fromdir = "src/scorebook/evaluate/_async/"
125
+ todir = "src/scorebook/evaluate/_sync/"
126
+
127
+
128
+ # Custom replacements beyond default async/await transformations
129
+ [tool.unasync.rules.replacements]
130
+ "evaluate_async" = "evaluate"
131
+ "Asynchronous evaluation complete" = "Synchronous evaluation complete"
132
+ " run_results = asyncio.gather(*[worker(run) for run in runs])" = " run_results = [worker(run) for run in runs]"
133
+ "async_nullcontext" = "nullcontext"
134
+ "create_trismik_async_client" = "create_trismik_sync_client"
@@ -0,0 +1,26 @@
1
+ """
2
+ Scorebook package.
3
+
4
+ A Python project for scorebook functionality.
5
+ """
6
+
7
+ import importlib.metadata
8
+
9
+ # get version from pyproject.toml
10
+ __version__ = importlib.metadata.version(__package__ or __name__)
11
+
12
+ from scorebook.eval_datasets import EvalDataset
13
+ from scorebook.evaluate import evaluate, evaluate_async
14
+ from scorebook.inference.inference_pipeline import InferencePipeline
15
+ from scorebook.trismik.credentials import login, whoami
16
+ from scorebook.utils.render_template import render_template
17
+
18
+ __all__ = [
19
+ "EvalDataset",
20
+ "evaluate",
21
+ "evaluate_async",
22
+ "render_template",
23
+ "login",
24
+ "whoami",
25
+ "InferencePipeline",
26
+ ]
@@ -4,7 +4,7 @@ import argparse
4
4
  import getpass
5
5
  import sys
6
6
 
7
- from scorebook.trismik.login import get_stored_token, get_token_path, login, logout, whoami
7
+ from scorebook.trismik.credentials import get_stored_token, get_token_path, login, logout, whoami
8
8
 
9
9
 
10
10
  def auth_command(args: argparse.Namespace) -> int:
@@ -0,0 +1,5 @@
1
+ """Dataset utilities for scorebook."""
2
+
3
+ from scorebook.eval_datasets.eval_dataset import EvalDataset
4
+
5
+ __all__ = ["EvalDataset"]