scorebook 0.0.9__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scorebook-0.0.9 → scorebook-0.0.11}/PKG-INFO +4 -4
- scorebook-0.0.11/pyproject.toml +134 -0
- scorebook-0.0.11/src/scorebook/__init__.py +26 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/cli/auth.py +1 -1
- scorebook-0.0.11/src/scorebook/eval_datasets/__init__.py +5 -0
- scorebook-0.0.11/src/scorebook/eval_datasets/eval_dataset.py +719 -0
- scorebook-0.0.11/src/scorebook/evaluate/__init__.py +15 -0
- scorebook-0.0.11/src/scorebook/evaluate/_async/__init__.py +0 -0
- scorebook-0.0.11/src/scorebook/evaluate/_async/evaluate_async.py +443 -0
- scorebook-0.0.11/src/scorebook/evaluate/_sync/__init__.py +0 -0
- scorebook-0.0.11/src/scorebook/evaluate/_sync/evaluate.py +443 -0
- scorebook-0.0.11/src/scorebook/evaluate/evaluate_helpers.py +388 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/exceptions.py +48 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/inference/__init__.py +4 -0
- scorebook-0.0.11/src/scorebook/inference/clients/__init__.py +8 -0
- {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/bedrock.py +1 -1
- {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/openai.py +35 -23
- {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/portkey.py +1 -1
- {scorebook-0.0.9/src/scorebook/inference → scorebook-0.0.11/src/scorebook/inference/clients}/vertex.py +1 -1
- {scorebook-0.0.9/src/scorebook → scorebook-0.0.11/src/scorebook/inference}/inference_pipeline.py +66 -4
- scorebook-0.0.11/src/scorebook/settings.py +21 -0
- scorebook-0.0.11/src/scorebook/trismik/__init__.py +10 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/types.py +8 -5
- scorebook-0.0.11/src/scorebook/utils/__init__.py +16 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/async_utils.py +20 -1
- scorebook-0.0.11/src/scorebook/utils/io_helpers.py +41 -0
- scorebook-0.0.11/src/scorebook/utils/progress_bars.py +789 -0
- scorebook-0.0.9/src/scorebook/utils/build_prompt.py → scorebook-0.0.11/src/scorebook/utils/render_template.py +13 -12
- scorebook-0.0.9/pyproject.toml +0 -83
- scorebook-0.0.9/src/scorebook/__init__.py +0 -18
- scorebook-0.0.9/src/scorebook/eval_dataset.py +0 -404
- scorebook-0.0.9/src/scorebook/evaluate.py +0 -623
- scorebook-0.0.9/src/scorebook/trismik_services/__init__.py +0 -6
- scorebook-0.0.9/src/scorebook/trismik_services/adaptive_testing_service.py +0 -141
- scorebook-0.0.9/src/scorebook/trismik_services/upload_classic_eval_run.py +0 -102
- scorebook-0.0.9/src/scorebook/utils/__init__.py +0 -9
- scorebook-0.0.9/src/scorebook/utils/io_helpers.py +0 -28
- scorebook-0.0.9/src/scorebook/utils/progress_bars.py +0 -146
- {scorebook-0.0.9 → scorebook-0.0.11}/LICENSE +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/README.md +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/cli/__init__.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/cli/main.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/__init__.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/accuracy.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/metric_base.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/metric_registry.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/metrics/precision.py +0 -0
- /scorebook-0.0.9/src/scorebook/trismik_services/login.py → /scorebook-0.0.11/src/scorebook/trismik/credentials.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/jinja_helpers.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/mappers.py +0 -0
- {scorebook-0.0.9 → scorebook-0.0.11}/src/scorebook/utils/transform_helpers.py +0 -0
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scorebook
|
|
3
|
-
Version: 0.0.9
|
|
3
|
+
Version: 0.0.11
|
|
4
4
|
Summary: A Python project for LLM evaluation.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Author: Euan Campbell
|
|
7
7
|
Author-email: euan@trismik.com
|
|
8
|
-
Requires-Python: >=3.9
|
|
8
|
+
Requires-Python: >=3.9, <3.14
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.9
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
16
15
|
Provides-Extra: bedrock
|
|
17
16
|
Provides-Extra: examples
|
|
18
17
|
Provides-Extra: openai
|
|
@@ -24,6 +23,7 @@ Requires-Dist: datasets (>=3.6.0)
|
|
|
24
23
|
Requires-Dist: fsspec[gcs] ; extra == "vertex"
|
|
25
24
|
Requires-Dist: google-cloud-storage ; extra == "vertex"
|
|
26
25
|
Requires-Dist: google-genai ; extra == "vertex"
|
|
26
|
+
Requires-Dist: ipywidgets (>=8.0.0)
|
|
27
27
|
Requires-Dist: notebook (>=7.4.5,<8.0.0)
|
|
28
28
|
Requires-Dist: notebook ; extra == "examples"
|
|
29
29
|
Requires-Dist: openai ; extra == "openai"
|
|
@@ -37,7 +37,7 @@ Requires-Dist: torch ; extra == "examples"
|
|
|
37
37
|
Requires-Dist: torchaudio ; extra == "examples"
|
|
38
38
|
Requires-Dist: torchvision ; extra == "examples"
|
|
39
39
|
Requires-Dist: transformers ; extra == "examples"
|
|
40
|
-
Requires-Dist: trismik (>=0.
|
|
40
|
+
Requires-Dist: trismik (>=1.0.1,<2.0.0)
|
|
41
41
|
Description-Content-Type: text/markdown
|
|
42
42
|
|
|
43
43
|
# Scorebook
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "scorebook"
|
|
3
|
+
dynamic = ["version"]
|
|
4
|
+
description = "A Python project for LLM evaluation."
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Euan Campbell", email = "euan@trismik.com"},
|
|
7
|
+
{ name = "Marco Basaldella", email = "marco@trismik.com" }
|
|
8
|
+
]
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9, <3.14"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"datasets>=3.6.0",
|
|
13
|
+
"notebook (>=7.4.5,<8.0.0)",
|
|
14
|
+
"trismik (>=1.0.1, <2.0.0)",
|
|
15
|
+
"ipywidgets>=8.0.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
scorebook = "scorebook.cli.main:main"
|
|
20
|
+
|
|
21
|
+
[tool.poetry]
|
|
22
|
+
version = "0.0.11" # base version
|
|
23
|
+
packages = [{ include = "scorebook", from = "src" }]
|
|
24
|
+
|
|
25
|
+
[tool.poetry.dependencies]
|
|
26
|
+
python = ">=3.9,<3.14"
|
|
27
|
+
datasets = ">=3.6.0"
|
|
28
|
+
notebook = ">=7.4.5,<8.0.0"
|
|
29
|
+
trismik = ">=1.0.1,<2.0.0"
|
|
30
|
+
ipywidgets = ">=8.0.0"
|
|
31
|
+
|
|
32
|
+
# Optional dependencies
|
|
33
|
+
openai = {version = "*", optional = true}
|
|
34
|
+
python-dotenv = {version = "*", optional = true}
|
|
35
|
+
portkey-ai = {version = "*", optional = true}
|
|
36
|
+
boto3 = {version = "1.40.0", optional = true}
|
|
37
|
+
google-genai = {version = "*", optional = true}
|
|
38
|
+
pandas = {version = "*", optional = true}
|
|
39
|
+
google-cloud-storage = {version = "*", optional = true}
|
|
40
|
+
fsspec = {version = "*", extras = ["gcs"], optional = true}
|
|
41
|
+
transformers = {version = "*", optional = true}
|
|
42
|
+
torch = {version = "*", optional = true}
|
|
43
|
+
torchvision = {version = "*", optional = true}
|
|
44
|
+
torchaudio = {version = "*", optional = true}
|
|
45
|
+
accelerate = {version = "*", optional = true}
|
|
46
|
+
|
|
47
|
+
[tool.poetry.extras]
|
|
48
|
+
openai = ["openai", "python-dotenv"]
|
|
49
|
+
portkey = ["portkey-ai", "python-dotenv"]
|
|
50
|
+
bedrock = ["boto3", "python-dotenv"]
|
|
51
|
+
vertex = ["google-genai", "pandas", "google-cloud-storage", "fsspec", "python-dotenv"]
|
|
52
|
+
examples = ["transformers", "torch", "torchvision", "torchaudio", "accelerate", "notebook"]
|
|
53
|
+
|
|
54
|
+
[[tool.poetry.source]]
|
|
55
|
+
name = "testpypi"
|
|
56
|
+
url = "https://test.pypi.org/simple/"
|
|
57
|
+
priority = "supplemental"
|
|
58
|
+
|
|
59
|
+
[tool.poetry.group.dev.dependencies]
|
|
60
|
+
pytest = "^8.3.2"
|
|
61
|
+
pytest-asyncio = "^0.24.0"
|
|
62
|
+
pre-commit = "^3.6.2"
|
|
63
|
+
black = "^24.3.0"
|
|
64
|
+
isort = "^5.13.2"
|
|
65
|
+
Flake8-pyproject = "^1.2.3"
|
|
66
|
+
flake8 = "^7.0.0"
|
|
67
|
+
mypy = "^1.15.0"
|
|
68
|
+
autoflake = "^2.3.1"
|
|
69
|
+
toml = "^0.10.2"
|
|
70
|
+
types-pyyaml = "^6.0.12.20250822"
|
|
71
|
+
unasync = {version = "^0.5.0", python = ">=3.9,<4"}
|
|
72
|
+
tomlkit = "^0.13.2"
|
|
73
|
+
detect-secrets = "^1.5.0"
|
|
74
|
+
|
|
75
|
+
[project.optional-dependencies]
|
|
76
|
+
openai = ["openai", "python-dotenv"]
|
|
77
|
+
portkey = ["portkey-ai", "python-dotenv"]
|
|
78
|
+
bedrock = ["boto3==1.40.0", "python-dotenv"]
|
|
79
|
+
vertex = ["google-genai", "pandas", "google-cloud-storage", "fsspec[gcs]", "python-dotenv"]
|
|
80
|
+
examples = ["transformers", "torch", "torchvision", "torchaudio", "accelerate", "notebook"]
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
[build-system]
|
|
84
|
+
requires = ["poetry-core"]
|
|
85
|
+
build-backend = "poetry.core.masonry.api"
|
|
86
|
+
|
|
87
|
+
[tool.pytest.ini_options]
|
|
88
|
+
asyncio_default_fixture_loop_scope = "class"
|
|
89
|
+
markers = [
|
|
90
|
+
"unit: Unit tests that use mocks and don't require external dependencies",
|
|
91
|
+
"integration: Integration tests that may require network access or external services",
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
[tool.black]
|
|
95
|
+
line-length = 100
|
|
96
|
+
target-version = ['py39']
|
|
97
|
+
include = '\.pyi?$'
|
|
98
|
+
|
|
99
|
+
[tool.isort]
|
|
100
|
+
profile = "black"
|
|
101
|
+
line_length = 100
|
|
102
|
+
multi_line_output = 3
|
|
103
|
+
|
|
104
|
+
[tool.mypy]
|
|
105
|
+
python_version = "3.9"
|
|
106
|
+
warn_return_any = true
|
|
107
|
+
warn_unused_configs = true
|
|
108
|
+
disallow_untyped_defs = true
|
|
109
|
+
disallow_incomplete_defs = true
|
|
110
|
+
check_untyped_defs = true
|
|
111
|
+
disallow_untyped_decorators = true
|
|
112
|
+
no_implicit_optional = true
|
|
113
|
+
warn_redundant_casts = true
|
|
114
|
+
warn_unused_ignores = true
|
|
115
|
+
warn_no_return = true
|
|
116
|
+
warn_unreachable = true
|
|
117
|
+
install_types = true
|
|
118
|
+
|
|
119
|
+
[tool.flake8] # note that this depends on Flake8-pyproject
|
|
120
|
+
ignore = ["D202", "W503", "W504"]
|
|
121
|
+
|
|
122
|
+
[tool.unasync]
|
|
123
|
+
[[tool.unasync.rules]]
|
|
124
|
+
fromdir = "src/scorebook/evaluate/_async/"
|
|
125
|
+
todir = "src/scorebook/evaluate/_sync/"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# Custom replacements beyond default async/await transformations
|
|
129
|
+
[tool.unasync.rules.replacements]
|
|
130
|
+
"evaluate_async" = "evaluate"
|
|
131
|
+
"Asynchronous evaluation complete" = "Synchronous evaluation complete"
|
|
132
|
+
" run_results = asyncio.gather(*[worker(run) for run in runs])" = " run_results = [worker(run) for run in runs]"
|
|
133
|
+
"async_nullcontext" = "nullcontext"
|
|
134
|
+
"create_trismik_async_client" = "create_trismik_sync_client"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Scorebook package.
|
|
3
|
+
|
|
4
|
+
A Python project for scorebook functionality.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import importlib.metadata
|
|
8
|
+
|
|
9
|
+
# get version from pyproject.toml
|
|
10
|
+
__version__ = importlib.metadata.version(__package__ or __name__)
|
|
11
|
+
|
|
12
|
+
from scorebook.eval_datasets import EvalDataset
|
|
13
|
+
from scorebook.evaluate import evaluate, evaluate_async
|
|
14
|
+
from scorebook.inference.inference_pipeline import InferencePipeline
|
|
15
|
+
from scorebook.trismik.credentials import login, whoami
|
|
16
|
+
from scorebook.utils.render_template import render_template
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"EvalDataset",
|
|
20
|
+
"evaluate",
|
|
21
|
+
"evaluate_async",
|
|
22
|
+
"render_template",
|
|
23
|
+
"login",
|
|
24
|
+
"whoami",
|
|
25
|
+
"InferencePipeline",
|
|
26
|
+
]
|
|
@@ -4,7 +4,7 @@ import argparse
|
|
|
4
4
|
import getpass
|
|
5
5
|
import sys
|
|
6
6
|
|
|
7
|
-
from scorebook.trismik.
|
|
7
|
+
from scorebook.trismik.credentials import get_stored_token, get_token_path, login, logout, whoami
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def auth_command(args: argparse.Namespace) -> int:
|