scorebook 0.0.10__tar.gz → 0.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scorebook-0.0.10 → scorebook-0.0.12}/PKG-INFO +3 -2
- {scorebook-0.0.10 → scorebook-0.0.12}/pyproject.toml +25 -12
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/__init__.py +11 -4
- scorebook-0.0.12/src/scorebook/eval_datasets/__init__.py +5 -0
- scorebook-0.0.12/src/scorebook/eval_datasets/eval_dataset.py +719 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_async/evaluate_async.py +135 -130
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_sync/evaluate.py +135 -131
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/evaluate_helpers.py +46 -23
- scorebook-0.0.12/src/scorebook/exceptions.py +106 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/bedrock.py +1 -1
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/portkey.py +1 -1
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/vertex.py +1 -1
- scorebook-0.0.12/src/scorebook/score/__init__.py +6 -0
- scorebook-0.0.12/src/scorebook/score/_async/__init__.py +0 -0
- scorebook-0.0.12/src/scorebook/score/_async/score_async.py +145 -0
- scorebook-0.0.12/src/scorebook/score/_sync/__init__.py +0 -0
- scorebook-0.0.12/src/scorebook/score/_sync/score.py +145 -0
- scorebook-0.0.12/src/scorebook/score/score_helpers.py +207 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/settings.py +3 -0
- scorebook-0.0.12/src/scorebook/trismik/upload_results.py +254 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/types.py +36 -54
- scorebook-0.0.12/src/scorebook/utils/__init__.py +23 -0
- scorebook-0.0.12/src/scorebook/utils/common_helpers.py +41 -0
- scorebook-0.0.12/src/scorebook/utils/io_helpers.py +41 -0
- scorebook-0.0.12/src/scorebook/utils/progress_bars.py +856 -0
- scorebook-0.0.10/src/scorebook/utils/build_prompt.py → scorebook-0.0.12/src/scorebook/utils/render_template.py +13 -12
- scorebook-0.0.10/src/scorebook/eval_dataset.py +0 -404
- scorebook-0.0.10/src/scorebook/exceptions.py +0 -54
- scorebook-0.0.10/src/scorebook/utils/__init__.py +0 -16
- scorebook-0.0.10/src/scorebook/utils/io_helpers.py +0 -28
- scorebook-0.0.10/src/scorebook/utils/progress_bars.py +0 -107
- {scorebook-0.0.10 → scorebook-0.0.12}/LICENSE +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/README.md +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/cli/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/cli/auth.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/cli/main.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_async/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_sync/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/openai.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/inference_pipeline.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/accuracy.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/metric_base.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/metric_registry.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/precision.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/trismik/__init__.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/trismik/credentials.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/async_utils.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/jinja_helpers.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/mappers.py +0 -0
- {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/transform_helpers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scorebook
|
|
3
|
-
Version: 0.0.10
|
|
3
|
+
Version: 0.0.12
|
|
4
4
|
Summary: A Python project for LLM evaluation.
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Author: Euan Campbell
|
|
@@ -23,6 +23,7 @@ Requires-Dist: datasets (>=3.6.0)
|
|
|
23
23
|
Requires-Dist: fsspec[gcs] ; extra == "vertex"
|
|
24
24
|
Requires-Dist: google-cloud-storage ; extra == "vertex"
|
|
25
25
|
Requires-Dist: google-genai ; extra == "vertex"
|
|
26
|
+
Requires-Dist: ipywidgets (>=8.0.0)
|
|
26
27
|
Requires-Dist: notebook (>=7.4.5,<8.0.0)
|
|
27
28
|
Requires-Dist: notebook ; extra == "examples"
|
|
28
29
|
Requires-Dist: openai ; extra == "openai"
|
|
@@ -36,7 +37,7 @@ Requires-Dist: torch ; extra == "examples"
|
|
|
36
37
|
Requires-Dist: torchaudio ; extra == "examples"
|
|
37
38
|
Requires-Dist: torchvision ; extra == "examples"
|
|
38
39
|
Requires-Dist: transformers ; extra == "examples"
|
|
39
|
-
Requires-Dist: trismik (
|
|
40
|
+
Requires-Dist: trismik (==1.0.1)
|
|
40
41
|
Description-Content-Type: text/markdown
|
|
41
42
|
|
|
42
43
|
# Scorebook
|
|
@@ -11,21 +11,23 @@ requires-python = ">=3.9, <3.14"
|
|
|
11
11
|
dependencies = [
|
|
12
12
|
"datasets>=3.6.0",
|
|
13
13
|
"notebook (>=7.4.5,<8.0.0)",
|
|
14
|
-
"trismik
|
|
14
|
+
"trismik==1.0.1",
|
|
15
|
+
"ipywidgets>=8.0.0",
|
|
15
16
|
]
|
|
16
17
|
|
|
17
18
|
[project.scripts]
|
|
18
19
|
scorebook = "scorebook.cli.main:main"
|
|
19
20
|
|
|
20
21
|
[tool.poetry]
|
|
21
|
-
version = "0.0.10"
|
|
22
|
+
version = "0.0.12" # base version
|
|
22
23
|
packages = [{ include = "scorebook", from = "src" }]
|
|
23
24
|
|
|
24
25
|
[tool.poetry.dependencies]
|
|
25
26
|
python = ">=3.9,<3.14"
|
|
26
27
|
datasets = ">=3.6.0"
|
|
27
28
|
notebook = ">=7.4.5,<8.0.0"
|
|
28
|
-
trismik = "
|
|
29
|
+
trismik = "1.0.1"
|
|
30
|
+
ipywidgets = ">=8.0.0"
|
|
29
31
|
|
|
30
32
|
# Optional dependencies
|
|
31
33
|
openai = {version = "*", optional = true}
|
|
@@ -68,6 +70,7 @@ toml = "^0.10.2"
|
|
|
68
70
|
types-pyyaml = "^6.0.12.20250822"
|
|
69
71
|
unasync = {version = "^0.5.0", python = ">=3.9,<4"}
|
|
70
72
|
tomlkit = "^0.13.2"
|
|
73
|
+
detect-secrets = "^1.5.0"
|
|
71
74
|
|
|
72
75
|
[project.optional-dependencies]
|
|
73
76
|
openai = ["openai", "python-dotenv"]
|
|
@@ -83,6 +86,10 @@ build-backend = "poetry.core.masonry.api"
|
|
|
83
86
|
|
|
84
87
|
[tool.pytest.ini_options]
|
|
85
88
|
asyncio_default_fixture_loop_scope = "class"
|
|
89
|
+
markers = [
|
|
90
|
+
"unit: Unit tests that use mocks and don't require external dependencies",
|
|
91
|
+
"integration: Integration tests that may require network access or external services",
|
|
92
|
+
]
|
|
86
93
|
|
|
87
94
|
[tool.black]
|
|
88
95
|
line-length = 100
|
|
@@ -112,16 +119,22 @@ install_types = true
|
|
|
112
119
|
[tool.flake8] # note that this depends on Flake8-pyproject
|
|
113
120
|
ignore = ["D202", "W503", "W504"]
|
|
114
121
|
|
|
115
|
-
[tool.unasync]
|
|
116
122
|
[[tool.unasync.rules]]
|
|
117
123
|
fromdir = "src/scorebook/evaluate/_async/"
|
|
118
124
|
todir = "src/scorebook/evaluate/_sync/"
|
|
125
|
+
replacements."scorebook.score._async.score_async" = "scorebook.score._sync.score"
|
|
126
|
+
replacements."scorebook.score._async" = "scorebook.score._sync"
|
|
127
|
+
replacements.evaluate_async = "evaluate"
|
|
128
|
+
replacements."Asynchronous evaluation complete" = "Synchronous evaluation complete"
|
|
129
|
+
replacements." run_results = asyncio.gather(*[worker(run) for run in runs])" = " run_results = [worker(run) for run in runs]"
|
|
130
|
+
replacements.async_nullcontext = "nullcontext"
|
|
131
|
+
replacements.create_trismik_async_client = "create_trismik_sync_client"
|
|
132
|
+
replacements.score_async = "score"
|
|
119
133
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
"
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
"create_trismik_async_client" = "create_trismik_sync_client"
|
|
134
|
+
[[tool.unasync.rules]]
|
|
135
|
+
fromdir = "src/scorebook/score/_async/"
|
|
136
|
+
todir = "src/scorebook/score/_sync/"
|
|
137
|
+
replacements.score_async = "score"
|
|
138
|
+
replacements."Async scoring complete" = "Scoring complete"
|
|
139
|
+
replacements.calculate_metric_scores_async = "calculate_metric_scores"
|
|
140
|
+
replacements.upload_result_async = "upload_result"
|
|
@@ -9,18 +9,25 @@ import importlib.metadata
|
|
|
9
9
|
# get version from pyproject.toml
|
|
10
10
|
__version__ = importlib.metadata.version(__package__ or __name__)
|
|
11
11
|
|
|
12
|
-
from scorebook.eval_dataset import EvalDataset
|
|
12
|
+
from scorebook.eval_datasets import EvalDataset
|
|
13
13
|
from scorebook.evaluate import evaluate, evaluate_async
|
|
14
14
|
from scorebook.inference.inference_pipeline import InferencePipeline
|
|
15
|
-
from scorebook.
|
|
16
|
-
from scorebook.
|
|
15
|
+
from scorebook.score import score, score_async
|
|
16
|
+
from scorebook.trismik.credentials import login, logout, whoami
|
|
17
|
+
from scorebook.trismik.upload_results import upload_result, upload_result_async
|
|
18
|
+
from scorebook.utils.render_template import render_template
|
|
17
19
|
|
|
18
20
|
__all__ = [
|
|
19
21
|
"EvalDataset",
|
|
20
22
|
"evaluate",
|
|
21
23
|
"evaluate_async",
|
|
22
|
-
"
|
|
24
|
+
"score",
|
|
25
|
+
"score_async",
|
|
26
|
+
"render_template",
|
|
23
27
|
"login",
|
|
28
|
+
"logout",
|
|
24
29
|
"whoami",
|
|
25
30
|
"InferencePipeline",
|
|
31
|
+
"upload_result",
|
|
32
|
+
"upload_result_async",
|
|
26
33
|
]
|