scorebook 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {scorebook-0.0.10 → scorebook-0.0.12}/PKG-INFO +3 -2
  2. {scorebook-0.0.10 → scorebook-0.0.12}/pyproject.toml +25 -12
  3. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/__init__.py +11 -4
  4. scorebook-0.0.12/src/scorebook/eval_datasets/__init__.py +5 -0
  5. scorebook-0.0.12/src/scorebook/eval_datasets/eval_dataset.py +719 -0
  6. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_async/evaluate_async.py +135 -130
  7. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_sync/evaluate.py +135 -131
  8. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/evaluate_helpers.py +46 -23
  9. scorebook-0.0.12/src/scorebook/exceptions.py +106 -0
  10. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/bedrock.py +1 -1
  11. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/portkey.py +1 -1
  12. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/vertex.py +1 -1
  13. scorebook-0.0.12/src/scorebook/score/__init__.py +6 -0
  14. scorebook-0.0.12/src/scorebook/score/_async/__init__.py +0 -0
  15. scorebook-0.0.12/src/scorebook/score/_async/score_async.py +145 -0
  16. scorebook-0.0.12/src/scorebook/score/_sync/__init__.py +0 -0
  17. scorebook-0.0.12/src/scorebook/score/_sync/score.py +145 -0
  18. scorebook-0.0.12/src/scorebook/score/score_helpers.py +207 -0
  19. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/settings.py +3 -0
  20. scorebook-0.0.12/src/scorebook/trismik/upload_results.py +254 -0
  21. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/types.py +36 -54
  22. scorebook-0.0.12/src/scorebook/utils/__init__.py +23 -0
  23. scorebook-0.0.12/src/scorebook/utils/common_helpers.py +41 -0
  24. scorebook-0.0.12/src/scorebook/utils/io_helpers.py +41 -0
  25. scorebook-0.0.12/src/scorebook/utils/progress_bars.py +856 -0
  26. scorebook-0.0.10/src/scorebook/utils/build_prompt.py → scorebook-0.0.12/src/scorebook/utils/render_template.py +13 -12
  27. scorebook-0.0.10/src/scorebook/eval_dataset.py +0 -404
  28. scorebook-0.0.10/src/scorebook/exceptions.py +0 -54
  29. scorebook-0.0.10/src/scorebook/utils/__init__.py +0 -16
  30. scorebook-0.0.10/src/scorebook/utils/io_helpers.py +0 -28
  31. scorebook-0.0.10/src/scorebook/utils/progress_bars.py +0 -107
  32. {scorebook-0.0.10 → scorebook-0.0.12}/LICENSE +0 -0
  33. {scorebook-0.0.10 → scorebook-0.0.12}/README.md +0 -0
  34. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/cli/__init__.py +0 -0
  35. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/cli/auth.py +0 -0
  36. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/cli/main.py +0 -0
  37. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/__init__.py +0 -0
  38. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_async/__init__.py +0 -0
  39. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/evaluate/_sync/__init__.py +0 -0
  40. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/__init__.py +0 -0
  41. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/__init__.py +0 -0
  42. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/clients/openai.py +0 -0
  43. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/inference/inference_pipeline.py +0 -0
  44. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/__init__.py +0 -0
  45. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/accuracy.py +0 -0
  46. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/metric_base.py +0 -0
  47. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/metric_registry.py +0 -0
  48. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/metrics/precision.py +0 -0
  49. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/trismik/__init__.py +0 -0
  50. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/trismik/credentials.py +0 -0
  51. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/async_utils.py +0 -0
  52. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/jinja_helpers.py +0 -0
  53. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/mappers.py +0 -0
  54. {scorebook-0.0.10 → scorebook-0.0.12}/src/scorebook/utils/transform_helpers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scorebook
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: A Python project for LLM evaluation.
5
5
  License-File: LICENSE
6
6
  Author: Euan Campbell
@@ -23,6 +23,7 @@ Requires-Dist: datasets (>=3.6.0)
23
23
  Requires-Dist: fsspec[gcs] ; extra == "vertex"
24
24
  Requires-Dist: google-cloud-storage ; extra == "vertex"
25
25
  Requires-Dist: google-genai ; extra == "vertex"
26
+ Requires-Dist: ipywidgets (>=8.0.0)
26
27
  Requires-Dist: notebook (>=7.4.5,<8.0.0)
27
28
  Requires-Dist: notebook ; extra == "examples"
28
29
  Requires-Dist: openai ; extra == "openai"
@@ -36,7 +37,7 @@ Requires-Dist: torch ; extra == "examples"
36
37
  Requires-Dist: torchaudio ; extra == "examples"
37
38
  Requires-Dist: torchvision ; extra == "examples"
38
39
  Requires-Dist: transformers ; extra == "examples"
39
- Requires-Dist: trismik (>=1.0.1,<2.0.0)
40
+ Requires-Dist: trismik (==1.0.1)
40
41
  Description-Content-Type: text/markdown
41
42
 
42
43
  # Scorebook
@@ -11,21 +11,23 @@ requires-python = ">=3.9, <3.14"
11
11
  dependencies = [
12
12
  "datasets>=3.6.0",
13
13
  "notebook (>=7.4.5,<8.0.0)",
14
- "trismik (>=1.0.1, <2.0.0)",
14
+ "trismik==1.0.1",
15
+ "ipywidgets>=8.0.0",
15
16
  ]
16
17
 
17
18
  [project.scripts]
18
19
  scorebook = "scorebook.cli.main:main"
19
20
 
20
21
  [tool.poetry]
21
- version = "0.0.10" # base version
22
+ version = "0.0.12" # base version
22
23
  packages = [{ include = "scorebook", from = "src" }]
23
24
 
24
25
  [tool.poetry.dependencies]
25
26
  python = ">=3.9,<3.14"
26
27
  datasets = ">=3.6.0"
27
28
  notebook = ">=7.4.5,<8.0.0"
28
- trismik = ">=1.0.1,<2.0.0"
29
+ trismik = "1.0.1"
30
+ ipywidgets = ">=8.0.0"
29
31
 
30
32
  # Optional dependencies
31
33
  openai = {version = "*", optional = true}
@@ -68,6 +70,7 @@ toml = "^0.10.2"
68
70
  types-pyyaml = "^6.0.12.20250822"
69
71
  unasync = {version = "^0.5.0", python = ">=3.9,<4"}
70
72
  tomlkit = "^0.13.2"
73
+ detect-secrets = "^1.5.0"
71
74
 
72
75
  [project.optional-dependencies]
73
76
  openai = ["openai", "python-dotenv"]
@@ -83,6 +86,10 @@ build-backend = "poetry.core.masonry.api"
83
86
 
84
87
  [tool.pytest.ini_options]
85
88
  asyncio_default_fixture_loop_scope = "class"
89
+ markers = [
90
+ "unit: Unit tests that use mocks and don't require external dependencies",
91
+ "integration: Integration tests that may require network access or external services",
92
+ ]
86
93
 
87
94
  [tool.black]
88
95
  line-length = 100
@@ -112,16 +119,22 @@ install_types = true
112
119
  [tool.flake8] # note that this depends on Flake8-pyproject
113
120
  ignore = ["D202", "W503", "W504"]
114
121
 
115
- [tool.unasync]
116
122
  [[tool.unasync.rules]]
117
123
  fromdir = "src/scorebook/evaluate/_async/"
118
124
  todir = "src/scorebook/evaluate/_sync/"
125
+ replacements."scorebook.score._async.score_async" = "scorebook.score._sync.score"
126
+ replacements."scorebook.score._async" = "scorebook.score._sync"
127
+ replacements.evaluate_async = "evaluate"
128
+ replacements."Asynchronous evaluation complete" = "Synchronous evaluation complete"
129
+ replacements." run_results = asyncio.gather(*[worker(run) for run in runs])" = " run_results = [worker(run) for run in runs]"
130
+ replacements.async_nullcontext = "nullcontext"
131
+ replacements.create_trismik_async_client = "create_trismik_sync_client"
132
+ replacements.score_async = "score"
119
133
 
120
-
121
- # Custom replacements beyond default async/await transformations
122
- [tool.unasync.rules.replacements]
123
- "evaluate_async" = "evaluate"
124
- "Asynchronous evaluation complete" = "Synchronous evaluation complete"
125
- " run_results = asyncio.gather(*[worker(run) for run in runs])" = " run_results = [worker(run) for run in runs]"
126
- "async_nullcontext" = "nullcontext"
127
- "create_trismik_async_client" = "create_trismik_sync_client"
134
+ [[tool.unasync.rules]]
135
+ fromdir = "src/scorebook/score/_async/"
136
+ todir = "src/scorebook/score/_sync/"
137
+ replacements.score_async = "score"
138
+ replacements."Async scoring complete" = "Scoring complete"
139
+ replacements.calculate_metric_scores_async = "calculate_metric_scores"
140
+ replacements.upload_result_async = "upload_result"
@@ -9,18 +9,25 @@ import importlib.metadata
9
9
  # get version from pyproject.toml
10
10
  __version__ = importlib.metadata.version(__package__ or __name__)
11
11
 
12
- from scorebook.eval_dataset import EvalDataset
12
+ from scorebook.eval_datasets import EvalDataset
13
13
  from scorebook.evaluate import evaluate, evaluate_async
14
14
  from scorebook.inference.inference_pipeline import InferencePipeline
15
- from scorebook.trismik.credentials import login, whoami
16
- from scorebook.utils.build_prompt import build_prompt
15
+ from scorebook.score import score, score_async
16
+ from scorebook.trismik.credentials import login, logout, whoami
17
+ from scorebook.trismik.upload_results import upload_result, upload_result_async
18
+ from scorebook.utils.render_template import render_template
17
19
 
18
20
  __all__ = [
19
21
  "EvalDataset",
20
22
  "evaluate",
21
23
  "evaluate_async",
22
- "build_prompt",
24
+ "score",
25
+ "score_async",
26
+ "render_template",
23
27
  "login",
28
+ "logout",
24
29
  "whoami",
25
30
  "InferencePipeline",
31
+ "upload_result",
32
+ "upload_result_async",
26
33
  ]
@@ -0,0 +1,5 @@
1
+ """Dataset utilities for scorebook."""
2
+
3
+ from scorebook.eval_datasets.eval_dataset import EvalDataset
4
+
5
+ __all__ = ["EvalDataset"]