scorebook 0.0.4.tar.gz → 0.0.5.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (40)
  1. {scorebook-0.0.4 → scorebook-0.0.5}/PKG-INFO +2 -1
  2. {scorebook-0.0.4 → scorebook-0.0.5}/pyproject.toml +9 -1
  3. scorebook-0.0.5/src/scorebook/__init__.py +18 -0
  4. scorebook-0.0.5/src/scorebook/cli/__init__.py +1 -0
  5. scorebook-0.0.5/src/scorebook/cli/auth.py +98 -0
  6. scorebook-0.0.5/src/scorebook/cli/main.py +57 -0
  7. {scorebook-0.0.4/src/scorebook/types → scorebook-0.0.5/src/scorebook}/eval_dataset.py +38 -0
  8. scorebook-0.0.5/src/scorebook/evaluate.py +531 -0
  9. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/inference/openai.py +23 -1
  10. scorebook-0.0.5/src/scorebook/trismik/__init__.py +6 -0
  11. scorebook-0.0.5/src/scorebook/trismik/adaptive_testing_service.py +141 -0
  12. scorebook-0.0.5/src/scorebook/trismik/login.py +120 -0
  13. scorebook-0.0.5/src/scorebook/types.py +165 -0
  14. scorebook-0.0.4/src/scorebook/__init__.py +0 -16
  15. scorebook-0.0.4/src/scorebook/evaluator.py +0 -379
  16. scorebook-0.0.4/src/scorebook/types/__init__.py +0 -12
  17. scorebook-0.0.4/src/scorebook/types/eval_result.py +0 -133
  18. scorebook-0.0.4/src/scorebook/types/eval_run_spec.py +0 -28
  19. scorebook-0.0.4/src/scorebook/utils/logging_utils.py +0 -1
  20. {scorebook-0.0.4 → scorebook-0.0.5}/LICENSE +0 -0
  21. {scorebook-0.0.4 → scorebook-0.0.5}/README.md +0 -0
  22. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/exceptions.py +0 -0
  23. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/inference/__init__.py +0 -0
  24. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/inference/bedrock.py +0 -0
  25. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/inference/portkey.py +0 -0
  26. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/inference/vertex.py +0 -0
  27. {scorebook-0.0.4/src/scorebook/types → scorebook-0.0.5/src/scorebook}/inference_pipeline.py +0 -0
  28. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/metrics/__init__.py +0 -0
  29. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/metrics/accuracy.py +0 -0
  30. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/metrics/metric_base.py +0 -0
  31. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/metrics/metric_registry.py +0 -0
  32. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/metrics/precision.py +0 -0
  33. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/__init__.py +0 -0
  34. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/async_utils.py +0 -0
  35. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/build_prompt.py +0 -0
  36. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/io_helpers.py +0 -0
  37. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/jinja_helpers.py +0 -0
  38. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/mappers.py +0 -0
  39. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/progress_bars.py +0 -0
  40. {scorebook-0.0.4 → scorebook-0.0.5}/src/scorebook/utils/transform_helpers.py +0 -0
{scorebook-0.0.4 → scorebook-0.0.5}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: scorebook
-Version: 0.0.4
+Version: 0.0.5
 Summary: A Python project for LLM evaluation.
 Author: Euan Campbell
 Author-email: euan@trismik.com
@@ -35,6 +35,7 @@ Requires-Dist: torch ; extra == "examples"
 Requires-Dist: torchaudio ; extra == "examples"
 Requires-Dist: torchvision ; extra == "examples"
 Requires-Dist: transformers ; extra == "examples"
+Requires-Dist: trismik
 Description-Content-Type: text/markdown
 
 # Scorebook
{scorebook-0.0.4 → scorebook-0.0.5}/pyproject.toml

@@ -11,12 +11,20 @@ requires-python = ">=3.9"
 dependencies = [
     "datasets>=3.6.0",
     "notebook (>=7.4.5,<8.0.0)",
+    "trismik",
 ]
 
+[project.scripts]
+scorebook = "scorebook.cli.main:main"
+
 [tool.poetry]
-version = "0.0.4" # base version
+version = "0.0.5" # base version
 packages = [{ include = "scorebook", from = "src" }]
 
+[[tool.poetry.source]]
+name = "testpypi"
+url = "https://test.pypi.org/simple/"
+priority = "supplemental"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.2"
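
The new [project.scripts] table registers a console entry point, so installing the package exposes a scorebook command that dispatches to scorebook.cli.main:main. As a quick sanity check, the registration can be inspected with the standard library (a sketch using the Python 3.10+ importlib.metadata selection API, not code from the package):

    from importlib.metadata import entry_points

    # Console scripts visible in the current environment; after installing
    # scorebook 0.0.5 this should include the new entry point.
    for ep in entry_points(group="console_scripts"):
        if ep.name == "scorebook":
            print(ep.value)  # -> scorebook.cli.main:main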
scorebook-0.0.5/src/scorebook/__init__.py

@@ -0,0 +1,18 @@
+"""
+Scorebook package.
+
+A Python project for scorebook functionality.
+"""
+
+import importlib.metadata
+
+# get version from pyproject.toml
+__version__ = importlib.metadata.version(__package__ or __name__)
+
+from scorebook.eval_dataset import EvalDataset
+from scorebook.evaluate import evaluate
+from scorebook.inference_pipeline import InferencePipeline
+from scorebook.trismik.login import login, whoami
+from scorebook.utils.build_prompt import build_prompt
+
+__all__ = ["EvalDataset", "evaluate", "build_prompt", "login", "whoami", "InferencePipeline"]
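
With these re-exports, the package root becomes the single import surface, and __version__ is read from the installed distribution's metadata instead of being hard-coded. For example:

    import scorebook

    print(scorebook.__version__)  # "0.0.5" when this release is installed

    # Everything listed in __all__ is importable from the package root:
    from scorebook import EvalDataset, InferencePipeline, evaluate, login, whoami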
scorebook-0.0.5/src/scorebook/cli/__init__.py

@@ -0,0 +1 @@
+"""CLI module for scorebook."""
scorebook-0.0.5/src/scorebook/cli/auth.py

@@ -0,0 +1,98 @@
+"""Authentication CLI commands."""
+
+import argparse
+import getpass
+import sys
+
+from scorebook.trismik.login import get_stored_token, get_token_path, login, logout, whoami
+
+
+def auth_command(args: argparse.Namespace) -> int:
+    """Handle auth subcommands."""
+    if args.auth_command == "login":
+        return login_command(args)
+    elif args.auth_command == "logout":
+        return logout_command(args)
+    elif args.auth_command == "whoami":
+        return whoami_command(args)
+    else:
+        print(
+            "Error: No auth command specified. Use 'login', 'logout', or 'whoami'.", file=sys.stderr
+        )
+        return 1
+
+
+def login_command(args: argparse.Namespace) -> int:
+    """Handle login command."""
+    try:
+        token = args.token
+
+        if not token:
+            # Check if we're already logged in
+            stored_token = get_stored_token()
+
+            if stored_token:
+                print("You are already logged in.")
+                overwrite = (
+                    input("Do you want to overwrite the existing token? (y/N): ").lower().strip()
+                )
+                if overwrite not in ("y", "yes"):
+                    print("Login cancelled.")
+                    return 0
+
+            # Prompt for token securely
+            print("Enter your Trismik API token:")
+            print("(You can find your token at: https://trismik.com/settings/tokens)")
+            token = getpass.getpass("Token: ").strip()
+
+            if not token:
+                print("Error: No token provided.", file=sys.stderr)
+                return 1
+
+        # Login
+        login(token)
+
+        # Success message
+        print(f"Successfully logged in! Token saved to {get_token_path()}")
+        return 0
+
+    except ValueError as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+    except KeyboardInterrupt:
+        print("\nLogin cancelled.")
+        return 130
+    except Exception as e:
+        print(f"Unexpected error: {e}", file=sys.stderr)
+        return 1
+
+
+def logout_command(args: argparse.Namespace) -> int:
+    """Handle logout command."""
+    try:
+        success = logout()
+        if success:
+            print("Successfully logged out!")
+        else:
+            print("Not currently logged in.")
+        return 0
+    except Exception as e:
+        print(f"Error during logout: {e}", file=sys.stderr)
+        return 1
+
+
+def whoami_command(args: argparse.Namespace) -> int:
+    """Handle whoami command."""
+    try:
+        token = whoami()
+        if token is None:
+            print("Not logged in. Run 'scorebook auth login' first.")
+            return 1
+        else:
+            # TODO: Make actual API call to get user info
+            # For now, just confirm we have a token
+            print(f"Logged in with token: {token[:8]}...")
+            return 0
+    except Exception as e:
+        print(f"Error checking login status: {e}", file=sys.stderr)
+        return 1
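
Each handler reads its inputs from the parsed argparse.Namespace and returns a process exit code (0 on success, 1 on error, 130 on Ctrl-C). For illustration, the handlers can be driven directly with a hand-built namespace, mirroring what the parser in cli/main.py produces (a sketch, not shipped code; the token value is a placeholder):

    import argparse

    from scorebook.cli.auth import auth_command

    # Equivalent to `scorebook auth whoami`: 0 if a token is stored, 1 otherwise.
    status = auth_command(argparse.Namespace(auth_command="whoami"))

    # Equivalent to `scorebook auth login --token <TOKEN>`.
    status = auth_command(argparse.Namespace(auth_command="login", token="<YOUR-TOKEN>"))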
scorebook-0.0.5/src/scorebook/cli/main.py

@@ -0,0 +1,57 @@
+"""Main CLI entry point for scorebook."""
+
+import argparse
+import sys
+from typing import List, Optional
+
+from .auth import auth_command
+
+
+def create_parser() -> argparse.ArgumentParser:
+    """Create the main argument parser."""
+    parser = argparse.ArgumentParser(
+        prog="scorebook",
+        description="Scorebook CLI - A Python project for LLM evaluation",
+    )
+
+    # Add subcommands
+    subparsers = parser.add_subparsers(dest="command", help="Available commands")
+
+    # Auth subcommand
+    auth_parser = subparsers.add_parser("auth", help="Authentication commands")
+    auth_subparsers = auth_parser.add_subparsers(dest="auth_command", help="Auth commands")
+
+    # Auth login
+    login_parser = auth_subparsers.add_parser("login", help="Login to scorebook")
+    login_parser.add_argument("--token", help="API token to use for login")
+
+    # Auth logout
+    auth_subparsers.add_parser("logout", help="Logout from scorebook")
+
+    # Auth whoami
+    auth_subparsers.add_parser("whoami", help="Show current login status")
+
+    return parser
+
+
+def main(argv: Optional[List[str]] = None) -> int:
+    """Run the main CLI entry point."""
+    parser = create_parser()
+    args = parser.parse_args(argv)
+
+    try:
+        if args.command == "auth":
+            return auth_command(args)
+        else:
+            parser.print_help()
+            return 1
+    except KeyboardInterrupt:
+        print("\nOperation cancelled.")
+        return 130
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
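
Because main accepts an optional argv list, the CLI can be exercised programmatically (handy in tests) exactly as the scorebook console script would invoke it:

    from scorebook.cli.main import main

    # Same as running `scorebook auth logout` in a shell; the return value is
    # the exit code the console script passes to sys.exit().
    exit_code = main(["auth", "logout"])

    # With no recognized command, main() prints the help text and returns 1.
    assert main([]) == 1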
{scorebook-0.0.4/src/scorebook/types → scorebook-0.0.5/src/scorebook}/eval_dataset.py

@@ -2,6 +2,7 @@
 
 import csv
 import json
+import random
 from typing import Any, Dict, Iterator, List, Optional, Type, Union
 
 import yaml
@@ -364,3 +365,40 @@ class EvalDataset:
             resolved.append(MetricRegistry.get(m))  # Use registry for str or class
 
         return resolved
+
+    def sample(self, sample_size: int) -> "EvalDataset":
+        """Create a new dataset with randomly sampled items from this dataset.
+
+        Args:
+            sample_size: The number of items to sample from the dataset
+
+        Returns:
+            A new EvalDataset with randomly sampled items
+
+        Raises:
+            ValueError: If sample_size is larger than the dataset size
+        """
+        dataset_size = len(self.items)
+
+        if sample_size > dataset_size:
+            raise ValueError(
+                f"Sample size {sample_size} is larger than dataset size {dataset_size} "
+                f"for dataset '{self.name}'"
+            )
+
+        # Create randomly sampled items
+        sampled_items = random.sample(self.items, sample_size)
+
+        # Create a new EvalDataset instance with sampled items using from_list
+        sampled_dataset = self.from_list(
+            name=self.name,
+            label=self.label,
+            metrics=self.metrics,
+            data=sampled_items,
+        )
+
+        # Preserve the prompt template if it exists
+        if self.prompt_template is not None:
+            sampled_dataset.prompt_template = self.prompt_template
+
+        return sampled_dataset
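
A usage sketch for the new method; the dict-shaped items, label, and metric name below are illustrative assumptions, but from_list is called with the same keyword arguments sample() itself uses:

    from scorebook import EvalDataset

    ds = EvalDataset.from_list(
        name="demo",
        label="answer",
        metrics=["accuracy"],
        data=[{"question": f"q{i}", "answer": f"a{i}"} for i in range(100)],
    )

    subset = ds.sample(10)  # new EvalDataset with 10 randomly chosen items
    # ds.sample(1000) would raise ValueError, since 1000 exceeds len(ds.items).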