colliderml 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. colliderml-0.1.1/LICENSE +21 -0
  2. colliderml-0.1.1/MANIFEST.in +8 -0
  3. colliderml-0.1.1/PKG-INFO +138 -0
  4. colliderml-0.1.1/README.md +94 -0
  5. colliderml-0.1.1/colliderml/__init__.py +8 -0
  6. colliderml-0.1.1/colliderml/cli.py +108 -0
  7. colliderml-0.1.1/colliderml/core/__init__.py +7 -0
  8. colliderml-0.1.1/colliderml/core/data/__init__.py +21 -0
  9. colliderml-0.1.1/colliderml/core/data/config.py +104 -0
  10. colliderml-0.1.1/colliderml/core/data/dataset.py +45 -0
  11. colliderml-0.1.1/colliderml/core/data/manifest.py +139 -0
  12. colliderml-0.1.1/colliderml/core/get.py +45 -0
  13. colliderml-0.1.1/colliderml/core/io/__init__.py +5 -0
  14. colliderml-0.1.1/colliderml/core/io/downloader.py +226 -0
  15. colliderml-0.1.1/colliderml/utils/__init__.py +3 -0
  16. colliderml-0.1.1/colliderml.egg-info/PKG-INFO +138 -0
  17. colliderml-0.1.1/colliderml.egg-info/SOURCES.txt +41 -0
  18. colliderml-0.1.1/colliderml.egg-info/dependency_links.txt +1 -0
  19. colliderml-0.1.1/colliderml.egg-info/entry_points.txt +2 -0
  20. colliderml-0.1.1/colliderml.egg-info/requires.txt +14 -0
  21. colliderml-0.1.1/colliderml.egg-info/top_level.txt +1 -0
  22. colliderml-0.1.1/docs/.vitepress/cache/deps/chunk-VJWGEPT5.js +12542 -0
  23. colliderml-0.1.1/docs/.vitepress/cache/deps/chunk-VJWGEPT5.js.map +7 -0
  24. colliderml-0.1.1/docs/.vitepress/cache/deps/vitepress___@vue_devtools-api.js +163 -0
  25. colliderml-0.1.1/docs/.vitepress/cache/deps/vitepress___@vue_devtools-api.js.map +7 -0
  26. colliderml-0.1.1/docs/.vitepress/cache/deps/vitepress___@vueuse_core.js +9188 -0
  27. colliderml-0.1.1/docs/.vitepress/cache/deps/vitepress___@vueuse_core.js.map +7 -0
  28. colliderml-0.1.1/docs/.vitepress/cache/deps/vue.js +343 -0
  29. colliderml-0.1.1/docs/.vitepress/cache/deps/vue.js.map +7 -0
  30. colliderml-0.1.1/docs/.vitepress/components/AboutData.vue +88 -0
  31. colliderml-0.1.1/docs/.vitepress/components/DataConfig.vue +862 -0
  32. colliderml-0.1.1/docs/.vitepress/config.ts +78 -0
  33. colliderml-0.1.1/docs/.vitepress/shims-vue.d.ts +5 -0
  34. colliderml-0.1.1/docs/.vitepress/theme/custom.css +21 -0
  35. colliderml-0.1.1/docs/.vitepress/theme/index.ts +14 -0
  36. colliderml-0.1.1/docs/guide/introduction.md +48 -0
  37. colliderml-0.1.1/docs/index.md +60 -0
  38. colliderml-0.1.1/setup.cfg +33 -0
  39. colliderml-0.1.1/setup.py +50 -0
  40. colliderml-0.1.1/tests/core/io/test_downloader.py +145 -0
  41. colliderml-0.1.1/tests/core/io/test_downloader_integration.py +119 -0
  42. colliderml-0.1.1/tests/test_downloader.py +65 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 ColliderML Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,8 @@
1
+ include LICENSE
2
+ include README.md
3
+ include setup.cfg
4
+ recursive-include tests *
5
+ recursive-include docs *
6
+ global-exclude *.py[cod]
7
+ global-exclude __pycache__
8
+ global-exclude *.so
@@ -0,0 +1,138 @@
1
+ Metadata-Version: 2.2
2
+ Name: colliderml
3
+ Version: 0.1.1
4
+ Summary: A modern machine learning library for high-energy physics data analysis
5
+ Home-page: https://github.com/murnanedaniel/colliderml
6
+ Author: Daniel Murnane
7
+ Author-email: dtmurnane@lbl.gov
8
+ Project-URL: Documentation, https://murnanedaniel.github.io/colliderml
9
+ Project-URL: Source, https://github.com/murnanedaniel/colliderml
10
+ Project-URL: Issues, https://github.com/murnanedaniel/colliderml/issues
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Topic :: Scientific/Engineering :: Physics
17
+ Requires-Python: >=3.10,<3.12
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: requests>=2.31.0
21
+ Requires-Dist: tqdm>=4.66.0
22
+ Requires-Dist: numpy>=1.24.0
23
+ Requires-Dist: pydantic>=2.5.0
24
+ Requires-Dist: h5py>=3.10.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
27
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
28
+ Requires-Dist: black>=23.11.0; extra == "dev"
29
+ Requires-Dist: ruff>=0.1.6; extra == "dev"
30
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
31
+ Requires-Dist: mkdocs-material>=9.4.0; extra == "dev"
32
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "dev"
33
+ Dynamic: author
34
+ Dynamic: author-email
35
+ Dynamic: classifier
36
+ Dynamic: description
37
+ Dynamic: description-content-type
38
+ Dynamic: home-page
39
+ Dynamic: project-url
40
+ Dynamic: provides-extra
41
+ Dynamic: requires-dist
42
+ Dynamic: requires-python
43
+ Dynamic: summary
44
+
45
+ # ColliderML
46
+
47
+ [![Tests](https://github.com/murnanedaniel/colliderml/actions/workflows/tests.yml/badge.svg)](https://github.com/murnanedaniel/colliderml/actions/workflows/tests.yml)
48
+ ![Coverage](./coverage.svg)
49
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
50
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
51
+
52
+ A modern machine learning library for high-energy physics data analysis.
53
+
54
+ ## Features
55
+
56
+ - Efficient parallel data downloading with resume capability
57
+ - Support for common HEP data formats
58
+ - Machine learning utilities for particle physics
59
+ - Visualization tools for physics data
60
+
61
+ ## Installation
62
+
63
+ ### For Users
64
+ ```bash
65
+ # Create and activate environment
66
+ conda create -n collider-env python=3.11 # 3.10 or 3.11 recommended
67
+ conda activate collider-env
68
+
69
+ # Install package
70
+ pip install colliderml
71
+ ```
72
+
73
+ ### For Developers
74
+ ```bash
75
+ # Create and activate environment
76
+ conda create -n collider-dev python=3.11 # 3.10 or 3.11 recommended
77
+ conda activate collider-dev
78
+
79
+ # Clone repository
80
+ git clone https://github.com/murnanedaniel/colliderml.git
81
+ cd colliderml
82
+
83
+ # Install in development mode with extra dependencies
84
+ pip install -e ".[dev]"
85
+ ```
86
+
87
+ ## Quick Start
88
+
89
+ ```python
90
+ from colliderml.core.data.manifest import ManifestClient
91
+ from colliderml.core.io import DataDownloader
92
+
93
+ manifest = ManifestClient()
94
+ files = manifest.select_files(campaign=None, datasets=["ttbar"], objects=["tracks"], max_events=1000)
95
+
96
+ downloader = DataDownloader()
97
+ results = downloader.download_files([f.path for f in files], local_dir="data", max_workers=4, resume=True)
98
+
99
+ for path, result in results.items():
100
+ print(path, result.success, result.error)
101
+ ```
102
+
103
+ ### Features
104
+
105
+ - **Manifest-driven**: Always selects files from the latest portal manifest
106
+ - **Parallel Downloads**: Download multiple files concurrently
107
+ - **Resume Capability**: Optionally resume interrupted downloads
108
+ - **Progress Tracking**: Real-time progress bars
109
+ - **Clear Errors**: Helpful failure messages and HEAD checks
110
+
111
+ ## Development
112
+
113
+ 1. Activate your environment:
114
+ ```bash
115
+ conda activate collider-dev
116
+ ```
117
+
118
+ 2. Run tests:
119
+ ```bash
120
+ # Run unit tests only
121
+ pytest -v -m "not integration"
122
+
123
+ # Run all tests including integration tests
124
+ pytest -v
125
+
126
+ # Run with coverage report
127
+ pytest --cov=colliderml
128
+ ```
129
+
130
+ 3. Build documentation:
131
+ ```bash
132
+ mkdocs build
133
+ mkdocs serve # View at http://127.0.0.1:8000
134
+ ```
135
+
136
+ ## License
137
+
138
+ [MIT License](LICENSE)
@@ -0,0 +1,94 @@
1
+ # ColliderML
2
+
3
+ [![Tests](https://github.com/murnanedaniel/colliderml/actions/workflows/tests.yml/badge.svg)](https://github.com/murnanedaniel/colliderml/actions/workflows/tests.yml)
4
+ ![Coverage](./coverage.svg)
5
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
7
+
8
+ A modern machine learning library for high-energy physics data analysis.
9
+
10
+ ## Features
11
+
12
+ - Efficient parallel data downloading with resume capability
13
+ - Support for common HEP data formats
14
+ - Machine learning utilities for particle physics
15
+ - Visualization tools for physics data
16
+
17
+ ## Installation
18
+
19
+ ### For Users
20
+ ```bash
21
+ # Create and activate environment
22
+ conda create -n collider-env python=3.11 # Python 3.10 or 3.11 is required (>=3.10,<3.12)
23
+ conda activate collider-env
24
+
25
+ # Install package
26
+ pip install colliderml
27
+ ```
28
+
29
+ ### For Developers
30
+ ```bash
31
+ # Create and activate environment
32
+ conda create -n collider-dev python=3.11 # Python 3.10 or 3.11 is required (>=3.10,<3.12)
33
+ conda activate collider-dev
34
+
35
+ # Clone repository
36
+ git clone https://github.com/murnanedaniel/colliderml.git
37
+ cd colliderml
38
+
39
+ # Install in development mode with extra dependencies
40
+ pip install -e ".[dev]"
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ```python
46
+ from colliderml.core.data.manifest import ManifestClient
47
+ from colliderml.core.io import DataDownloader
48
+
49
+ manifest = ManifestClient()
50
+ files = manifest.select_files(campaign=None, datasets=["ttbar"], objects=["tracks"], max_events=1000)
51
+
52
+ downloader = DataDownloader()
53
+ results = downloader.download_files([f.path for f in files], local_dir="data", max_workers=4, resume=True)
54
+
55
+ for path, result in results.items():
56
+ print(path, result.success, result.error)
57
+ ```
58
+
59
+ ### Features
60
+
61
+ - **Manifest-driven**: Always selects files from the latest portal manifest
62
+ - **Parallel Downloads**: Download multiple files concurrently
63
+ - **Resume Capability**: Optionally resume interrupted downloads
64
+ - **Progress Tracking**: Real-time progress bars
65
+ - **Clear Errors**: Helpful failure messages and HEAD checks
66
+
67
+ ## Development
68
+
69
+ 1. Activate your environment:
70
+ ```bash
71
+ conda activate collider-dev
72
+ ```
73
+
74
+ 2. Run tests:
75
+ ```bash
76
+ # Run unit tests only
77
+ pytest -v -m "not integration"
78
+
79
+ # Run all tests including integration tests
80
+ pytest -v
81
+
82
+ # Run with coverage report
83
+ pytest --cov=colliderml
84
+ ```
85
+
86
+ 3. Build documentation:
87
+ ```bash
88
+ mkdocs build
89
+ mkdocs serve # View at http://127.0.0.1:8000
90
+ ```
91
+
92
+ ## License
93
+
94
+ [MIT License](LICENSE)
@@ -0,0 +1,8 @@
1
+ """ColliderML: A modern machine learning library for high-energy physics data analysis."""
2
+
3
+ __version__ = "0.1.1"
4
+
5
+ from . import core
6
+ from . import utils
7
+
8
+ __all__ = ["core", "utils"]
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env python3
2
+ """ColliderML command line interface."""
3
+
4
+ import argparse
5
+ import sys
6
+ from pathlib import Path
7
+ from colliderml.core.io import DataDownloader
8
+ from colliderml.core.data.manifest import ManifestClient
9
+
10
+
11
def _split_csv(value):
    """Split a comma-separated option into a list of clean names.

    Surrounding whitespace is stripped from every entry and empty entries
    are dropped, so ``"ttbar, qcd,"`` becomes ``["ttbar", "qcd"]``.

    Returns:
        The list of names, or ``None`` when *value* is empty/missing or
        contains no usable entry (meaning "no filter").
    """
    if not value:
        return None
    names = [name.strip() for name in value.split(',')]
    names = [name for name in names if name]
    return names or None


def get(args):
    """Handle the get command (manifest-driven).

    Selects files from the manifest according to the parsed command-line
    options, downloads them, and prints a summary.

    Args:
        args: Parsed argparse namespace. The attributes read here are
            ``campaign``, ``datasets``, ``objects``, ``events``, ``version``,
            ``output_dir``, ``workers`` and ``no_resume``.

    Exits:
        With status 1 if the manifest selection raises, with status 0 if the
        selection is empty, and with status 1 if any download failed.
        Returns normally (``None``) when every download succeeded.
    """
    downloader = DataDownloader()
    manifest = ManifestClient()

    # "default" (or an empty value) means: let the manifest pick the campaign.
    campaign = args.campaign if args.campaign and args.campaign != "default" else None

    # Comma-separated filters; tolerate spaces and stray commas so that
    # "ttbar, qcd" does not turn into a dataset literally named " qcd".
    datasets = _split_csv(args.datasets)
    objects = _split_csv(args.objects)

    # Select files
    try:
        files = manifest.select_files(
            campaign=campaign,
            datasets=datasets,
            objects=objects,
            max_events=args.events,
            version=args.version,
        )
    except Exception as e:
        # Top-level CLI boundary: report any selection failure and stop.
        print(f"\nError reading manifest: {e}")
        sys.exit(1)

    if not files:
        print("No files matched the selection from the manifest.")
        sys.exit(0)

    print("\nGet Configuration:")
    print(f"Campaign: {args.campaign or 'default'}")
    print(f"Version: {args.version or 'dataset defaults'}")
    print(f"Datasets: {', '.join(datasets) if datasets else 'ALL'}")
    print(f"Objects: {', '.join(objects) if objects else 'ALL'}")
    print(f"Requested events: {args.events if args.events else 'ALL'}")
    print(f"Output directory: {args.output_dir}")

    # Download
    remote_paths = [f.path for f in files]
    results = downloader.download_files(
        remote_paths=remote_paths,
        local_dir=args.output_dir,
        max_workers=args.workers,
        resume=not args.no_resume,
    )

    successful = [r for r in results.values() if r.success]
    failed = [r for r in results.values() if not r.success]

    print("\nGet Summary:")
    print(f"Total files: {len(results)}")
    print(f"Successful: {len(successful)}")
    print(f"Failed: {len(failed)}")
    if failed:
        print("\nFailed downloads:")
        for path, result in results.items():
            if not result.success:
                print(f"✗ {path}: {result.error}")
        # A non-zero status lets scripts detect partial failure.
        sys.exit(1)
70
+
71
+
72
def main():
    """Parse the command line and run the requested sub-command.

    Only the ``get`` sub-command is defined. When no (or any other) command
    is given, the help text is printed and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="ColliderML command line interface")
    commands = cli.add_subparsers(dest='command', help='Command to run')

    # "get": manifest-driven file selection and download.
    get_cmd = commands.add_parser('get', help='Get files using manifest selection')
    add_option = get_cmd.add_argument
    add_option(
        '-c', '--campaign',
        type=str,
        default='default',
        help='Campaign name (or "default" to use manifest default)',
    )
    add_option(
        '-d', '--datasets',
        type=str,
        help='Comma-separated list of datasets (e.g. ttbar,qcd)',
    )
    add_option(
        '-o', '--objects',
        type=str,
        help='Comma-separated list of objects (e.g. tracks,hits)',
    )
    add_option(
        '-e', '--events',
        type=int,
        default=None,
        help='Max number of events to download (across selection)',
    )
    # Both dash and underscore spellings are accepted for the long options.
    add_option(
        '-O', '--output-dir', '--output_dir',
        dest='output_dir',
        type=str,
        default='data',
        help='Directory to save downloaded files',
    )
    add_option(
        '-w', '--workers',
        type=int,
        default=4,
        help='Number of parallel downloads',
    )
    add_option(
        '--no-resume', '--no_resume',
        dest='no_resume',
        action='store_true',
        help='Disable resuming partial downloads',
    )
    add_option(
        '-v', '--version',
        type=str,
        default=None,
        help='Dataset version to use (overrides dataset default_version)',
    )

    parsed = cli.parse_args()

    # Guard clause: anything other than "get" is a usage error.
    if parsed.command != 'get':
        cli.print_help()
        sys.exit(1)

    get(parsed)
103
+
104
+
105
+ if __name__ == '__main__':
106
+ main()
107
+
108
+
@@ -0,0 +1,7 @@
1
+ """Core functionality for ColliderML."""
2
+
3
+ from . import io
4
+ from . import data
5
+ from .get import get
6
+
7
+ __all__ = ["io", "data", "get"]
@@ -0,0 +1,21 @@
1
+ """Data handling functionality for ColliderML."""
2
+
3
+ from .dataset import Dataset
4
+ from .config import (
5
+ PileupLevel,
6
+ DataType,
7
+ OBJECT_CONFIGS,
8
+ VALID_PROCESSES,
9
+ BASE_URL,
10
+ MANIFEST_URL,
11
+ )
12
+
13
+ __all__ = [
14
+ "Dataset",
15
+ "PileupLevel",
16
+ "DataType",
17
+ "OBJECT_CONFIGS",
18
+ "VALID_PROCESSES",
19
+ "BASE_URL",
20
+ "MANIFEST_URL",
21
+ ]
@@ -0,0 +1,104 @@
1
+ """Configuration and lightweight enums/constants for ColliderML.
2
+
3
+ This module provides base URLs and minimal legacy constants while we migrate
4
+ to a manifest-driven data selection model.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from enum import Enum
10
+ from typing import Dict, Set
11
+
12
+
13
+ # Base URL configuration
14
+ BASE_URL: str = "https://portal.nersc.gov/cfs/m4958/ColliderML/"
15
+ MANIFEST_URL: str = f"{BASE_URL}manifest.json"
16
+
17
+ # Downloader base URLs list (kept for backward compatibility)
18
+ DEFAULT_URLS = [BASE_URL]
19
+
20
+
21
+ # Legacy-style enums (kept to avoid breaking existing imports/tests)
22
class PileupLevel(str, Enum):
    """String-valued enum of the legacy pileup configurations.

    Each member's value is the literal text used as the leading segment of
    legacy relative paths and file names (see ``get_object_path``).
    """

    SINGLE = "single-particle"
    LOW = "pileup-10"
    HIGH = "pileup-200"
26
+
27
+
28
class DataType(str, Enum):
    """String-valued enum of the legacy data categories.

    ``OBJECT_CONFIGS`` assigns one of these to each object name, and the
    member's value appears as a segment of legacy relative paths.
    """

    RECO = "reco"
    TRUTH = "truth"
    MEASUREMENTS = "measurements"
32
+
33
+
34
+ # Minimal object configuration placeholder to satisfy existing imports
35
class _ObjectConfig:
    """Placeholder record holding the data category of one object type."""

    def __init__(self, data_type: DataType):
        """Store *data_type* unchanged on the instance."""
        self.data_type = data_type
38
+
39
+
40
+ # Representative objects; the authoritative list comes from the manifest
41
+ OBJECT_CONFIGS: Dict[str, _ObjectConfig] = {
42
+ "tracks": _ObjectConfig(DataType.RECO),
43
+ "particle_flow": _ObjectConfig(DataType.RECO),
44
+ "particles": _ObjectConfig(DataType.TRUTH),
45
+ "tracker_hits": _ObjectConfig(DataType.MEASUREMENTS),
46
+ }
47
+
48
+
49
+ # Representative physics processes; authoritative set comes from the manifest
50
+ VALID_PROCESSES: Set[str] = {"ttbar", "wjets", "zjets", "susy", "higgs", "qcd", "exotics"}
51
+
52
+
53
+ # Legacy constants used by older CLI/tests; superseded by manifest data
54
+ EVENTS_PER_FILE: int = 1000
55
+
56
+ # Optional dataset sizes for summary planning (will be superseded by manifest totals)
57
+ DATASET_SIZES: Dict[str, int] = {
58
+ "ttbar": 100_000,
59
+ "qcd": 100_000,
60
+ }
61
+
62
+
63
def get_object_path(
    pileup: PileupLevel,
    process: str,
    object_name: str,
    start_event: int,
    end_event: int,
) -> str:
    """Return the legacy-style relative path of one object file.

    Kept for backward compatibility with existing tests and scripts; new
    code should take paths from the manifest instead.

    Args:
        pileup: Pileup level. A ``PileupLevel`` contributes its value; any
            other object is converted with ``str()``.
        process: Physics process name; must be in ``VALID_PROCESSES``.
        object_name: Object type; must be a key of ``OBJECT_CONFIGS``.
        start_event: First event number, embedded in the file name.
        end_event: Last event number, embedded in the file name.

    Returns:
        ``<pileup>/<process>/v1/<data_type>/<object>/<file>.h5`` where the
        file name repeats the same segments joined by dots, followed by
        ``events<start>-<end>``.

    Raises:
        ValueError: If *process* or *object_name* is not recognised.
    """
    # Validation order (process first, then object) is part of the contract.
    if process not in VALID_PROCESSES:
        raise ValueError("Invalid process")
    if object_name not in OBJECT_CONFIGS:
        raise ValueError("Invalid object type")

    # PileupLevel is a str-Enum, so str() would not yield the bare value;
    # read ``.value`` explicitly for enum members.
    if isinstance(pileup, PileupLevel):
        pileup_str = pileup.value
    else:
        pileup_str = str(pileup)

    data_type = OBJECT_CONFIGS[object_name].data_type.value
    # The version segment is fixed for the legacy layout.
    version = "v1"

    directory = f"{pileup_str}/{process}/{version}/{data_type}/{object_name}"
    stem = f"{pileup_str}.{process}.{version}.{data_type}.{object_name}"
    return f"{directory}/{stem}.events{start_event}-{end_event}.h5"
89
+
90
+
91
+ __all__ = [
92
+ "BASE_URL",
93
+ "MANIFEST_URL",
94
+ "DEFAULT_URLS",
95
+ "PileupLevel",
96
+ "DataType",
97
+ "OBJECT_CONFIGS",
98
+ "VALID_PROCESSES",
99
+ "EVENTS_PER_FILE",
100
+ "DATASET_SIZES",
101
+ "get_object_path",
102
+ ]
103
+
104
+
@@ -0,0 +1,45 @@
1
+ """Dataset class for handling HEP data."""
2
+
3
+ from typing import Optional, List, Dict, Any
4
+ from pathlib import Path
5
+
6
class Dataset:
    """A named list of HEP data files with free-form metadata attached."""

    def __init__(self, name: str, files: List[str]):
        """Create a dataset.

        Args:
            name: Name of the dataset.
            files: File paths belonging to the dataset. The list object is
                stored as given, not copied.
        """
        self._metadata: Dict[str, Any] = {}
        self.files = files
        self.name = name

    def __len__(self) -> int:
        """Return how many files the dataset holds."""
        file_count = len(self.files)
        return file_count

    def add_metadata(self, key: str, value: Any) -> None:
        """Record a metadata entry, replacing any existing value for *key*.

        Args:
            key: Metadata key.
            value: Metadata value.
        """
        self._metadata.update({key: value})

    def get_metadata(self, key: str) -> Any:
        """Look up a metadata entry.

        Args:
            key: Metadata key.

        Returns:
            The value stored under *key*.

        Raises:
            KeyError: If no entry exists for *key*.
        """
        store = self._metadata
        return store[key]