deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +126 -110
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +543 -242
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +223 -34
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.10.dist-info/RECORD +0 -45
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Centralized logging configuration for DerivaML.
|
|
2
|
+
|
|
3
|
+
This module provides a consistent logging setup for all DerivaML components.
|
|
4
|
+
It configures the 'deriva_ml' logger namespace and related library loggers
|
|
5
|
+
(deriva-py, bdbag, bagit) without impacting the calling application's
|
|
6
|
+
logging configuration.
|
|
7
|
+
|
|
8
|
+
Key design principles:
|
|
9
|
+
- DerivaML configures only its own logger and related library loggers
|
|
10
|
+
- Never calls logging.basicConfig() to avoid affecting the root logger
|
|
11
|
+
- Respects Hydra's logging configuration when running under Hydra
|
|
12
|
+
- Hydra loggers follow the deriva_ml logging level
|
|
13
|
+
|
|
14
|
+
The module provides:
|
|
15
|
+
- get_logger(): Get the standard DerivaML logger
|
|
16
|
+
- configure_logging(): Set up logging with specified levels
|
|
17
|
+
- LoggerMixin: Mixin class providing _logger attribute
|
|
18
|
+
- is_hydra_initialized(): Check if running under Hydra
|
|
19
|
+
|
|
20
|
+
Example:
|
|
21
|
+
>>> from deriva_ml.core.logging_config import configure_logging, get_logger
|
|
22
|
+
>>> import logging
|
|
23
|
+
>>>
|
|
24
|
+
>>> configure_logging(level=logging.DEBUG)
|
|
25
|
+
>>> logger = get_logger()
|
|
26
|
+
>>> logger.info("DerivaML initialized")
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
# Name of the logger namespace shared by every DerivaML component.
LOGGER_NAME = "deriva_ml"

# Message layout for handlers that DerivaML attaches itself, i.e. only
# when logging is not being managed by Hydra.
DEFAULT_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Loggers of the libraries DerivaML uses internally; their level is driven
# by ``deriva_level``.
DERIVA_LOGGERS = ["deriva", "bagit", "bdbag"]

# Hydra-related loggers; their level tracks the deriva_ml level.
HYDRA_LOGGERS = ["hydra", "hydra.core", "hydra.utils", "omegaconf"]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def is_hydra_initialized() -> bool:
    """Check if running within an initialized Hydra context.

    This is used to determine whether Hydra is managing logging
    configuration. When Hydra is initialized, callers avoid adding handlers
    or calling basicConfig, since Hydra has already configured logging.

    The check is best-effort: any failure while importing Hydra or querying
    it (including Hydra not being importable at all) is reported as
    "not initialized" rather than raised.

    Returns:
        True if Hydra's GlobalHydra is initialized, False otherwise.

    Example:
        >>> if is_hydra_initialized():
        ...     # Hydra is managing logging
        ...     pass
    """
    try:
        # Imported lazily so this module can be used when Hydra cannot be
        # imported.
        from hydra.core.global_hydra import GlobalHydra

        return bool(GlobalHydra.instance().is_initialized())
    except Exception:
        # ImportError is an Exception subclass, so a single clause covers
        # both "Hydra unavailable" and "Hydra query failed".
        return False
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_logger(name: str | None = None) -> logging.Logger:
|
|
79
|
+
"""Get a DerivaML logger.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
name: Optional sub-logger name. If provided, returns a child logger
|
|
83
|
+
under the deriva_ml namespace (e.g., 'deriva_ml.dataset').
|
|
84
|
+
If None, returns the main deriva_ml logger.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
The configured logger instance.
|
|
88
|
+
|
|
89
|
+
Example:
|
|
90
|
+
>>> logger = get_logger() # Main deriva_ml logger
|
|
91
|
+
>>> dataset_logger = get_logger("dataset") # deriva_ml.dataset
|
|
92
|
+
"""
|
|
93
|
+
if name is None:
|
|
94
|
+
return logging.getLogger(LOGGER_NAME)
|
|
95
|
+
return logging.getLogger(f"{LOGGER_NAME}.{name}")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def configure_logging(
    level: int = logging.WARNING,
    deriva_level: int | None = None,
    format_string: str = DEFAULT_FORMAT,
    handler: logging.Handler | None = None,
) -> logging.Logger:
    """Set log levels for DerivaML, Hydra and the deriva-py libraries.

    Only named loggers are touched; the root logger is never modified and
    ``logging.basicConfig()`` is never called.

    Levels applied:
        - the deriva_ml logger gets ``level``
        - every logger in ``HYDRA_LOGGERS`` gets ``level``
        - every logger in ``DERIVA_LOGGERS`` gets ``deriva_level``
          (``level`` when ``deriva_level`` is None)

    Handler policy:
        - Under an initialized Hydra context no handler is added, because
          Hydra has already configured handlers.
        - Otherwise a handler is attached to the deriva_ml logger only if
          it currently has none.

    Args:
        level: Log level for the deriva_ml and Hydra loggers.
            Defaults to WARNING.
        deriva_level: Log level for the deriva, bagit and bdbag loggers.
            None means "same as ``level``".
        format_string: Message format given to the default StreamHandler.
            Only used when this function creates that handler.
        handler: Handler to attach instead of the default StreamHandler.
            It is attached as-is (no formatter is set on it) and is ignored
            under Hydra or when the logger already has handlers.

    Returns:
        The deriva_ml logger.

    Example:
        >>> import logging
        >>> configure_logging(level=logging.DEBUG)
        >>> # Verbose DerivaML, quieter deriva-py libraries
        >>> configure_logging(
        ...     level=logging.INFO,
        ...     deriva_level=logging.WARNING,
        ... )
    """
    library_level = level if deriva_level is None else deriva_level

    ml_logger = get_logger()
    ml_logger.setLevel(level)

    # Hydra loggers first, then the deriva-py family, each with its level.
    for logger_names, target_level in (
        (HYDRA_LOGGERS, level),
        (DERIVA_LOGGERS, library_level),
    ):
        for logger_name in logger_names:
            logging.getLogger(logger_name).setLevel(target_level)

    # Nothing more to do when Hydra owns the handlers or one is already
    # attached; adding another would duplicate output.
    if is_hydra_initialized() or ml_logger.handlers:
        return ml_logger

    if handler is None:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(format_string))
    ml_logger.addHandler(handler)
    return ml_logger
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def apply_logger_overrides(overrides: dict[str, Any]) -> None:
|
|
179
|
+
"""Apply logger level overrides from a configuration dictionary.
|
|
180
|
+
|
|
181
|
+
This is used for compatibility with deriva's DEFAULT_LOGGER_OVERRIDES
|
|
182
|
+
pattern, allowing fine-grained control over specific loggers.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
overrides: Dictionary mapping logger names to log levels.
|
|
186
|
+
|
|
187
|
+
Example:
|
|
188
|
+
>>> apply_logger_overrides({
|
|
189
|
+
... "deriva": logging.WARNING,
|
|
190
|
+
... "bdbag": logging.ERROR,
|
|
191
|
+
... })
|
|
192
|
+
"""
|
|
193
|
+
for name, level_value in overrides.items():
|
|
194
|
+
logging.getLogger(name).setLevel(level_value)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class LoggerMixin:
    """Mixin exposing a per-class logger through ``_logger``.

    Any class inheriting from this mixin can use ``self._logger`` to obtain
    a logger in the deriva_ml namespace whose final name component is the
    name of the instance's class.

    Example:
        >>> class MyProcessor(LoggerMixin):
        ...     def process(self):
        ...         self._logger.info("Processing started")
        ...
        >>> # Logs to 'deriva_ml.MyProcessor'
    """

    @property
    def _logger(self) -> logging.Logger:
        """Return the logger named after this instance's class."""
        class_name = self.__class__.__name__
        return get_logger(class_name)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# Names exported by ``from ... import *`` for the logging configuration
# module: the logger-name constant first, then the helpers, then the mixin.
__all__ = [
    "LOGGER_NAME",
    "get_logger", "configure_logging", "apply_logger_overrides", "is_hydra_initialized",
    "LoggerMixin",
]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Mixins for DerivaML catalog operations.
|
|
2
|
+
|
|
3
|
+
This module provides mixins that can be used to compose catalog-related
|
|
4
|
+
functionality. Each mixin provides a specific set of operations that can
|
|
5
|
+
be mixed into classes that have access to a catalog.
|
|
6
|
+
|
|
7
|
+
Mixins:
|
|
8
|
+
VocabularyMixin: Vocabulary term management (add, lookup, list terms)
|
|
9
|
+
RidResolutionMixin: RID resolution and retrieval
|
|
10
|
+
PathBuilderMixin: Path building and table access utilities
|
|
11
|
+
WorkflowMixin: Workflow management (add, lookup, list, create)
|
|
12
|
+
FeatureMixin: Feature management (create, lookup, delete, list values)
|
|
13
|
+
DatasetMixin: Dataset management (find, create, lookup, delete)
|
|
14
|
+
AssetMixin: Asset management (create, list assets)
|
|
15
|
+
ExecutionMixin: Execution management (create, restore, update status)
|
|
16
|
+
FileMixin: File management (add, list files)
|
|
17
|
+
AnnotationMixin: Annotation management (display, visible-columns, etc.)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from deriva_ml.core.mixins.annotation import AnnotationMixin
|
|
21
|
+
from deriva_ml.core.mixins.asset import AssetMixin
|
|
22
|
+
from deriva_ml.core.mixins.dataset import DatasetMixin
|
|
23
|
+
from deriva_ml.core.mixins.execution import ExecutionMixin
|
|
24
|
+
from deriva_ml.core.mixins.feature import FeatureMixin
|
|
25
|
+
from deriva_ml.core.mixins.file import FileMixin
|
|
26
|
+
from deriva_ml.core.mixins.path_builder import PathBuilderMixin
|
|
27
|
+
from deriva_ml.core.mixins.rid_resolution import RidResolutionMixin
|
|
28
|
+
from deriva_ml.core.mixins.vocabulary import VocabularyMixin
|
|
29
|
+
from deriva_ml.core.mixins.workflow import WorkflowMixin
|
|
30
|
+
|
|
31
|
+
# Mixin classes exported by ``from ... import *`` for the mixins package.
__all__ = [
    "AnnotationMixin", "VocabularyMixin", "RidResolutionMixin", "PathBuilderMixin",
    "WorkflowMixin", "FeatureMixin", "DatasetMixin", "AssetMixin",
    "ExecutionMixin", "FileMixin",
]
|