deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +186 -105
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +545 -244
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +224 -35
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.9.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,225 @@
1
+ """Centralized logging configuration for DerivaML.
2
+
3
+ This module provides a consistent logging setup for all DerivaML components.
4
+ It configures the 'deriva_ml' logger namespace and related library loggers
5
+ (deriva-py, bdbag, bagit) without impacting the calling application's
6
+ logging configuration.
7
+
8
+ Key design principles:
9
+ - DerivaML configures only its own logger and related library loggers
10
+ - Never calls logging.basicConfig() to avoid affecting the root logger
11
+ - Respects Hydra's logging configuration when running under Hydra
12
+ - Hydra loggers follow the deriva_ml logging level
13
+
14
+ The module provides:
15
+ - get_logger(): Get the standard DerivaML logger
16
+ - configure_logging(): Set up logging with specified levels
17
+ - LoggerMixin: Mixin class providing _logger attribute
18
+ - is_hydra_initialized(): Check if running under Hydra
19
+
20
+ Example:
21
+ >>> from deriva_ml.core.logging_config import configure_logging, get_logger
22
+ >>> import logging
23
+ >>>
24
+ >>> configure_logging(level=logging.DEBUG)
25
+ >>> logger = get_logger()
26
+ >>> logger.info("DerivaML initialized")
27
+ """
28
+
29
+ import logging
30
+ from typing import Any
31
+
32
# Root of the logger namespace shared by every DerivaML component.
LOGGER_NAME: str = "deriva_ml"

# Message layout for handlers that DerivaML attaches itself, i.e. when
# logging is not being managed by Hydra.
DEFAULT_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# Loggers of the libraries DerivaML uses internally; their level tracks
# ``deriva_level``.
DERIVA_LOGGERS: list[str] = ["deriva", "bagit", "bdbag"]

# Hydra-related loggers; their level tracks the deriva_ml level.
HYDRA_LOGGERS: list[str] = ["hydra", "hydra.core", "hydra.utils", "omegaconf"]
53
+
54
+
55
def is_hydra_initialized() -> bool:
    """Check if running within an initialized Hydra context.

    This is used to determine whether Hydra is managing logging configuration.
    When Hydra is initialized, we avoid adding handlers or calling basicConfig
    since Hydra has already configured logging via dictConfig.

    The check is best-effort: any failure while importing Hydra or querying
    its global state is reported as "not initialized" instead of being
    raised to the caller.

    Returns:
        True if Hydra's GlobalHydra is initialized, False otherwise
        (including when Hydra cannot be imported).

    Example:
        >>> if is_hydra_initialized():
        ...     # Hydra is managing logging
        ...     pass
    """
    try:
        # Imported inside the function so that a missing Hydra installation
        # results in ``False`` rather than an import failure of this module.
        from hydra.core.global_hydra import GlobalHydra

        return GlobalHydra.instance().is_initialized()
    except Exception:
        # ImportError is a subclass of Exception, so this single clause
        # covers both "Hydra not installed" and any runtime error raised
        # while inspecting GlobalHydra.
        return False
76
+
77
+
78
def get_logger(name: str | None = None) -> logging.Logger:
    """Get a DerivaML logger.

    Args:
        name: Optional sub-logger name. If provided, returns a child logger
            under the deriva_ml namespace (e.g., 'deriva_ml.dataset').
            If None or an empty string, returns the main deriva_ml logger.

    Returns:
        The logger registered under the resulting name.

    Example:
        >>> logger = get_logger()  # Main deriva_ml logger
        >>> dataset_logger = get_logger("dataset")  # deriva_ml.dataset
    """
    # Treat "" like None: formatting it into the dotted name would create a
    # logger literally called "deriva_ml." (trailing dot, empty child name).
    if not name:
        return logging.getLogger(LOGGER_NAME)
    return logging.getLogger(f"{LOGGER_NAME}.{name}")
96
+
97
+
98
def configure_logging(
    level: int = logging.WARNING,
    deriva_level: int | None = None,
    format_string: str = DEFAULT_FORMAT,
    handler: logging.Handler | None = None,
) -> logging.Logger:
    """Configure logging for DerivaML and related libraries.

    This function sets up logging levels for DerivaML, related libraries
    (deriva-py, bdbag, bagit), and Hydra loggers. It is designed to:

    1. Configure only specific logger namespaces, not the root logger
    2. Respect Hydra's logging configuration when running under Hydra
    3. Allow deriva-py libraries to have a separate logging level

    The logging level hierarchy:
        - deriva_ml logger: uses `level`
        - Hydra loggers: follow `level` (deriva_ml level)
        - Deriva/bdbag/bagit loggers: use `deriva_level` (defaults to `level`)

    When running under Hydra:
        - Only sets log levels on specific loggers
        - Does NOT add handlers (Hydra has already configured them)
        - Does NOT call basicConfig()

    When running standalone (no Hydra):
        - Sets log levels on specific loggers
        - Adds a handler to the deriva_ml logger if it has none yet
        - Still does NOT touch the root logger or call basicConfig()

    Args:
        level: Log level for deriva_ml and Hydra loggers. Defaults to WARNING.
        deriva_level: Log level for deriva-py libraries (deriva, bagit, bdbag).
            If None, uses the same level as `level`.
        format_string: Format string for log messages. Applied to the handler
            that gets attached only if that handler has no formatter of its
            own, so a pre-configured `handler` keeps its formatting.
        handler: Optional handler to add to the deriva_ml logger. It is
            attached only when not running under Hydra and the logger has no
            handlers yet. If None in that situation, a StreamHandler using
            `format_string` is created.

    Returns:
        The configured deriva_ml logger.

    Example:
        >>> import logging
        >>> # Same level for everything
        >>> configure_logging(level=logging.DEBUG)
        >>>
        >>> # Verbose DerivaML, quieter deriva-py libraries
        >>> configure_logging(
        ...     level=logging.INFO,
        ...     deriva_level=logging.WARNING,
        ... )
    """
    if deriva_level is None:
        deriva_level = level

    # Configure main DerivaML logger
    logger = get_logger()
    logger.setLevel(level)

    # Configure Hydra loggers to follow deriva_ml level
    for logger_name in HYDRA_LOGGERS:
        logging.getLogger(logger_name).setLevel(level)

    # Configure deriva-py and related library loggers
    for logger_name in DERIVA_LOGGERS:
        logging.getLogger(logger_name).setLevel(deriva_level)

    # Only add handlers if not running under Hydra: Hydra configures handlers
    # via dictConfig and we don't want to duplicate them. Likewise, never
    # stack a second handler on a logger that already has one.
    if not is_hydra_initialized() and not logger.handlers:
        if handler is None:
            handler = logging.StreamHandler()
        # A freshly created StreamHandler has no formatter, so it always gets
        # format_string; a caller-supplied handler that already carries a
        # formatter is left untouched instead of being silently overwritten.
        if handler.formatter is None:
            handler.setFormatter(logging.Formatter(format_string))
        logger.addHandler(handler)

    return logger
176
+
177
+
178
def apply_logger_overrides(overrides: dict[str, Any]) -> None:
    """Set the level of each named logger from a configuration mapping.

    Provided for compatibility with deriva's DEFAULT_LOGGER_OVERRIDES
    pattern, giving fine-grained control over individual loggers.

    Args:
        overrides: Dictionary mapping logger names to log levels.

    Example:
        >>> apply_logger_overrides({
        ...     "deriva": logging.WARNING,
        ...     "bdbag": logging.ERROR,
        ... })
    """
    for logger_name, requested_level in overrides.items():
        target = logging.getLogger(logger_name)
        target.setLevel(requested_level)
195
+
196
+
197
class LoggerMixin:
    """Mixin that exposes a per-class ``_logger`` property.

    Any class inheriting from this mixin can use ``self._logger`` to obtain
    a child logger of the deriva_ml namespace whose name is the name of the
    instance's class.

    Example:
        >>> class MyProcessor(LoggerMixin):
        ...     def process(self):
        ...         self._logger.info("Processing started")
        ...
        >>> # Logs to 'deriva_ml.MyProcessor'
    """

    @property
    def _logger(self) -> logging.Logger:
        """Return the logger named after this instance's class."""
        class_name = self.__class__.__name__
        return get_logger(class_name)
216
+
217
+
218
# Names exported by a star-import of this module. The other module-level
# constants (DEFAULT_FORMAT, DERIVA_LOGGERS, HYDRA_LOGGERS) are not listed.
__all__ = [
    "LOGGER_NAME",
    "get_logger",
    "configure_logging",
    "apply_logger_overrides",
    "is_hydra_initialized",
    "LoggerMixin",
]
@@ -0,0 +1,42 @@
1
+ """Mixins for DerivaML catalog operations.
2
+
3
+ This module provides mixins that can be used to compose catalog-related
4
+ functionality. Each mixin provides a specific set of operations that can
5
+ be mixed into classes that have access to a catalog.
6
+
7
+ Mixins:
8
+ VocabularyMixin: Vocabulary term management (add, lookup, list terms)
9
+ RidResolutionMixin: RID resolution and retrieval
10
+ PathBuilderMixin: Path building and table access utilities
11
+ WorkflowMixin: Workflow management (add, lookup, list, create)
12
+ FeatureMixin: Feature management (create, lookup, delete, list values)
13
+ DatasetMixin: Dataset management (find, create, lookup, delete)
14
+ AssetMixin: Asset management (create, list assets)
15
+ ExecutionMixin: Execution management (create, restore, update status)
16
+ FileMixin: File management (add, list files)
17
+ AnnotationMixin: Annotation management (display, visible-columns, etc.)
18
+ """
19
+
20
+ from deriva_ml.core.mixins.annotation import AnnotationMixin
21
+ from deriva_ml.core.mixins.asset import AssetMixin
22
+ from deriva_ml.core.mixins.dataset import DatasetMixin
23
+ from deriva_ml.core.mixins.execution import ExecutionMixin
24
+ from deriva_ml.core.mixins.feature import FeatureMixin
25
+ from deriva_ml.core.mixins.file import FileMixin
26
+ from deriva_ml.core.mixins.path_builder import PathBuilderMixin
27
+ from deriva_ml.core.mixins.rid_resolution import RidResolutionMixin
28
+ from deriva_ml.core.mixins.vocabulary import VocabularyMixin
29
+ from deriva_ml.core.mixins.workflow import WorkflowMixin
30
+
31
# Names exported by a star-import of this package: one entry per mixin
# class imported above.
__all__ = [
    "AnnotationMixin",
    "VocabularyMixin",
    "RidResolutionMixin",
    "PathBuilderMixin",
    "WorkflowMixin",
    "FeatureMixin",
    "DatasetMixin",
    "AssetMixin",
    "ExecutionMixin",
    "FileMixin",
]