deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +186 -105
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +545 -244
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +224 -35
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.9.dist-info/RECORD +0 -45
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,639 @@
|
|
|
1
|
+
"""Base configuration for DerivaML applications.
|
|
2
|
+
|
|
3
|
+
This module defines the base configuration and helper functions that simplify
|
|
4
|
+
creating hydra-zen configurations for both script execution and notebooks.
|
|
5
|
+
|
|
6
|
+
Simple Usage (notebooks using only BaseConfig fields):
|
|
7
|
+
# In configs/my_notebook.py
|
|
8
|
+
from deriva_ml.execution import notebook_config
|
|
9
|
+
|
|
10
|
+
notebook_config(
|
|
11
|
+
"my_notebook",
|
|
12
|
+
defaults={"assets": "my_assets", "datasets": "my_dataset"},
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# In notebook
|
|
16
|
+
from deriva_ml.execution import run_notebook
|
|
17
|
+
ml, execution, config = run_notebook("my_notebook")
|
|
18
|
+
|
|
19
|
+
Advanced Usage (notebooks with custom parameters):
|
|
20
|
+
# In configs/my_analysis.py
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from deriva_ml.execution import BaseConfig, notebook_config
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class MyAnalysisConfig(BaseConfig):
|
|
26
|
+
threshold: float = 0.5
|
|
27
|
+
num_samples: int = 100
|
|
28
|
+
|
|
29
|
+
notebook_config(
|
|
30
|
+
"my_analysis",
|
|
31
|
+
config_class=MyAnalysisConfig,
|
|
32
|
+
defaults={"assets": "analysis_assets"},
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# In notebook
|
|
36
|
+
from deriva_ml.execution import run_notebook
|
|
37
|
+
ml, execution, config = run_notebook("my_analysis")
|
|
38
|
+
print(config.threshold) # 0.5
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
import importlib
|
|
42
|
+
import json
|
|
43
|
+
import os
|
|
44
|
+
import pkgutil
|
|
45
|
+
from dataclasses import dataclass, field
|
|
46
|
+
from pathlib import Path
|
|
47
|
+
from typing import Any, TypeVar, TYPE_CHECKING
|
|
48
|
+
|
|
49
|
+
from hydra_zen import builds, instantiate, launch, make_config, store
|
|
50
|
+
|
|
51
|
+
if TYPE_CHECKING:
|
|
52
|
+
from deriva_ml import DerivaML
|
|
53
|
+
from deriva_ml.execution import Execution
|
|
54
|
+
|
|
55
|
+
# Generic placeholder for the configuration type that
# get_notebook_configuration() accepts (as ``type[T]``) and returns.
T = TypeVar("T")


# Standard Hydra defaults list for DerivaML applications: "_self_" comes
# first, followed by one single-entry {config_group: config_name} mapping
# per group. Projects can customize these or define their own defaults.
base_defaults = [
    "_self_",
    dict(deriva_ml="default_deriva"),
    dict(datasets="default_dataset"),
    dict(assets="default_asset"),
    dict(workflow="default_workflow"),
    dict(model_config="default_model"),
]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class BaseConfig:
    """Common configuration fields for DerivaML scripts and notebooks.

    Project-specific configurations should subclass this dataclass so that
    they inherit the standard DerivaML fields and only need to declare
    their own extra parameters.

    Note:
        ``deriva_ml``, ``datasets`` and ``assets`` are annotated as ``Any``
        on purpose: the runtime values (DerivaMLConfig, DatasetSpec) are
        Pydantic models, which are not compatible with OmegaConf structured
        configs. The runtime types are listed under *Attributes*.

    Attributes:
        deriva_ml: DerivaML connection configuration (DerivaMLConfig at
            runtime). Defaults to None.
        datasets: Dataset specifications (list[DatasetSpec] at runtime).
            Defaults to a fresh empty list per instance.
        assets: Asset RIDs to load (list[str] at runtime). Defaults to a
            fresh empty list per instance.
        dry_run: Flag intended to skip catalog writes (testing/debugging).
            Defaults to False.
        description: Human-readable description of this run. Defaults to
            the empty string.
        config_choices: Mapping of config group name to the selected config
            name, e.g. ``{"model_config": "cifar10_quick"}``.
            get_notebook_configuration() fills this in from the Hydra
            runtime choices; it is useful for recording which
            configurations an execution used. Defaults to an empty dict.

    Example:
        >>> from dataclasses import dataclass
        >>> from deriva_ml.execution import BaseConfig
        >>>
        >>> @dataclass
        ... class MyConfig(BaseConfig):
        ...     learning_rate: float = 0.001
        ...     epochs: int = 10
    """

    # Connection settings; a DerivaMLConfig once the config is instantiated.
    deriva_ml: Any = None
    # Mutable defaults go through default_factory so instances never share them.
    datasets: Any = field(default_factory=list)
    assets: Any = field(default_factory=list)
    dry_run: bool = False
    description: str = ""
    # Overwritten by get_notebook_configuration() after Hydra resolution.
    config_choices: dict[str, str] = field(default_factory=dict)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Hydra-zen structured config built from BaseConfig with the standard
# defaults list attached. It is registered in the hydra-zen store under the
# name "deriva_base" so experiments have a ready-to-use base to inherit from.
DerivaBaseConfig = builds(BaseConfig, hydra_defaults=base_defaults, populate_full_signature=True)
store(DerivaBaseConfig, name="deriva_base")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def get_notebook_configuration(
    config_class: type[T],
    config_name: str,
    overrides: list[str] | None = None,
    job_name: str = "notebook",
    version_base: str = "1.3",
) -> T:
    """Load and return a hydra-zen configuration for use in notebooks.

    This function is the notebook equivalent of `run_model`. While `run_model`
    launches a full execution with model training, `get_notebook_configuration`
    simply resolves the configuration and returns it for interactive use.

    The function handles:
    - Adding configurations to the hydra store
    - Launching hydra-zen to resolve defaults and overrides
    - Returning the instantiated configuration object
    - Recording the Hydra runtime choices on ``config_choices`` when the
      resolved object has that attribute

    Args:
        config_class: The hydra-zen builds() class for the configuration.
            This should be a class created with `builds(YourConfig, ...)`.
        config_name: Name of the configuration in the hydra store.
            Must match the name used when calling `store(config_class, name=...)`.
        overrides: Optional list of Hydra override strings (e.g., ["param=value"]).
        job_name: Name for the Hydra job (default: "notebook").
        version_base: Hydra version base (default: "1.3").

    Returns:
        The instantiated configuration object with all defaults resolved.

    Raises:
        TypeError: If DERIVA_ML_HYDRA_OVERRIDES is set but does not decode
            to a JSON list of strings.
        json.JSONDecodeError: If DERIVA_ML_HYDRA_OVERRIDES is not valid JSON.

    Example:
        In your notebook's configuration module (e.g., `configs/roc_analysis.py`):

        >>> from dataclasses import dataclass, field
        >>> from hydra_zen import builds, store
        >>> from deriva_ml.execution import BaseConfig
        >>>
        >>> @dataclass
        ... class ROCAnalysisConfig(BaseConfig):
        ...     execution_rids: list[str] = field(default_factory=list)
        >>>
        >>> ROCAnalysisConfigBuilds = builds(
        ...     ROCAnalysisConfig,
        ...     populate_full_signature=True,
        ...     hydra_defaults=["_self_", {"deriva_ml": "default_deriva"}],
        ... )
        >>> store(ROCAnalysisConfigBuilds, name="roc_analysis")

        In your notebook:

        >>> from configs import load_all_configs
        >>> from configs.roc_analysis import ROCAnalysisConfigBuilds
        >>> from deriva_ml.execution import get_notebook_configuration
        >>>
        >>> # Load all project configs into hydra store
        >>> load_all_configs()
        >>>
        >>> # Get resolved configuration
        >>> config = get_notebook_configuration(
        ...     ROCAnalysisConfigBuilds,
        ...     config_name="roc_analysis",
        ...     overrides=["execution_rids=[3JRC,3KT0]"],
        ... )
        >>>
        >>> # Use the configuration
        >>> print(config.execution_rids)  # ['3JRC', '3KT0']
        >>> print(config.deriva_ml.hostname)  # From default_deriva config

    Environment Variables:
        DERIVA_ML_HYDRA_OVERRIDES: JSON-encoded list of override strings.
            When running via `deriva-ml-run-notebook`, this is automatically
            set from command-line arguments. These overrides are placed
            first in the override list; overrides passed directly to this
            function are appended after them so that they are intended to
            take precedence.
    """
    # Function-scope import: logging is only needed on the best-effort path.
    import logging

    # Ensure configs are in the hydra store
    store.add_to_hydra_store(overwrite_ok=True)

    # Collect overrides from the environment variable (set by run_notebook CLI)
    env_overrides_json = os.environ.get("DERIVA_ML_HYDRA_OVERRIDES")
    env_overrides = json.loads(env_overrides_json) if env_overrides_json else []

    # Fail early with a readable message instead of an obscure concatenation
    # error (or a failure deep inside Hydra) when the variable is malformed.
    if not isinstance(env_overrides, list) or not all(
        isinstance(item, str) for item in env_overrides
    ):
        raise TypeError(
            "DERIVA_ML_HYDRA_OVERRIDES must be a JSON-encoded list of strings, "
            f"got: {env_overrides_json!r}"
        )

    # Merge overrides: env overrides first, then explicit overrides
    all_overrides = env_overrides + (overrides or [])

    # Filled in from inside the task function, which is the only place where
    # the Hydra runtime configuration is consulted.
    captured_choices: dict[str, str] = {}

    # The cfg handed over by launch() is an OmegaConf DictConfig, so
    # hydra_zen.instantiate() is used to turn it into actual Python objects.
    def return_instantiated_config(cfg: Any) -> T:
        nonlocal captured_choices
        try:
            from hydra.core.hydra_config import HydraConfig

            choices = HydraConfig.get().runtime.choices
            # Some Hydra-internal groups have None choices; drop those.
            captured_choices = {k: v for k, v in choices.items() if v is not None}
        except Exception:
            # Best effort: the configuration is still usable without the
            # choices, so record the reason instead of failing.
            logging.getLogger(__name__).debug(
                "Could not read Hydra runtime choices; config_choices left empty",
                exc_info=True,
            )
        return instantiate(cfg)

    # Launch hydra-zen to resolve the configuration
    result = launch(
        config_class,
        return_instantiated_config,
        version_base=version_base,
        config_name=config_name,
        job_name=job_name,
        overrides=all_overrides,
    )

    # Inject the captured choices into the config object
    config = result.return_value
    if hasattr(config, "config_choices"):
        config.config_choices = captured_choices

    return config
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# ---------------------------------------------------------------------------
# Registry for notebook configurations
# ---------------------------------------------------------------------------
# Written by notebook_config() and read by run_notebook().
# Each entry maps config_name -> (config_builds_class, config_name).
_notebook_configs: dict[str, tuple[Any, str]] = dict()
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def notebook_config(
    name: str,
    config_class: type[BaseConfig] | None = None,
    defaults: dict[str, str] | None = None,
    **field_defaults: Any,
) -> Any:
    """Register a notebook configuration in one call.

    This is the recommended way to declare a notebook configuration: it
    performs the hydra-zen ``builds()`` call, assembles the Hydra defaults
    list, stores the result in the hydra-zen store and records it in the
    module registry that run_notebook() consults.

    Notebooks that only need the BaseConfig fields (deriva_ml, datasets,
    assets, ...) just pick which defaults to use. Notebooks with their own
    parameters pass a ``config_class`` that inherits from BaseConfig.

    Args:
        name: Configuration name. Used as the hydra config name and as the
            key under which run_notebook() looks the config up.
        config_class: Optional dataclass inheriting from BaseConfig. When
            omitted, BaseConfig itself is used.
        defaults: Mapping of config group name to config name. The groups
            "deriva_ml", "datasets" and "assets" are pre-selected as
            "default_deriva", "default_dataset" and "default_asset";
            entries given here replace those selections or add further
            groups. Common groups:
            - "deriva_ml": Connection config (e.g., "default_deriva", "eye_ai")
            - "datasets": Dataset config (e.g., "cifar10_training")
            - "assets": Asset config (e.g., "model_weights")
            - "workflow": Workflow config (e.g., "default_workflow")
        **field_defaults: Default values for fields in config_class; passed
            straight through to ``builds()``.

    Returns:
        The hydra-zen builds() class, in case it must be referenced directly.

    Examples:
        Simple notebook using only standard fields:

            # configs/roc_analysis.py
            from deriva_ml.execution import notebook_config

            notebook_config(
                "roc_analysis",
                defaults={"assets": "roc_comparison_probabilities"},
            )

        Notebook with custom parameters:

            # configs/training_analysis.py
            from dataclasses import dataclass
            from deriva_ml.execution import BaseConfig, notebook_config

            @dataclass
            class TrainingAnalysisConfig(BaseConfig):
                learning_rate: float = 0.001
                batch_size: int = 32

            notebook_config(
                "training_analysis",
                config_class=TrainingAnalysisConfig,
                defaults={"datasets": "cifar10_training"},
                learning_rate=0.01,  # Override default
            )
    """
    # Group selections used when the caller does not choose otherwise.
    group_choices = dict(
        deriva_ml="default_deriva",
        datasets="default_dataset",
        assets="default_asset",
    )
    # Caller selections replace matching groups in place and append new ones.
    group_choices.update(defaults or {})

    # "_self_" leads the defaults list, followed by one entry per group.
    defaults_list = ["_self_", *({grp: choice} for grp, choice in group_choices.items())]

    registered = builds(
        config_class or BaseConfig,
        populate_full_signature=True,
        hydra_defaults=defaults_list,
        **field_defaults,
    )

    # Make the config visible to Hydra and to run_notebook().
    store(registered, name=name)
    _notebook_configs[name] = (registered, name)

    return registered
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def load_configs(package_name: str = "configs") -> list[str]:
    """Dynamically import all configuration modules from a package.

    Every module found directly inside the package is imported. Each module
    is expected to register its configurations with the hydra-zen store as
    a side effect of being imported.

    Args:
        package_name: Name of the package containing config modules.
            Default is "configs" which works for the standard project layout.
            Both regular packages and namespace packages (a directory
            without ``__init__.py``) are supported.

    Returns:
        Sorted list of module names that were loaded. The list is empty
        when the package (or one of its parent packages) does not exist,
        or when ``package_name`` names a plain module rather than a package.

    Raises:
        ImportError: If the package itself exists but fails to import
            (for example because of a missing dependency), or if one of
            its config modules fails to import.

    Example:
        # In your main script or notebook
        from deriva_ml.execution import load_configs

        load_configs()  # Loads from "configs" package
        # or
        load_configs("my_project.configs")  # Custom package

    Note:
        The "experiments" module (if present) is loaded last because it
        typically depends on other configs being registered first.
    """
    try:
        package = importlib.import_module(package_name)
    except ModuleNotFoundError as err:
        # Only "the requested package (or a parent of it) is absent" counts
        # as "no configs". A module missing *inside* the package is a real
        # error and must not be hidden.
        missing = err.name or ""
        if missing and (package_name == missing or package_name.startswith(f"{missing}.")):
            return []
        raise

    # __path__ exists for regular and namespace packages alike, whereas
    # __file__ is None for namespace packages.
    search_path = getattr(package, "__path__", None)
    if search_path is None:
        # A plain module has no submodules to discover.
        return []

    module_names = sorted(info.name for info in pkgutil.iter_modules(search_path))

    # 'experiments' goes last: it may depend on the other configs.
    if "experiments" in module_names:
        module_names.remove("experiments")
        module_names.append("experiments")

    for module_name in module_names:
        importlib.import_module(f"{package_name}.{module_name}")

    return sorted(module_names)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def run_notebook(
    config_name: str,
    overrides: list[str] | None = None,
    workflow_name: str | None = None,
    workflow_type: str = "Analysis Notebook",
    ml_class: type["DerivaML"] | None = None,
    config_package: str = "configs",
) -> tuple["DerivaML", "Execution", BaseConfig]:
    """Set up a notebook with a DerivaML connection and execution context.

    This is the main entry point for notebooks. In order, it:
    1. Imports all config modules from ``config_package`` (load_configs)
    2. Resolves the hydra-zen configuration (get_notebook_configuration)
    3. Creates the DerivaML (or ``ml_class``) instance
    4. Creates a workflow and an execution configuration
    5. Constructs the Execution (which, per the original comments,
       downloads the specified datasets and assets)

    Args:
        config_name: Name of the notebook configuration (registered via
            notebook_config() or store()). Names that were not registered
            through notebook_config() are resolved against DerivaBaseConfig.
        overrides: Optional list of Hydra override strings
            (e.g., ["assets=different_assets"]).
        workflow_name: Name for the workflow. When omitted, it is derived
            from config_name by replacing underscores with spaces and
            title-casing the result.
        workflow_type: Type of workflow (default: "Analysis Notebook").
        ml_class: Optional DerivaML subclass to use. If None, uses DerivaML.
        config_package: Package containing config modules (default: "configs").

    Returns:
        Tuple of (ml_instance, execution, config):
        - ml_instance: DerivaML (or subclass) instance
        - execution: Execution built from the resolved configuration
        - config: Resolved configuration object

    Note:
        Only ``hostname`` and ``catalog_id`` are read from
        ``config.deriva_ml``, and ``config.dry_run`` is not read by this
        function.

    Example:
        # Simple usage
        from deriva_ml.execution import run_notebook

        ml, execution, config = run_notebook("roc_analysis")

        # Access config values
        print(config.assets)
        print(config.deriva_ml.hostname)

        # Use ml and execution
        for asset_table, paths in execution.asset_paths.items():
            for path in paths:
                print(f"Downloaded: {path.file_name}")

        # At the end of notebook
        execution.upload_execution_outputs()

    Example with overrides:
        ml, execution, config = run_notebook(
            "roc_analysis",
            overrides=["assets=roc_quick_probabilities"],
        )

    Example with custom ML class:
        from eye_ai import EyeAI

        ml, execution, config = run_notebook(
            "eye_analysis",
            ml_class=EyeAI,
        )
    """
    # Deferred imports: importing these at module load time would be circular.
    from deriva_ml import DerivaML
    from deriva_ml.execution import Execution, ExecutionConfiguration

    # Importing the config modules registers their configurations.
    load_configs(config_package)

    # Prefer the builds() class recorded by notebook_config(); names that
    # were registered some other way fall back to the stock base config.
    registry_entry = _notebook_configs.get(config_name)
    config_builds = DerivaBaseConfig if registry_entry is None else registry_entry[0]

    config = get_notebook_configuration(
        config_builds,
        config_name=config_name,
        overrides=overrides,
    )

    # Connect to the catalog named by the resolved configuration.
    connection = config.deriva_ml
    ml = (ml_class or DerivaML)(
        hostname=connection.hostname,
        catalog_id=connection.catalog_id,
    )

    run_description = config.description
    workflow = ml.create_workflow(
        name=workflow_name or config_name.replace("_", " ").title(),
        workflow_type=workflow_type,
        description=run_description or f"Running {config_name}",
    )

    # Empty or missing dataset/asset selections are normalized to [].
    exec_config = ExecutionConfiguration(
        workflow=workflow,
        datasets=config.datasets or [],
        assets=config.assets or [],
        description=run_description or f"Execution of {config_name}",
    )

    # Create execution context (downloads inputs)
    execution = Execution(configuration=exec_config, ml_object=ml)

    return ml, execution, config
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
class DescribedList(list):
|
|
522
|
+
"""A list with an attached description.
|
|
523
|
+
|
|
524
|
+
This class extends list to add a `description` attribute while maintaining
|
|
525
|
+
full list compatibility. This allows configuration values (like asset RIDs
|
|
526
|
+
or dataset specs) to carry documentation without changing how they're used.
|
|
527
|
+
|
|
528
|
+
When stored in hydra-zen and resolved via `instantiate()`, the result is a
|
|
529
|
+
DescribedList that behaves like a regular list but has a `description` attribute.
|
|
530
|
+
|
|
531
|
+
Attributes:
|
|
532
|
+
description: Human-readable description of this configuration.
|
|
533
|
+
|
|
534
|
+
Example:
|
|
535
|
+
>>> from hydra_zen import store
|
|
536
|
+
>>> from deriva_ml.execution import with_description
|
|
537
|
+
>>>
|
|
538
|
+
>>> asset_store = store(group="assets")
|
|
539
|
+
>>> asset_store(
|
|
540
|
+
... with_description(
|
|
541
|
+
... ["3WMG", "3XPA"],
|
|
542
|
+
... "Model weights from quick and extended training",
|
|
543
|
+
... ),
|
|
544
|
+
... name="comparison_weights",
|
|
545
|
+
... )
|
|
546
|
+
>>>
|
|
547
|
+
>>> # After instantiation, usage is identical to a regular list:
|
|
548
|
+
>>> # config.assets[0] # "3WMG"
|
|
549
|
+
>>> # len(config.assets) # 2
|
|
550
|
+
>>> # for rid in config.assets: ...
|
|
551
|
+
>>> # config.assets.description # "Model weights from..."
|
|
552
|
+
"""
|
|
553
|
+
|
|
554
|
+
def __init__(self, items: list | None = None, description: str = ""):
|
|
555
|
+
"""Initialize a DescribedList.
|
|
556
|
+
|
|
557
|
+
Args:
|
|
558
|
+
items: Initial list items. If None, creates empty list.
|
|
559
|
+
description: Human-readable description of this list.
|
|
560
|
+
"""
|
|
561
|
+
super().__init__(items or [])
|
|
562
|
+
self.description = description
|
|
563
|
+
|
|
564
|
+
def __repr__(self) -> str:
|
|
565
|
+
"""Return string representation including description."""
|
|
566
|
+
if self.description:
|
|
567
|
+
return f"DescribedList({list(self)!r}, description={self.description!r})"
|
|
568
|
+
return f"DescribedList({list(self)!r})"
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def _make_described_list(items: list, description: str = "") -> DescribedList:
    """Build a DescribedList from ``items`` and ``description``.

    A plain function is used as the ``builds()`` target behind
    `with_description`, so that the resulting config is hydra-zen
    compatible.

    Args:
        items: Items the list should contain.
        description: Human-readable description to attach.

    Returns:
        The newly created DescribedList.
    """
    described = DescribedList(items=items, description=description)
    return described
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
# Pre-built hydra-zen config class targeting _make_described_list; calling it
# with items/description (as with_description does) yields a config that
# instantiates to a DescribedList.
_DescribedListConfig = builds(
    _make_described_list,
    populate_full_signature=True,
)
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def with_description(items: list, description: str) -> Any:
    """Wrap a list in a hydra-zen config that carries a description.

    Use this to document configuration values such as asset RIDs or
    dataset specifications. The return value is a hydra-zen config; once
    instantiated it produces a DescribedList holding ``items`` with
    ``description`` attached.

    Args:
        items: List items (e.g., asset RIDs, dataset specs).
        description: Human-readable description of this configuration.

    Returns:
        A hydra-zen config that instantiates to a DescribedList.

    Example:
        >>> from hydra_zen import store
        >>> from deriva_ml.execution import with_description
        >>>
        >>> # Assets with description
        >>> asset_store = store(group="assets")
        >>> asset_store(
        ...     with_description(
        ...         ["3WMG", "3XPA"],
        ...         "Model weights from quick and extended training runs",
        ...     ),
        ...     name="comparison_weights",
        ... )
        >>>
        >>> # Datasets with description
        >>> from deriva_ml.dataset import DatasetSpecConfig
        >>> datasets_store = store(group="datasets")
        >>> datasets_store(
        ...     with_description(
        ...         [DatasetSpecConfig(rid="28CT", version="0.21.0")],
        ...         "Complete CIFAR-10 dataset with 10,000 images",
        ...     ),
        ...     name="cifar10_complete",
        ... )
        >>>
        >>> # After instantiation:
        >>> # config.assets is a DescribedList
        >>> # config.assets[0]  # "3WMG"
        >>> # config.assets.description  # "Model weights from..."

    Note:
        For model configs created with `builds()`, use the `zen_meta` parameter
        instead:

        >>> model_store(
        ...     Cifar10CNNConfig,
        ...     name="cifar10_quick",
        ...     epochs=3,
        ...     zen_meta={"description": "Quick training - 3 epochs"},
        ... )
    """
    described_config = _DescribedListConfig(description=description, items=items)
    return described_config
|