deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,639 @@
1
+ """Base configuration for DerivaML applications.
2
+
3
+ This module defines the base configuration and helper functions that simplify
4
+ creating hydra-zen configurations for both script execution and notebooks.
5
+
6
+ Simple Usage (notebooks using only BaseConfig fields):
7
+ # In configs/my_notebook.py
8
+ from deriva_ml.execution import notebook_config
9
+
10
+ notebook_config(
11
+ "my_notebook",
12
+ defaults={"assets": "my_assets", "datasets": "my_dataset"},
13
+ )
14
+
15
+ # In notebook
16
+ from deriva_ml.execution import run_notebook
17
+ ml, execution, config = run_notebook("my_notebook")
18
+
19
+ Advanced Usage (notebooks with custom parameters):
20
+ # In configs/my_analysis.py
21
+ from dataclasses import dataclass
22
+ from deriva_ml.execution import BaseConfig, notebook_config
23
+
24
+ @dataclass
25
+ class MyAnalysisConfig(BaseConfig):
26
+ threshold: float = 0.5
27
+ num_samples: int = 100
28
+
29
+ notebook_config(
30
+ "my_analysis",
31
+ config_class=MyAnalysisConfig,
32
+ defaults={"assets": "analysis_assets"},
33
+ )
34
+
35
+ # In notebook
36
+ from deriva_ml.execution import run_notebook
37
+ ml, execution, config = run_notebook("my_analysis")
38
+ print(config.threshold) # 0.5
39
+ """
40
+
41
+ import importlib
42
+ import json
43
+ import os
44
+ import pkgutil
45
+ from dataclasses import dataclass, field
46
+ from pathlib import Path
47
+ from typing import Any, TypeVar, TYPE_CHECKING
48
+
49
+ from hydra_zen import builds, instantiate, launch, make_config, store
50
+
51
+ if TYPE_CHECKING:
52
+ from deriva_ml import DerivaML
53
+ from deriva_ml.execution import Execution
54
+
55
+ T = TypeVar("T")
56
+
57
+
58
# Hydra "defaults" list shared by DerivaML applications: the config itself
# (``_self_``) followed by one default choice per config group.
# Projects may customize this list or define their own.
base_defaults = ["_self_"] + [
    {group: choice}
    for group, choice in (
        ("deriva_ml", "default_deriva"),
        ("datasets", "default_dataset"),
        ("assets", "default_asset"),
        ("workflow", "default_workflow"),
        ("model_config", "default_model"),
    )
]
68
+
69
+
70
@dataclass
class BaseConfig:
    """Common configuration fields for DerivaML scripts and notebooks.

    Project-specific configuration dataclasses should subclass this one so
    they inherit the standard DerivaML fields and only add their own
    parameters.

    Note:
        Several fields are annotated ``Any`` on purpose. Some of the runtime
        types (DerivaMLConfig, DatasetSpec) are Pydantic models, which are
        not compatible with OmegaConf structured configs; the runtime types
        are listed under Attributes instead.

    Attributes:
        deriva_ml: Connection configuration (DerivaMLConfig at runtime).
        datasets: Dataset specifications (list[DatasetSpec] at runtime).
        assets: Asset RIDs to load (list[str] at runtime).
        dry_run: If True, skip catalog writes (for testing/debugging).
        description: Human-readable description of this run.
        config_choices: Mapping of config group name to the selected config
            name, e.g. ``{"model_config": "cifar10_quick"}``. Populated by
            get_notebook_configuration() from the Hydra runtime choices, so
            an execution can record which configurations were used.

    Example:
        >>> from dataclasses import dataclass
        >>> from deriva_ml.execution import BaseConfig
        >>>
        >>> @dataclass
        ... class MyConfig(BaseConfig):
        ...     learning_rate: float = 0.001
        ...     epochs: int = 10
    """

    deriva_ml: Any = None
    datasets: Any = field(default_factory=list)
    assets: Any = field(default_factory=list)
    dry_run: bool = False
    description: str = ""
    config_choices: dict[str, str] = field(default_factory=dict)
110
+
111
+
112
# Hydra-zen config for BaseConfig, wired to the standard defaults list.
# Experiments can inherit from this ready-made base.
DerivaBaseConfig = builds(
    BaseConfig,
    hydra_defaults=base_defaults,
    populate_full_signature=True,
)

# Make the base config available in the hydra-zen store as "deriva_base".
store(DerivaBaseConfig, name="deriva_base")
121
+
122
+
123
def _collect_hydra_overrides(overrides: list[str] | None) -> list[str]:
    """Merge environment-supplied and explicit Hydra overrides into one list.

    Overrides decoded from ``DERIVA_ML_HYDRA_OVERRIDES`` come first and the
    explicitly passed ``overrides`` follow them.

    Raises:
        TypeError: If the environment variable does not decode to a list of
            strings, or if ``overrides`` is a bare string.
        json.JSONDecodeError: If the environment variable is not valid JSON.
    """
    raw = os.environ.get("DERIVA_ML_HYDRA_OVERRIDES")
    env_overrides = json.loads(raw) if raw else []
    if not isinstance(env_overrides, list) or not all(
        isinstance(item, str) for item in env_overrides
    ):
        raise TypeError(
            "DERIVA_ML_HYDRA_OVERRIDES must be a JSON-encoded list of strings, "
            f"got: {raw!r}"
        )

    # A bare string would otherwise be split into single characters below.
    if isinstance(overrides, str):
        raise TypeError(
            f"overrides must be a list of override strings, got a single string: {overrides!r}"
        )

    return env_overrides + list(overrides or [])


def get_notebook_configuration(
    config_class: type[T],
    config_name: str,
    overrides: list[str] | None = None,
    job_name: str = "notebook",
    version_base: str = "1.3",
) -> T:
    """Load and return a hydra-zen configuration for use in notebooks.

    The function:
        - adds the hydra-zen store contents to the Hydra store,
        - launches hydra-zen to resolve defaults and overrides,
        - instantiates the resolved config and returns it.

    If the returned object has a ``config_choices`` attribute, it is set to
    the Hydra runtime choices captured during the launch (entries whose
    choice is None are dropped).

    Args:
        config_class: The hydra-zen builds() class for the configuration,
            i.e. a class created with ``builds(YourConfig, ...)``.
        config_name: Name of the configuration in the hydra store. Must match
            the name used when calling ``store(config_class, name=...)``.
        overrides: Optional list of Hydra override strings
            (e.g., ["param=value"]).
        job_name: Name for the Hydra job (default: "notebook").
        version_base: Hydra version base (default: "1.3").

    Returns:
        The instantiated configuration object with all defaults resolved.

    Raises:
        TypeError: If ``DERIVA_ML_HYDRA_OVERRIDES`` does not decode to a list
            of strings, or ``overrides`` is a bare string.
        json.JSONDecodeError: If ``DERIVA_ML_HYDRA_OVERRIDES`` is not valid JSON.

    Environment Variables:
        DERIVA_ML_HYDRA_OVERRIDES: JSON-encoded list of override strings.
            These are placed first in the override list; overrides passed
            directly to this function follow them and so take precedence.

    Example:
        In the notebook's configuration module (e.g. ``configs/roc_analysis.py``):

        >>> from dataclasses import dataclass, field
        >>> from hydra_zen import builds, store
        >>> from deriva_ml.execution import BaseConfig
        >>>
        >>> @dataclass
        ... class ROCAnalysisConfig(BaseConfig):
        ...     execution_rids: list[str] = field(default_factory=list)
        >>>
        >>> ROCAnalysisConfigBuilds = builds(
        ...     ROCAnalysisConfig,
        ...     populate_full_signature=True,
        ...     hydra_defaults=["_self_", {"deriva_ml": "default_deriva"}],
        ... )
        >>> store(ROCAnalysisConfigBuilds, name="roc_analysis")

        In the notebook:

        >>> from deriva_ml.execution import get_notebook_configuration
        >>> config = get_notebook_configuration(
        ...     ROCAnalysisConfigBuilds,
        ...     config_name="roc_analysis",
        ...     overrides=["execution_rids=[3JRC,3KT0]"],
        ... )
        >>> print(config.execution_rids)  # ['3JRC', '3KT0']
    """
    import logging

    # Ensure configs are in the hydra store.
    store.add_to_hydra_store(overwrite_ok=True)

    # Environment overrides first, explicit overrides last (higher precedence).
    all_overrides = _collect_hydra_overrides(overrides)

    # Filled in by the task function while Hydra is running.
    captured_choices: dict[str, str] = {}

    def return_instantiated_config(cfg: Any) -> T:
        """Record the Hydra runtime choices, then instantiate the config.

        The cfg passed in by launch() is an OmegaConf DictConfig, so
        hydra_zen.instantiate() is used to turn it into Python objects.
        """
        try:
            from hydra.core.hydra_config import HydraConfig

            choices = HydraConfig.get().runtime.choices
            # Some Hydra-internal groups have None choices; skip those.
            captured_choices.update(
                {group: choice for group, choice in choices.items() if choice is not None}
            )
        except Exception:
            # Capturing choices is best-effort: leave them empty on failure,
            # but keep a trace for debugging instead of failing silently.
            logging.getLogger(__name__).debug(
                "Could not capture Hydra runtime choices", exc_info=True
            )
        return instantiate(cfg)

    # Launch hydra-zen to resolve the configuration.
    result = launch(
        config_class,
        return_instantiated_config,
        version_base=version_base,
        config_name=config_name,
        job_name=job_name,
        overrides=all_overrides,
    )

    # Attach the captured choices to the config object when it has a slot for them.
    config = result.return_value
    if hasattr(config, "config_choices"):
        config.config_choices = captured_choices

    return config
243
+
244
+
245
+ # ---------------------------------------------------------------------------
246
+ # Registry for notebook configurations
247
+ # ---------------------------------------------------------------------------
248
+ # Maps config_name -> (config_builds_class, config_name)
249
+ _notebook_configs: dict[str, tuple[Any, str]] = {}
250
+
251
+
252
def notebook_config(
    name: str,
    config_class: type[BaseConfig] | None = None,
    defaults: dict[str, str] | None = None,
    **field_defaults: Any,
) -> Any:
    """Build and register a notebook configuration in one call.

    Wraps the hydra-zen steps needed for a notebook config: it assembles the
    Hydra defaults list, creates the builds() class, stores it in the
    hydra-zen store under ``name`` and records it in the module's notebook
    registry so run_notebook() can find it.

    Notebooks that only need the BaseConfig fields can omit ``config_class``
    and just pick defaults. Notebooks with extra parameters pass a dataclass
    that inherits from BaseConfig.

    Args:
        name: Configuration name. Used as the hydra config name and as the
            registry key looked up by run_notebook().
        config_class: Optional dataclass inheriting from BaseConfig. When
            omitted (or falsy), BaseConfig itself is used.
        defaults: Mapping of config group name to config name. Entries
            replace the built-in choices for "deriva_ml" ("default_deriva"),
            "datasets" ("default_dataset") and "assets" ("default_asset");
            any other group (e.g. "workflow") is appended after them.
        **field_defaults: Default values for fields of the config class,
            forwarded to builds().

    Returns:
        The hydra-zen builds() class, in case it needs to be referenced
        directly.

    Examples:
        Simple notebook using only standard fields:

            # configs/roc_analysis.py
            from deriva_ml.execution import notebook_config

            notebook_config(
                "roc_analysis",
                defaults={"assets": "roc_comparison_probabilities"},
            )

        Notebook with custom parameters:

            # configs/training_analysis.py
            from dataclasses import dataclass
            from deriva_ml.execution import BaseConfig, notebook_config

            @dataclass
            class TrainingAnalysisConfig(BaseConfig):
                learning_rate: float = 0.001
                batch_size: int = 32

            notebook_config(
                "training_analysis",
                config_class=TrainingAnalysisConfig,
                defaults={"datasets": "cifar10_training"},
                learning_rate=0.01,  # Override default
            )
    """
    target_class = config_class or BaseConfig

    # Built-in group choices first; caller-supplied defaults override or extend them.
    group_choices = dict(
        deriva_ml="default_deriva",
        datasets="default_dataset",
        assets="default_asset",
    )
    group_choices.update(defaults or {})

    # "_self_" leads the defaults list, followed by one entry per group.
    hydra_defaults = [
        "_self_",
        *({group: choice} for group, choice in group_choices.items()),
    ]

    config_builds = builds(
        target_class,
        populate_full_signature=True,
        hydra_defaults=hydra_defaults,
        **field_defaults,
    )

    # Register with the hydra-zen store and with the registry used by run_notebook().
    store(config_builds, name=name)
    _notebook_configs[name] = (config_builds, name)

    return config_builds
346
+
347
+
348
def load_configs(package_name: str = "configs") -> list[str]:
    """Dynamically import all configuration modules from a package.

    Every module found directly inside ``package_name`` is imported. Each
    module is expected to register its configurations with the hydra-zen
    store as a side effect of being imported.

    Args:
        package_name: Name of the package containing config modules.
            Default is "configs" which works for the standard project layout.

    Returns:
        Sorted list of the module names that were loaded. The list is empty
        when the package (or one of its parent packages) does not exist, or
        when ``package_name`` names a plain module rather than a package.

    Raises:
        ImportError: If the package exists but fails to import (for example
            because its ``__init__`` imports a missing dependency), or if
            one of its config modules fails to import.

    Example:
        # In your main script or notebook
        from deriva_ml.execution import load_configs

        load_configs()  # Loads from "configs" package
        # or
        load_configs("my_project.configs")  # Custom package

    Note:
        The "experiments" module (if present) is loaded last because it
        typically depends on other configs being registered first.
    """
    try:
        package = importlib.import_module(package_name)
    except ModuleNotFoundError as exc:
        # Only "the package itself (or a parent of it) is absent" means there
        # is nothing to load. Any other missing module is a real failure
        # inside the package and must not be hidden.
        missing = exc.name or ""
        if missing and (
            package_name == missing or package_name.startswith(f"{missing}.")
        ):
            return []
        raise

    # __path__ also works for namespace packages, whose __file__ is None.
    search_path = getattr(package, "__path__", None)
    if search_path is None:
        # A plain module has no submodules to discover.
        return []

    # A set removes duplicates when the package spans several directories.
    module_names = sorted(
        {info.name for info in pkgutil.iter_modules(list(search_path))}
    )

    # Alphabetical order, except that 'experiments' always goes last.
    if "experiments" in module_names:
        module_names.remove("experiments")
        module_names.append("experiments")

    for module_name in module_names:
        importlib.import_module(f"{package_name}.{module_name}")

    return sorted(module_names)
403
+
404
+
405
def run_notebook(
    config_name: str,
    overrides: list[str] | None = None,
    workflow_name: str | None = None,
    workflow_type: str = "Analysis Notebook",
    ml_class: type["DerivaML"] | None = None,
    config_package: str = "configs",
) -> tuple["DerivaML", "Execution", BaseConfig]:
    """Set up a notebook with a DerivaML connection and execution context.

    Main entry point for notebooks. In order, it:
        1. Imports all config modules from ``config_package``.
        2. Resolves the hydra-zen configuration named ``config_name``.
        3. Instantiates the DerivaML class with the configured hostname
           and catalog id.
        4. Creates a workflow and an execution configuration.
        5. Creates the Execution, which downloads the configured inputs.

    Args:
        config_name: Name of the notebook configuration (registered via
            notebook_config() or store()). Names that were not registered
            through notebook_config() are resolved against DerivaBaseConfig.
        overrides: Optional list of Hydra override strings
            (e.g., ["assets=different_assets"]).
        workflow_name: Name for the workflow. When omitted, the name is
            derived from ``config_name`` by replacing underscores with
            spaces and title-casing the result.
        workflow_type: Type of workflow (default: "Analysis Notebook").
        ml_class: Optional DerivaML subclass to use. If None, uses DerivaML.
        config_package: Package containing config modules (default: "configs").

    Returns:
        Tuple of (ml_instance, execution, config):
            - ml_instance: The DerivaML (or subclass) instance
            - execution: The created Execution
            - config: Resolved configuration object

    Example:
        from deriva_ml.execution import run_notebook

        ml, execution, config = run_notebook("roc_analysis")

        # Access config values
        print(config.assets)
        print(config.deriva_ml.hostname)

        # At the end of notebook
        execution.upload_execution_outputs()

    Example with overrides:
        ml, execution, config = run_notebook(
            "roc_analysis",
            overrides=["assets=roc_quick_probabilities"],
        )

    Example with custom ML class:
        from eye_ai import EyeAI

        ml, execution, config = run_notebook(
            "eye_analysis",
            ml_class=EyeAI,
        )
    """
    # Imported lazily to avoid circular imports.
    from deriva_ml import DerivaML
    from deriva_ml.execution import Execution, ExecutionConfiguration

    # Importing the config modules registers their configurations.
    load_configs(config_package)

    # Prefer the builds() class recorded by notebook_config(); configs that
    # were registered the old way fall back to the base config.
    registered = _notebook_configs.get(config_name)
    config_builds = DerivaBaseConfig if registered is None else registered[0]

    config = get_notebook_configuration(
        config_builds,
        config_name=config_name,
        overrides=overrides,
    )

    # Connect to the catalog.
    connection = config.deriva_ml
    ml_factory = ml_class or DerivaML
    ml = ml_factory(
        hostname=connection.hostname,
        catalog_id=connection.catalog_id,
    )

    # Workflow: explicit name wins, otherwise a title derived from the config name.
    if workflow_name:
        resolved_workflow_name = workflow_name
    else:
        resolved_workflow_name = config_name.replace("_", " ").title()
    workflow = ml.create_workflow(
        name=resolved_workflow_name,
        workflow_type=workflow_type,
        description=config.description or f"Running {config_name}",
    )

    # Execution configuration; empty or missing inputs become empty lists.
    exec_config = ExecutionConfiguration(
        workflow=workflow,
        datasets=config.datasets or [],
        assets=config.assets or [],
        description=config.description or f"Execution of {config_name}",
    )

    # Creating the execution context downloads the inputs.
    execution = Execution(configuration=exec_config, ml_object=ml)

    return ml, execution, config
519
+
520
+
521
+ class DescribedList(list):
522
+ """A list with an attached description.
523
+
524
+ This class extends list to add a `description` attribute while maintaining
525
+ full list compatibility. This allows configuration values (like asset RIDs
526
+ or dataset specs) to carry documentation without changing how they're used.
527
+
528
+ When stored in hydra-zen and resolved via `instantiate()`, the result is a
529
+ DescribedList that behaves like a regular list but has a `description` attribute.
530
+
531
+ Attributes:
532
+ description: Human-readable description of this configuration.
533
+
534
+ Example:
535
+ >>> from hydra_zen import store
536
+ >>> from deriva_ml.execution import with_description
537
+ >>>
538
+ >>> asset_store = store(group="assets")
539
+ >>> asset_store(
540
+ ... with_description(
541
+ ... ["3WMG", "3XPA"],
542
+ ... "Model weights from quick and extended training",
543
+ ... ),
544
+ ... name="comparison_weights",
545
+ ... )
546
+ >>>
547
+ >>> # After instantiation, usage is identical to a regular list:
548
+ >>> # config.assets[0] # "3WMG"
549
+ >>> # len(config.assets) # 2
550
+ >>> # for rid in config.assets: ...
551
+ >>> # config.assets.description # "Model weights from..."
552
+ """
553
+
554
+ def __init__(self, items: list | None = None, description: str = ""):
555
+ """Initialize a DescribedList.
556
+
557
+ Args:
558
+ items: Initial list items. If None, creates empty list.
559
+ description: Human-readable description of this list.
560
+ """
561
+ super().__init__(items or [])
562
+ self.description = description
563
+
564
+ def __repr__(self) -> str:
565
+ """Return string representation including description."""
566
+ if self.description:
567
+ return f"DescribedList({list(self)!r}, description={self.description!r})"
568
+ return f"DescribedList({list(self)!r})"
569
+
570
+
571
def _make_described_list(items: list, description: str = "") -> DescribedList:
    """Build a DescribedList from ``items`` and ``description``.

    A plain function is used as the target of the hydra-zen config defined
    just below, which `with_description` relies on.
    """
    described = DescribedList(items, description)
    return described


# hydra-zen config whose target is the factory above, built once at import time.
_DescribedListConfig = builds(_make_described_list, populate_full_signature=True)
582
+
583
+
584
def with_description(items: list, description: str) -> Any:
    """Wrap a list and its description in a hydra-zen config.

    Use this to document configuration values such as asset RIDs or dataset
    specifications. The returned config produces a DescribedList when it is
    instantiated.

    Args:
        items: List items (e.g., asset RIDs, dataset specs).
        description: Human-readable description of this configuration.

    Returns:
        A hydra-zen config that instantiates to a DescribedList.

    Example:
        >>> from hydra_zen import store
        >>> from deriva_ml.execution import with_description
        >>>
        >>> # Assets with description
        >>> asset_store = store(group="assets")
        >>> asset_store(
        ...     with_description(
        ...         ["3WMG", "3XPA"],
        ...         "Model weights from quick and extended training runs",
        ...     ),
        ...     name="comparison_weights",
        ... )
        >>>
        >>> # Datasets with description
        >>> from deriva_ml.dataset import DatasetSpecConfig
        >>> datasets_store = store(group="datasets")
        >>> datasets_store(
        ...     with_description(
        ...         [DatasetSpecConfig(rid="28CT", version="0.21.0")],
        ...         "Complete CIFAR-10 dataset with 10,000 images",
        ...     ),
        ...     name="cifar10_complete",
        ... )
        >>>
        >>> # After instantiation:
        >>> # config.assets is a DescribedList
        >>> # config.assets[0]           # "3WMG"
        >>> # config.assets.description  # "Model weights from..."

    Note:
        For model configs created with `builds()`, use the `zen_meta`
        parameter instead:

        >>> model_store(
        ...     Cifar10CNNConfig,
        ...     name="cifar10_quick",
        ...     epochs=3,
        ...     zen_meta={"description": "Quick training - 3 epochs"},
        ... )
    """
    described_config = _DescribedListConfig(items=items, description=description)
    return described_config