deriva-ml 1.17.10__py3-none-any.whl → 1.17.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. deriva_ml/__init__.py +69 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +31 -0
  7. deriva_ml/catalog/clone.py +1939 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +845 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.12.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/top_level.txt +0 -0
deriva_ml/core/config.py CHANGED
@@ -1,9 +1,62 @@
1
+ """Configuration management for DerivaML.
2
+
3
+ This module provides the DerivaMLConfig class for managing DerivaML instance
4
+ configuration. It integrates with hydra-zen for configuration management and supports
5
+ both programmatic and structured configuration.
6
+
7
+ The configuration handles:
8
+ - Server connection settings (hostname, catalog_id, credentials)
9
+ - Schema configuration (domain_schemas, default_schema, ml_schema)
10
+ - Directory paths (working_dir, cache_dir)
11
+ - Logging levels for both DerivaML and underlying Deriva libraries
12
+ - Feature toggles (use_minid, check_auth)
13
+
14
+ Integration with hydra-zen:
15
+ The module registers a custom resolver for computing working directories
16
+ and configures Hydra's output directory structure for reproducible runs.
17
+ Use hydra-zen's `builds()` and `store` to create composable configurations.
18
+
19
+ Example:
20
+ Programmatic configuration:
21
+ >>> config = DerivaMLConfig(
22
+ ... hostname='deriva.example.org',
23
+ ... catalog_id='my_catalog',
24
+ ... working_dir='/path/to/work'
25
+ ... )
26
+ >>> ml = DerivaML.instantiate(config)
27
+
28
+ With hydra-zen:
29
+ >>> from hydra_zen import builds, instantiate, store, zen
30
+ >>> from deriva_ml import DerivaML
31
+ >>> from deriva_ml.core.config import DerivaMLConfig
32
+ >>>
33
+ >>> # Create a structured config for DerivaML
34
+ >>> DerivaMLConf = builds(DerivaMLConfig, populate_full_signature=True)
35
+ >>>
36
+ >>> # Store configurations for different environments
37
+ >>> store(DerivaMLConf(
38
+ ... hostname='dev.example.org',
39
+ ... catalog_id='1',
40
+ ... ), name='dev')
41
+ >>>
42
+ >>> store(DerivaMLConf(
43
+ ... hostname='prod.example.org',
44
+ ... catalog_id='52',
45
+ ... ), name='prod')
46
+ >>>
47
+ >>> # Use with Hydra's @hydra.main or zen() wrapper
48
+ >>> @zen(DerivaMLConf)
49
+ ... def my_task(cfg: DerivaMLConfig):
50
+ ... ml = DerivaML.instantiate(cfg)
51
+ ... # ... do work with ml instance
52
+ """
53
+
1
54
  import getpass
2
55
  import logging
3
56
  from pathlib import Path
4
57
  from typing import Any
5
58
 
6
- from hydra.conf import HydraConf, RunDir
59
+ from hydra.conf import HydraConf, RunDir, SweepDir
7
60
  from hydra.core.hydra_config import HydraConfig
8
61
  from hydra_zen import store
9
62
  from omegaconf import OmegaConf
@@ -13,9 +66,52 @@ from deriva_ml.core.definitions import ML_SCHEMA
13
66
 
14
67
 
15
68
  class DerivaMLConfig(BaseModel):
69
+ """Configuration model for DerivaML instances.
70
+
71
+ This Pydantic model defines all configurable parameters for a DerivaML instance.
72
+ It can be used directly or via Hydra configuration files.
73
+
74
+ Attributes:
75
+ hostname: Hostname of the Deriva server (e.g., 'deriva.example.org').
76
+ catalog_id: Catalog identifier, either numeric ID or catalog name.
77
+ domain_schemas: Optional set of domain schema names. If None, auto-detects all
78
+ non-system schemas. Use this when working with catalogs that have multiple
79
+ user-defined schemas.
80
+ default_schema: The default schema for table creation operations. If None and
81
+ there is exactly one domain schema, that schema is used. If there are multiple
82
+ domain schemas, this must be specified for table creation to work without
83
+ explicit schema parameters.
84
+ project_name: Project name for organizing outputs. Defaults to default_schema.
85
+ cache_dir: Directory for caching downloaded datasets. Defaults to working_dir/cache.
86
+ working_dir: Base directory for computation data. Defaults to ~/.deriva-ml/<catalog_id>.
87
+ hydra_runtime_output_dir: Hydra's runtime output directory (set automatically).
88
+ ml_schema: Schema name for ML tables. Defaults to 'deriva-ml'.
89
+ logging_level: Logging level for DerivaML. Defaults to WARNING.
90
+ deriva_logging_level: Logging level for Deriva libraries. Defaults to WARNING.
91
+ credential: Authentication credentials. If None, retrieved automatically.
92
+ s3_bucket: S3 bucket URL for dataset bag storage (e.g., 's3://my-bucket').
93
+ If provided, enables MINID creation and S3 upload for dataset exports.
94
+ If None, MINID functionality is disabled regardless of use_minid setting.
95
+ use_minid: Whether to use MINID service for dataset bags. Only effective when
96
+ s3_bucket is configured. Defaults to True when s3_bucket is set, False otherwise.
97
+ check_auth: Whether to verify authentication on connection. Defaults to True.
98
+ clean_execution_dir: Whether to automatically clean execution working directories
99
+ after successful upload. Defaults to True. Set to False to retain local copies
100
+ of execution outputs for debugging or manual inspection.
101
+
102
+ Example:
103
+ >>> config = DerivaMLConfig(
104
+ ... hostname='deriva.example.org',
105
+ ... catalog_id=1,
106
+ ... default_schema='my_domain',
107
+ ... logging_level=logging.INFO
108
+ ... )
109
+ """
110
+
16
111
  hostname: str
17
112
  catalog_id: str | int = 1
18
- domain_schema: str | None = None
113
+ domain_schemas: set[str] | None = None
114
+ default_schema: str | None = None
19
115
  project_name: str | None = None
20
116
  cache_dir: str | Path | None = None
21
117
  working_dir: str | Path | None = None
@@ -24,46 +120,98 @@ class DerivaMLConfig(BaseModel):
24
120
  logging_level: Any = logging.WARNING
25
121
  deriva_logging_level: Any = logging.WARNING
26
122
  credential: Any = None
27
- use_minid: bool = True
123
+ s3_bucket: str | None = None
124
+ use_minid: bool | None = None # None means "auto" - True if s3_bucket is set
28
125
  check_auth: bool = True
126
+ clean_execution_dir: bool = True
29
127
 
30
128
  @model_validator(mode="after")
31
- def init_working_dir(self):
32
- """
33
- Sets up the working directory for the model.
129
+ def init_working_dir(self) -> "DerivaMLConfig":
130
+ """Initialize working directory and resolve use_minid after model validation.
131
+
132
+ Sets up the working directory path, computing a default if not specified.
133
+ Also captures Hydra's runtime output directory for logging and outputs.
34
134
 
35
- This method configures the working directory, ensuring that all required
36
- file operations are performed in the appropriate location. If the user does not
37
- specify a directory, a default directory based on the user's home directory
38
- or username will be used.
135
+ Resolves the use_minid flag based on s3_bucket configuration:
136
+ - If use_minid is explicitly set, use that value (but it only takes effect if s3_bucket is set)
137
+ - If use_minid is None (auto), set it to True if s3_bucket is configured, False otherwise
39
138
 
40
- This is a repeat of what is in the DerivaML.__init__ bu we put this here so that the working
41
- directory is available to hydra.
139
+ This validator runs after all field validation and ensures the working
140
+ directory is available for Hydra configuration resolution.
42
141
 
43
142
  Returns:
44
- Self: The object instance with the working directory initialized.
143
+ Self: The configuration instance with initialized paths.
45
144
  """
46
-
47
- self.working_dir = DerivaMLConfig.compute_workdir(self.working_dir)
145
+ self.working_dir = DerivaMLConfig.compute_workdir(self.working_dir, self.catalog_id)
48
146
  self.hydra_runtime_output_dir = Path(HydraConfig.get().runtime.output_dir)
147
+
148
+ # Resolve use_minid based on s3_bucket configuration
149
+ if self.use_minid is None:
150
+ # Auto mode: enable MINID if s3_bucket is configured
151
+ self.use_minid = self.s3_bucket is not None
152
+ elif self.use_minid and self.s3_bucket is None:
153
+ # User requested MINID but no S3 bucket configured - disable MINID
154
+ self.use_minid = False
155
+
49
156
  return self
50
157
 
51
158
  @staticmethod
52
- def compute_workdir(working_dir) -> Path:
53
- # Create a default working directory if none is provided. If a working directory is provided, we add the
54
- # user name to it to ensure that multiple users do not overwrite each other's work.'
55
- working_dir = (Path(working_dir) / getpass.getuser() if working_dir else Path.home()) / "deriva-ml"
56
- return working_dir.absolute()
159
+ def compute_workdir(working_dir: str | Path | None, catalog_id: str | int | None = None) -> Path:
160
+ """Compute the effective working directory path.
57
161
 
162
+ Creates a standardized working directory path. If a base directory is provided,
163
+ appends the current username to prevent conflicts between users. If no directory
164
+ is provided, uses ~/.deriva-ml. The catalog_id is appended to
165
+ separate data from different catalogs.
166
+
167
+ Args:
168
+ working_dir: Base working directory path, or None for default.
169
+ catalog_id: Catalog identifier to include in the path. If None, no
170
+ catalog subdirectory is created.
171
+
172
+ Returns:
173
+ Path: Absolute path to the working directory.
58
174
 
175
+ Example:
176
+ >>> DerivaMLConfig.compute_workdir('/shared/data', '52')
177
+ PosixPath('/shared/data/username/deriva-ml/52')
178
+ >>> DerivaMLConfig.compute_workdir(None, 1)
179
+ PosixPath('/home/username/.deriva-ml/1')
180
+ """
181
+ # Append username and deriva-ml to provided path, or use ~/.deriva-ml as base
182
+ if working_dir:
183
+ base_dir = Path(working_dir) / getpass.getuser() / "deriva-ml"
184
+ else:
185
+ base_dir = Path.home() / ".deriva-ml"
186
+ # Append catalog_id if provided
187
+ if catalog_id is not None:
188
+ base_dir = base_dir / str(catalog_id)
189
+ return base_dir.absolute()
190
+
191
+
192
+ # =============================================================================
193
+ # Hydra Integration
194
+ # =============================================================================
195
+
196
+ # Register custom resolver for computing working directories in Hydra configs
197
+ # This allows ${compute_workdir:${working_dir},${catalog_id}} syntax in YAML configuration files
59
198
  OmegaConf.register_new_resolver("compute_workdir", DerivaMLConfig.compute_workdir, replace=True)
199
+
200
+ # Configure Hydra's output directory structure for reproducible runs
201
+ # Outputs are organized by timestamp under the computed working directory
202
+ # For multirun/sweep, outputs go to a sweep subdirectory with job number subfolders
60
203
  store(
61
204
  HydraConf(
62
- run=RunDir("${compute_workdir:${deriva_ml.working_dir}}/hydra/${now:%Y-%m-%d_%H-%M-%S}"),
205
+ run=RunDir("${compute_workdir:${deriva_ml.working_dir},${deriva_ml.catalog_id}}/hydra/${now:%Y-%m-%d_%H-%M-%S}"),
206
+ sweep=SweepDir(
207
+ dir="${compute_workdir:${deriva_ml.working_dir},${deriva_ml.catalog_id}}/hydra-sweep/${now:%Y-%m-%d_%H-%M-%S}",
208
+ subdir="${hydra.job.num}",
209
+ ),
63
210
  output_subdir="hydra-config",
64
211
  ),
65
212
  group="hydra",
66
213
  name="config",
67
214
  )
68
215
 
216
+ # Add the configuration to Hydra's store for discovery
69
217
  store.add_to_hydra_store()
deriva_ml/core/constants.py CHANGED
@@ -1,36 +1,137 @@
1
- """
2
- Constants used throughout the DerivaML package.
1
+ """Constants used throughout the DerivaML package.
2
+
3
+ This module defines fundamental constants, type aliases, and regular expressions
4
+ used for validating and working with Deriva catalog structures.
5
+
6
+ Constants:
7
+ ML_SCHEMA: Default schema name for ML-related tables ('deriva-ml').
8
+ DRY_RUN_RID: Special RID used for dry-run operations without database changes.
9
+
10
+ Type Aliases:
11
+ RID: Annotated string type for Resource Identifiers with validation.
12
+
13
+ Regular Expressions:
14
+ rid_part: Pattern for matching the RID portion of an identifier.
15
+ snapshot_part: Pattern for matching optional snapshot timestamps.
16
+ rid_regex: Complete pattern for validating RID strings.
17
+
18
+ Column Sets:
19
+ DerivaSystemColumns: Standard Deriva system columns present in all tables.
20
+ DerivaAssetColumns: Columns specific to asset tables (files, etc.).
21
+
22
+ Example:
23
+ >>> from deriva_ml.core.constants import RID, ML_SCHEMA
24
+ >>> def process_entity(rid: RID) -> None:
25
+ ... # RID is validated by Pydantic
26
+ ... pass
3
27
  """
4
28
 
5
29
  from __future__ import annotations
6
30
 
7
- from typing import NewType, TypeAlias
31
+ from typing import Annotated
8
32
 
9
- from pydantic import constr
33
+ from pydantic import StringConstraints
10
34
 
11
- # Schema name
35
+ # =============================================================================
36
+ # Schema Constants
37
+ # =============================================================================
38
+
39
+ # Default schema name for ML-related tables in the catalog
12
40
  ML_SCHEMA = "deriva-ml"
13
41
 
14
- # Special RID for dry runs
42
+ # Special RID value used for dry-run operations that don't modify the database
15
43
  DRY_RUN_RID = "0000"
16
44
 
17
- # Regular expression parts for RIDs
45
+ # System schemas that are part of Deriva infrastructure (not user domain schemas)
46
+ # These are excluded when auto-detecting domain schemas
47
+ SYSTEM_SCHEMAS: frozenset[str] = frozenset({"public", "www", "WWW"})
48
+
49
+
50
+ def is_system_schema(schema_name: str, ml_schema: str = ML_SCHEMA) -> bool:
51
+ """Check if a schema is a system or ML schema (not a domain schema).
52
+
53
+ System schemas are Deriva infrastructure schemas (public, www, WWW) and the
54
+ ML schema (deriva-ml by default). Domain schemas are user-defined schemas
55
+ containing business logic tables.
56
+
57
+ Args:
58
+ schema_name: Name of the schema to check.
59
+ ml_schema: Name of the ML schema (default: 'deriva-ml').
60
+
61
+ Returns:
62
+ True if the schema is a system or ML schema, False if it's a domain schema.
63
+
64
+ Example:
65
+ >>> is_system_schema("public")
66
+ True
67
+ >>> is_system_schema("deriva-ml")
68
+ True
69
+ >>> is_system_schema("my_project")
70
+ False
71
+ """
72
+ return schema_name.lower() in {s.lower() for s in SYSTEM_SCHEMAS} or schema_name == ml_schema
73
+
74
+
75
+ def get_domain_schemas(all_schemas: set[str] | list[str], ml_schema: str = ML_SCHEMA) -> frozenset[str]:
76
+ """Return all domain schemas from a collection of schema names.
77
+
78
+ Filters out system schemas (public, www, WWW) and the ML schema to return
79
+ only user-defined domain schemas.
80
+
81
+ Args:
82
+ all_schemas: Collection of schema names to filter.
83
+ ml_schema: Name of the ML schema to exclude (default: 'deriva-ml').
84
+
85
+ Returns:
86
+ Frozen set of domain schema names.
87
+
88
+ Example:
89
+ >>> get_domain_schemas(["public", "deriva-ml", "my_project", "www"])
90
+ frozenset({'my_project'})
91
+ """
92
+ return frozenset(s for s in all_schemas if not is_system_schema(s, ml_schema))
93
+
94
+ # =============================================================================
95
+ # RID Regular Expression Components
96
+ # =============================================================================
97
+
98
+ # Pattern for the RID portion: 1-4 alphanumeric chars, optionally followed by
99
+ # hyphen-separated groups of exactly 4 alphanumeric chars (e.g., "1ABC" or "1ABC-DEF2-3GHI")
18
100
  rid_part = r"(?P<rid>(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+))"
101
+
102
+ # Pattern for optional snapshot identifier suffix (e.g., "@2ABC-DEF3-4GHI")
103
+ # Uses the same format as RID for the snapshot identifier
19
104
  snapshot_part = r"(?:@(?P<snapshot>(?:[A-Z\d]{1,4}|[A-Z\d]{1,4}(?:-[A-Z\d]{4})+)))?"
105
+
106
+ # Complete regex for validating RID strings with optional snapshot
20
107
  rid_regex = f"^{rid_part}{snapshot_part}$"
21
108
 
22
- # RID type definition
23
- BaseRIDString = constr(pattern=rid_regex)
24
- # RID = TypeVar("RID", bound=BaseRIDString)
25
- RIDType: TypeAlias = constr(pattern=rid_regex)
26
- RID = NewType("RID", BaseRIDString)
109
+ # =============================================================================
110
+ # Type Aliases
111
+ # =============================================================================
27
112
 
28
- # System columns in Deriva
113
+ # RID type with Pydantic validation - ensures strings match the RID format
114
+ # Used throughout the codebase for type hints and runtime validation
115
+ RID = Annotated[str, StringConstraints(pattern=rid_regex)]
116
+
117
+ # =============================================================================
118
+ # Column Definitions
119
+ # =============================================================================
120
+
121
+ # Standard Deriva system columns present in every table:
122
+ # - RID: Resource Identifier (unique key)
123
+ # - RCT: Record Creation Time
124
+ # - RMT: Record Modification Time
125
+ # - RCB: Record Created By (user ID)
126
+ # - RMB: Record Modified By (user ID)
29
127
  DerivaSystemColumns = ["RID", "RCT", "RMT", "RCB", "RMB"]
128
+
129
+ # Columns specific to asset tables (files, images, etc.)
130
+ # Includes system columns plus asset-specific metadata
30
131
  DerivaAssetColumns = {
31
- "Filename",
32
- "URL",
33
- "Length",
34
- "MD5",
35
- "Description",
36
- }.union(set(DerivaSystemColumns))
132
+ "Filename", # Original filename
133
+ "URL", # Hatrac storage URL
134
+ "Length", # File size in bytes
135
+ "MD5", # MD5 checksum for integrity verification
136
+ "Description", # Optional description of the asset
137
+ }.union(set(DerivaSystemColumns))
deriva_ml/core/definitions.py CHANGED
@@ -1,23 +1,52 @@
1
- """
2
- Shared definitions that are used in different DerivaML modules.
3
- This module re-exports all symbols from the core submodules for backwards compatibility.
1
+ """Shared definitions for DerivaML modules.
2
+
3
+ This module serves as the central location for type definitions, constants, enums,
4
+ and data models used throughout DerivaML. It re-exports symbols from specialized
5
+ submodules for convenience and backwards compatibility.
6
+
7
+ The module consolidates:
8
+ - Constants: Schema names, RID patterns, column definitions
9
+ - Enums: Status codes, upload states, built-in types, vocabulary identifiers
10
+ - Models: Dataclass-based models for ERMrest structures (tables, columns, keys)
11
+ - Utilities: FileSpec for file metadata handling
12
+
13
+ Core definition classes (ColumnDef, KeyDef, ForeignKeyDef, TableDef) are provided by
14
+ `deriva.core.typed` and re-exported here. Legacy aliases (ColumnDefinition, etc.)
15
+ are maintained for backwards compatibility.
16
+
17
+ This is the recommended import location for most DerivaML type definitions:
18
+ >>> from deriva_ml.core.definitions import RID, MLVocab, TableDef
19
+
20
+ For more specialized imports, you can import directly from submodules:
21
+ >>> from deriva_ml.core.constants import ML_SCHEMA
22
+ >>> from deriva_ml.core.enums import Status
23
+ >>> from deriva.core.typed import ColumnDef
4
24
  """
5
25
 
6
26
  from __future__ import annotations
7
27
 
8
- # Re-export constants
28
+ # =============================================================================
29
+ # Re-exported Constants
30
+ # =============================================================================
31
+ # From constants.py: Schema names, RID patterns, and column definitions
9
32
  from deriva_ml.core.constants import (
10
33
  DRY_RUN_RID,
11
34
  ML_SCHEMA,
12
35
  RID,
36
+ SYSTEM_SCHEMAS,
13
37
  DerivaAssetColumns,
14
38
  DerivaSystemColumns,
39
+ get_domain_schemas,
40
+ is_system_schema,
15
41
  rid_part,
16
42
  rid_regex,
17
43
  snapshot_part,
18
44
  )
19
45
 
20
- # Re-export enums
46
+ # =============================================================================
47
+ # Re-exported Enums
48
+ # =============================================================================
49
+ # From enums.py: Status codes, type identifiers, and vocabulary names
21
50
  from deriva_ml.core.enums import (
22
51
  BaseStrEnum,
23
52
  BuiltinTypes,
@@ -29,46 +58,127 @@ from deriva_ml.core.enums import (
29
58
  Status,
30
59
  UploadState,
31
60
  )
61
+ # Also export BuiltinType directly (BuiltinTypes is the backwards-compatible alias)
62
+ from deriva.core.typed import BuiltinType
32
63
 
33
- # Re-export models
64
+ # =============================================================================
65
+ # Re-exported ERMrest Models
66
+ # =============================================================================
67
+ # From ermrest.py: Dataclass-based models for catalog structure definitions
68
+ # New typed classes from deriva.core.typed
34
69
  from deriva_ml.core.ermrest import (
70
+ # New dataclass-based definitions from deriva.core.typed
71
+ ColumnDef,
72
+ KeyDef,
73
+ ForeignKeyDef,
74
+ TableDef,
75
+ VocabularyTableDef,
76
+ AssetTableDef,
77
+ AssociationTableDef,
78
+ SchemaDef,
79
+ # Legacy aliases for backwards compatibility
35
80
  ColumnDefinition,
36
- FileUploadState,
37
- ForeignKeyDefinition,
38
81
  KeyDefinition,
82
+ ForeignKeyDefinition,
39
83
  TableDefinition,
84
+ # DerivaML-specific classes
85
+ FileUploadState,
86
+ UploadCallback,
87
+ UploadProgress,
40
88
  VocabularyTerm,
89
+ VocabularyTermHandle,
90
+ )
91
+
92
+ # =============================================================================
93
+ # Re-exported Exceptions
94
+ # =============================================================================
95
+ # From exceptions.py: Exception hierarchy for DerivaML errors
96
+ from deriva_ml.core.exceptions import (
97
+ DerivaMLAuthenticationError,
98
+ DerivaMLConfigurationError,
99
+ DerivaMLCycleError,
100
+ DerivaMLDataError,
101
+ DerivaMLDatasetNotFound,
102
+ DerivaMLException,
103
+ DerivaMLExecutionError,
104
+ DerivaMLInvalidTerm,
105
+ DerivaMLNotFoundError,
106
+ DerivaMLReadOnlyError,
107
+ DerivaMLSchemaError,
108
+ DerivaMLTableNotFound,
109
+ DerivaMLTableTypeError,
110
+ DerivaMLUploadError,
111
+ DerivaMLValidationError,
112
+ DerivaMLWorkflowError,
41
113
  )
42
114
 
43
- # Re-export exceptions
115
+ # =============================================================================
116
+ # Re-exported Utilities
117
+ # =============================================================================
118
+ # From filespec.py: File metadata and specification handling
44
119
  from deriva_ml.core.filespec import FileSpec
45
120
 
46
121
  __all__ = [
47
122
  # Constants
48
123
  "ML_SCHEMA",
49
124
  "DRY_RUN_RID",
125
+ "SYSTEM_SCHEMAS",
50
126
  "rid_part",
51
127
  "snapshot_part",
52
128
  "rid_regex",
53
129
  "DerivaSystemColumns",
54
130
  "DerivaAssetColumns",
55
131
  "RID",
132
+ # Schema classification helpers
133
+ "is_system_schema",
134
+ "get_domain_schemas",
56
135
  # Enums
57
136
  "BaseStrEnum",
58
137
  "UploadState",
59
138
  "Status",
139
+ "BuiltinType",
60
140
  "BuiltinTypes",
61
141
  "MLVocab",
62
142
  "MLTable",
63
143
  "MLAsset",
64
144
  "ExecMetadataType",
65
145
  "ExecAssetType",
66
- # Models
67
- "FileUploadState",
68
- "FileSpec",
69
- "VocabularyTerm",
146
+ # Typed definitions from deriva.core.typed
147
+ "ColumnDef",
148
+ "KeyDef",
149
+ "ForeignKeyDef",
150
+ "TableDef",
151
+ "VocabularyTableDef",
152
+ "AssetTableDef",
153
+ "AssociationTableDef",
154
+ "SchemaDef",
155
+ # Legacy aliases for backwards compatibility
70
156
  "ColumnDefinition",
71
157
  "KeyDefinition",
72
158
  "ForeignKeyDefinition",
73
159
  "TableDefinition",
160
+ # DerivaML-specific models
161
+ "FileUploadState",
162
+ "FileSpec",
163
+ "VocabularyTerm",
164
+ "VocabularyTermHandle",
165
+ "UploadProgress",
166
+ "UploadCallback",
167
+ # Exceptions
168
+ "DerivaMLException",
169
+ "DerivaMLConfigurationError",
170
+ "DerivaMLSchemaError",
171
+ "DerivaMLAuthenticationError",
172
+ "DerivaMLDataError",
173
+ "DerivaMLNotFoundError",
174
+ "DerivaMLDatasetNotFound",
175
+ "DerivaMLTableNotFound",
176
+ "DerivaMLInvalidTerm",
177
+ "DerivaMLTableTypeError",
178
+ "DerivaMLValidationError",
179
+ "DerivaMLCycleError",
180
+ "DerivaMLExecutionError",
181
+ "DerivaMLWorkflowError",
182
+ "DerivaMLUploadError",
183
+ "DerivaMLReadOnlyError",
74
184
  ]