deriva-ml 1.17.15__py3-none-any.whl → 1.17.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. deriva_ml/__init__.py +2 -2
  2. deriva_ml/asset/asset.py +0 -4
  3. deriva_ml/catalog/__init__.py +6 -0
  4. deriva_ml/catalog/clone.py +1513 -22
  5. deriva_ml/catalog/localize.py +66 -29
  6. deriva_ml/core/base.py +12 -9
  7. deriva_ml/core/definitions.py +13 -12
  8. deriva_ml/core/ermrest.py +11 -12
  9. deriva_ml/core/mixins/annotation.py +2 -2
  10. deriva_ml/core/mixins/asset.py +3 -3
  11. deriva_ml/core/mixins/dataset.py +3 -3
  12. deriva_ml/core/mixins/execution.py +1 -0
  13. deriva_ml/core/mixins/feature.py +2 -2
  14. deriva_ml/core/mixins/file.py +2 -2
  15. deriva_ml/core/mixins/path_builder.py +2 -2
  16. deriva_ml/core/mixins/rid_resolution.py +2 -2
  17. deriva_ml/core/mixins/vocabulary.py +2 -2
  18. deriva_ml/core/mixins/workflow.py +3 -3
  19. deriva_ml/dataset/catalog_graph.py +3 -4
  20. deriva_ml/dataset/dataset.py +5 -3
  21. deriva_ml/dataset/dataset_bag.py +0 -2
  22. deriva_ml/dataset/upload.py +2 -2
  23. deriva_ml/demo_catalog.py +0 -1
  24. deriva_ml/execution/__init__.py +8 -8
  25. deriva_ml/execution/base_config.py +2 -2
  26. deriva_ml/execution/execution.py +5 -3
  27. deriva_ml/execution/execution_record.py +0 -1
  28. deriva_ml/execution/model_protocol.py +1 -1
  29. deriva_ml/execution/multirun_config.py +0 -1
  30. deriva_ml/execution/runner.py +3 -3
  31. deriva_ml/experiment/experiment.py +3 -3
  32. deriva_ml/feature.py +2 -2
  33. deriva_ml/interfaces.py +2 -2
  34. deriva_ml/model/__init__.py +45 -24
  35. deriva_ml/model/annotations.py +0 -1
  36. deriva_ml/model/catalog.py +3 -2
  37. deriva_ml/model/data_loader.py +330 -0
  38. deriva_ml/model/data_sources.py +439 -0
  39. deriva_ml/model/database.py +216 -32
  40. deriva_ml/model/fk_orderer.py +379 -0
  41. deriva_ml/model/handles.py +1 -1
  42. deriva_ml/model/schema_builder.py +816 -0
  43. deriva_ml/run_model.py +3 -3
  44. deriva_ml/schema/annotations.py +2 -1
  45. deriva_ml/schema/create_schema.py +1 -1
  46. deriva_ml/schema/validation.py +1 -1
  47. {deriva_ml-1.17.15.dist-info → deriva_ml-1.17.16.dist-info}/METADATA +1 -1
  48. deriva_ml-1.17.16.dist-info/RECORD +81 -0
  49. deriva_ml-1.17.15.dist-info/RECORD +0 -77
  50. {deriva_ml-1.17.15.dist-info → deriva_ml-1.17.16.dist-info}/WHEEL +0 -0
  51. {deriva_ml-1.17.15.dist-info → deriva_ml-1.17.16.dist-info}/entry_points.txt +0 -0
  52. {deriva_ml-1.17.15.dist-info → deriva_ml-1.17.16.dist-info}/licenses/LICENSE +0 -0
  53. {deriva_ml-1.17.15.dist-info → deriva_ml-1.17.16.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,8 @@ import tempfile
  from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING
- from urllib.parse import urlparse, quote as urlquote
+ from urllib.parse import quote as urlquote
+ from urllib.parse import urlparse

  from deriva.core import ErmrestCatalog, HatracStore, get_credential

@@ -44,19 +45,20 @@ def localize_assets(
  hatrac_namespace: str | None = None,
  chunk_size: int | None = None,
  dry_run: bool = False,
+ source_hostname: str | None = None,
  ) -> LocalizeResult:
  """Localize remote hatrac assets to the local catalog server.

- Downloads assets from remote hatrac servers (determined from the URL in each
- asset record) and uploads them to the local hatrac server, updating the asset
- table URLs to point to the local copies.
+ Downloads assets from remote hatrac servers and uploads them to the local
+ hatrac server, updating the asset table URLs to point to the local copies.

  This is useful after cloning a catalog with asset_mode="refs" where the
- asset URLs still point to the source server. Use this function to make
- the assets fully local.
+ asset URLs still point to the source server (either as absolute URLs or
+ as relative hatrac paths). Use this function to make the assets fully local.

- The source hatrac server for each asset is determined automatically from
- the URL stored in the asset record.
+ The source hatrac server for each asset is determined:
+ 1. From the URL if it's an absolute URL (e.g., https://source.org/hatrac/...)
+ 2. From the source_hostname parameter if the URL is relative (e.g., /hatrac/...)

  This function is optimized for bulk operations:
  - Fetches all asset records in a single query
@@ -75,6 +77,9 @@ def localize_assets(
  chunk_size: Optional chunk size in bytes for large file uploads. If None,
  uses default chunking behavior.
  dry_run: If True, only report what would be done without making changes.
+ source_hostname: Hostname to use for assets with relative URLs (e.g.,
+ "www.facebase.org"). Required when localizing assets cloned with
+ asset_mode="refs" from a different server.

  Returns:
  LocalizeResult with counts and details of the operation.
@@ -93,6 +98,15 @@ def localize_assets(
  ... )
  >>> print(f"Localized {result.assets_processed} assets")

+ Localize assets cloned from another server with relative URLs:
+ >>> result = localize_assets(
+ ... ml,
+ ... asset_table="file",
+ ... asset_rids=["TG0", "TG2"],
+ ... schema_name="isa",
+ ... source_hostname="www.facebase.org", # Where the hatrac files are
+ ... )
+
  Localize using ErmrestCatalog:
  >>> from deriva.core import DerivaServer
  >>> server = DerivaServer("https", "localhost")
@@ -131,6 +145,12 @@ def localize_assets(
  # Build a map of RID -> record for easy lookup
  records_by_rid = {r["RID"]: r for r in all_records}

+ # Detect URL column name from first record (try URL first, then url)
+ url_column = "URL"
+ if all_records:
+ if "URL" not in all_records[0] and "url" in all_records[0]:
+ url_column = "url"
+
  # Identify which assets need to be localized
  assets_to_localize = []
  for rid in asset_rids:
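The hunk above settles on a single detected column name that the later catalog update reuses. A minimal standalone sketch of that detection pattern (the sample record is illustrative, not taken from the package):

    def detect_url_column(records: list[dict]) -> str:
        """Prefer the ERMrest-style "URL" key, fall back to lowercase "url"."""
        if records and "URL" not in records[0] and "url" in records[0]:
            return "url"
        return "URL"

    # Illustrative record using the lowercase convention.
    print(detect_url_column([{"RID": "TG0", "url": "/hatrac/data/file1"}]))  # -> url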
@@ -140,22 +160,29 @@ def localize_assets(
  result.assets_skipped += 1
  continue

- current_url = record.get("URL")
+ # Try both URL and url column names (different catalogs use different conventions)
+ current_url = record.get("URL") or record.get("url")
  if not current_url:
- logger.warning(f"Asset {rid} has no URL, skipping")
+ logger.warning(f"Asset {rid} has no URL column, skipping")
  result.assets_skipped += 1
  continue

  # Parse the URL to get source hostname
  parsed_url = urlparse(current_url)
- source_hostname = parsed_url.netloc
-
- if not source_hostname:
- logger.info(f"Asset {rid} has relative URL, already local")
- result.assets_skipped += 1
- continue
+ asset_source_hostname = parsed_url.netloc
+
+ if not asset_source_hostname:
+ # URL is relative (e.g., /hatrac/facebase/data/...)
+ if source_hostname:
+ # Use provided source_hostname for relative URLs
+ asset_source_hostname = source_hostname
+ logger.info(f"Asset {rid} has relative URL, using source_hostname={source_hostname}")
+ else:
+ logger.info(f"Asset {rid} has relative URL, already local (specify source_hostname to localize)")
+ result.assets_skipped += 1
+ continue

- if source_hostname == hostname:
+ if asset_source_hostname == hostname:
  logger.info(f"Asset {rid} is already local, skipping")
  result.assets_skipped += 1
  continue
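The relative-URL branch above amounts to a small resolution rule: an absolute URL names its own host, while a bare hatrac path defers to the caller-supplied source_hostname. A hedged sketch of that rule, standing alone (the helper name is ours):

    from urllib.parse import urlparse

    def resolve_source_host(asset_url: str, source_hostname: str | None) -> str | None:
        host = urlparse(asset_url).netloc
        # Absolute URL (e.g. https://source.org/hatrac/...) carries its own host;
        # a relative path (e.g. /hatrac/...) falls back to source_hostname, which may be None.
        return host or source_hostname

    print(resolve_source_host("https://source.org/hatrac/ns/obj", None))           # source.org
    print(resolve_source_host("/hatrac/facebase/data/obj", "www.facebase.org"))    # www.facebase.org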
@@ -170,7 +197,7 @@ def localize_assets(
  assets_to_localize.append({
  "rid": rid,
  "record": record,
- "source_hostname": source_hostname,
+ "source_hostname": asset_source_hostname,
  "source_path": source_path,
  "current_url": current_url,
  })
@@ -209,8 +236,9 @@ def localize_assets(
  source_hostname = asset_info["source_hostname"]
  source_path = asset_info["source_path"]
  current_url = asset_info["current_url"]
- filename = record.get("Filename")
- md5 = record.get("MD5")
+ # Handle case variations in column names
+ filename = record.get("Filename") or record.get("filename")
+ md5 = record.get("MD5") or record.get("md5")

  logger.info(f"[{i+1}/{len(assets_to_localize)}] Localizing {rid}: {filename} from {source_hostname}")

@@ -232,16 +260,22 @@ def localize_assets(
  # Upload to local hatrac
  dest_path = f"{hatrac_namespace}/{md5}.{filename}" if md5 and filename else f"{hatrac_namespace}/{rid}"

+ # Enable chunking for large files (> 100MB) by default
+ file_size = local_file.stat().st_size
+ default_chunk_size = 50 * 1024 * 1024 # 50MB chunks
+ use_chunked = chunk_size is not None or file_size > 100 * 1024 * 1024
+ actual_chunk_size = chunk_size or default_chunk_size
+
  new_url = local_hatrac.put_loc(
  dest_path,
  str(local_file),
  headers={"Content-Disposition": f"filename*=UTF-8''{urlquote(filename or 'asset')}"},
- chunked=chunk_size is not None,
- chunk_size=chunk_size or 0,
+ chunked=use_chunked,
+ chunk_size=actual_chunk_size if use_chunked else 0,
  )

- # Queue the catalog update
- catalog_updates.append({"RID": rid, "URL": new_url})
+ # Queue the catalog update using the detected URL column name
+ catalog_updates.append({"RID": rid, url_column: new_url})

  logger.info(f"Localized asset {rid}: {current_url} -> {new_url}")
  result.assets_processed += 1
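The chunking change above moves from "chunk only when a size is given" to "chunk whenever the caller asks or the file is large". A sketch of that decision in isolation (the 100 MB threshold and 50 MB default mirror the constants in the hunk; the helper name is ours):

    from pathlib import Path

    def chunking_params(local_file: Path, chunk_size: int | None) -> tuple[bool, int]:
        """Return (chunked, chunk_size) following the >100 MB / 50 MB-default rule."""
        file_size = local_file.stat().st_size
        use_chunked = chunk_size is not None or file_size > 100 * 1024 * 1024
        return use_chunked, (chunk_size or 50 * 1024 * 1024) if use_chunked else 0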
@@ -257,20 +291,23 @@ def localize_assets(
  result.errors.append(error_msg)
  result.assets_failed += 1

- # Batch update the catalog records
+ # Batch update the catalog records using datapath
  if catalog_updates:
  logger.info(f"Updating {len(catalog_updates)} catalog records...")
  try:
- table_path.path.update(catalog_updates)
- logger.info("Catalog records updated successfully")
+ # Use datapath update - table_path.update() handles the update correctly
+ table_path.update(catalog_updates)
+ logger.info(f"Updated {len(catalog_updates)} catalog records successfully")
  except Exception as e:
  # If batch update fails, try individual updates as fallback
  logger.warning(f"Batch update failed ({e}), falling back to individual updates...")
  for update in catalog_updates:
+ rid = update["RID"]
  try:
- table_path.path.filter(table_path.RID == update["RID"]).update([update])
+ table_path.update([update])
+ logger.info(f"Updated catalog record {rid}")
  except Exception as e2:
- error_msg = f"Failed to update catalog record {rid}: {e2}"
+ error_msg = f"Failed to update catalog record {rid}: {e2}"
  logger.error(error_msg)
  result.errors.append(error_msg)

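The update path above now issues one bulk datapath update and only falls back to per-record updates when the batch fails. The retry shape, sketched against a generic table wrapper (assuming, as in the hunk, that update() accepts a list of {"RID": ..., url_column: ...} dicts):

    def apply_updates(table_path, updates: list[dict], logger) -> list[str]:
        errors = []
        try:
            table_path.update(updates)           # one bulk request for all records
        except Exception as exc:
            logger.warning("Batch update failed (%s); retrying individually", exc)
            for update in updates:
                try:
                    table_path.update([update])  # per-record fallback
                except Exception as exc2:
                    errors.append(f"Failed to update catalog record {update['RID']}: {exc2}")
        return errors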
deriva_ml/core/base.py CHANGED
@@ -47,20 +47,20 @@ from deriva_ml.core.config import DerivaMLConfig
  from deriva_ml.core.definitions import ML_SCHEMA, RID, Status, TableDefinition, VocabularyTableDef
  from deriva_ml.core.exceptions import DerivaMLException
  from deriva_ml.core.logging_config import apply_logger_overrides, configure_logging
- from deriva_ml.dataset.upload import bulk_upload_configuration
- from deriva_ml.interfaces import DerivaMLCatalog
  from deriva_ml.core.mixins import (
  AnnotationMixin,
- VocabularyMixin,
- RidResolutionMixin,
- PathBuilderMixin,
- WorkflowMixin,
- FeatureMixin,
- DatasetMixin,
  AssetMixin,
+ DatasetMixin,
  ExecutionMixin,
+ FeatureMixin,
  FileMixin,
+ PathBuilderMixin,
+ RidResolutionMixin,
+ VocabularyMixin,
+ WorkflowMixin,
  )
+ from deriva_ml.dataset.upload import bulk_upload_configuration
+ from deriva_ml.interfaces import DerivaMLCatalog

  # Optional debug imports
  try:
@@ -74,6 +74,7 @@ if TYPE_CHECKING:
  from deriva_ml.catalog.clone import CatalogProvenance
  from deriva_ml.execution.execution import Execution
  from deriva_ml.model.catalog import DerivaModel
+ from deriva_ml.schema.validation import SchemaValidationReport

  # Stop pycharm from complaining about undefined references.
  ml: DerivaML
@@ -1098,6 +1099,7 @@ class DerivaML(
  ... print(f"{d['execution_rid']}: {d['size_mb']:.1f} MB")
  """
  from datetime import datetime
+
  from deriva_ml.dataset.upload import upload_root

  results = []
@@ -1155,6 +1157,7 @@ class DerivaML(
  """
  import shutil
  import time
+
  from deriva_ml.dataset.upload import upload_root

  stats = {'dirs_removed': 0, 'bytes_freed': 0, 'errors': 0}
@@ -1292,7 +1295,7 @@ class DerivaML(
  - deriva_ml.schema.validation.SchemaValidationReport
  - deriva_ml.schema.validation.validate_ml_schema
  """
- from deriva_ml.schema.validation import SchemaValidationReport, validate_ml_schema
+ from deriva_ml.schema.validation import validate_ml_schema
  return validate_ml_schema(self, strict=strict)

  # Methods moved to mixins:
@@ -25,6 +25,9 @@ For more specialized imports, you can import directly from submodules:

  from __future__ import annotations

+ # Also export BuiltinType directly (BuiltinTypes is the backwards-compatible alias)
+ from deriva.core.typed import BuiltinType
+
  # =============================================================================
  # Re-exported Constants
  # =============================================================================
@@ -58,8 +61,6 @@ from deriva_ml.core.enums import (
  Status,
  UploadState,
  )
- # Also export BuiltinType directly (BuiltinTypes is the backwards-compatible alias)
- from deriva.core.typed import BuiltinType

  # =============================================================================
  # Re-exported ERMrest Models
@@ -67,24 +68,24 @@ from deriva.core.typed import BuiltinType
  # From ermrest.py: Dataclass-based models for catalog structure definitions
  # New typed classes from deriva.core.typed
  from deriva_ml.core.ermrest import (
- # New dataclass-based definitions from deriva.core.typed
- ColumnDef,
- KeyDef,
- ForeignKeyDef,
- TableDef,
- VocabularyTableDef,
  AssetTableDef,
  AssociationTableDef,
- SchemaDef,
+ # New dataclass-based definitions from deriva.core.typed
+ ColumnDef,
  # Legacy aliases for backwards compatibility
  ColumnDefinition,
- KeyDefinition,
- ForeignKeyDefinition,
- TableDefinition,
  # DerivaML-specific classes
  FileUploadState,
+ ForeignKeyDef,
+ ForeignKeyDefinition,
+ KeyDef,
+ KeyDefinition,
+ SchemaDef,
+ TableDef,
+ TableDefinition,
  UploadCallback,
  UploadProgress,
+ VocabularyTableDef,
  VocabularyTerm,
  VocabularyTermHandle,
  )
deriva_ml/core/ermrest.py CHANGED
@@ -21,6 +21,17 @@ import warnings
  from dataclasses import dataclass
  from typing import Any, Protocol

+ # Import and re-export typed definitions from deriva.core.typed
+ from deriva.core.typed import (
+ AssetTableDef,
+ AssociationTableDef,
+ ColumnDef,
+ ForeignKeyDef,
+ KeyDef,
+ SchemaDef,
+ TableDef,
+ VocabularyTableDef,
+ )
  from pydantic import (
  BaseModel,
  Field,
@@ -31,18 +42,6 @@ from pydantic import (
  from .constants import RID
  from .enums import UploadState

- # Import and re-export typed definitions from deriva.core.typed
- from deriva.core.typed import (
- ColumnDef,
- KeyDef,
- ForeignKeyDef,
- TableDef,
- VocabularyTableDef,
- AssetTableDef,
- AssociationTableDef,
- SchemaDef,
- )
-
  # Re-export all typed classes for convenience
  __all__ = [
  # New typed definitions from deriva.core.typed
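Since ermrest.py now imports these dataclasses directly from deriva.core.typed and re-exports them, both import paths should resolve to the same objects. A quick check, assuming deriva.core.typed is installed alongside deriva-ml:

    from deriva.core.typed import TableDef as TypedTableDef
    from deriva_ml.core.ermrest import TableDef

    assert TableDef is TypedTableDef  # re-export, not a copy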
@@ -14,10 +14,10 @@ Annotation Tags:

  from __future__ import annotations

- from typing import TYPE_CHECKING, Any, Callable
-
  # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
  import importlib
+ from typing import TYPE_CHECKING, Any, Callable
+
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
  Column = _ermrest_model.Column
  Table = _ermrest_model.Table
@@ -6,14 +6,14 @@ asset table operations including creating, listing, and looking up assets.

  from __future__ import annotations

- from typing import TYPE_CHECKING, Any, Callable, Iterable
-
  # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
  import importlib
+ from typing import TYPE_CHECKING, Any, Callable, Iterable
+
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
  Table = _ermrest_model.Table

- from deriva_ml.core.definitions import AssetTableDef, ColumnDefinition, MLVocab, RID, VocabularyTerm
+ from deriva_ml.core.definitions import RID, AssetTableDef, ColumnDefinition, MLVocab, VocabularyTerm
  from deriva_ml.core.exceptions import DerivaMLException
  from deriva_ml.schema.annotations import asset_annotation

@@ -7,16 +7,16 @@ deleting, and managing dataset elements.

  from __future__ import annotations

- from typing import TYPE_CHECKING, Any, Callable, Iterable
-
  # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
  import importlib
+ from typing import TYPE_CHECKING, Any, Callable, Iterable
+
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
  Table = _ermrest_model.Table

  from pydantic import ConfigDict, validate_call

- from deriva_ml.core.definitions import RID, MLVocab
+ from deriva_ml.core.definitions import RID
  from deriva_ml.core.exceptions import DerivaMLException, DerivaMLTableTypeError
  from deriva_ml.dataset.aux_classes import DatasetSpec

@@ -369,6 +369,7 @@ class ExecutionMixin:
  ... print(f"{exp.name}: {exp.config_choices}")
  """
  import re
+
  from deriva_ml.experiment import Experiment

  # Get datapath to tables
@@ -7,11 +7,11 @@ and listing feature values.

  from __future__ import annotations

+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+ import importlib
  from itertools import chain
  from typing import TYPE_CHECKING, Any, Callable, Iterable

- # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
- import importlib
  datapath = importlib.import_module("deriva.core.datapath")
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
  Key = _ermrest_model.Key
@@ -6,14 +6,14 @@ file operations including adding and listing files.

  from __future__ import annotations

+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+ import importlib
  from collections import defaultdict
  from itertools import chain
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Callable, Iterable
  from urllib.parse import urlsplit

- # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
- import importlib
  datapath = importlib.import_module("deriva.core.datapath")

  from deriva_ml.core.definitions import RID, FileSpec, MLTable, MLVocab, VocabularyTerm
@@ -6,11 +6,11 @@ catalog path building and table access utilities.

  from __future__ import annotations

+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+ import importlib
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Iterable

- # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
- import importlib
  datapath = importlib.import_module("deriva.core.datapath")
  _ermrest_catalog = importlib.import_module("deriva.core.ermrest_catalog")
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
@@ -6,11 +6,11 @@ Resource Identifier (RID) resolution and retrieval operations.

  from __future__ import annotations

+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+ import importlib
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any

- # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
- import importlib
  _datapath = importlib.import_module("deriva.core.datapath")
  _ermrest_catalog = importlib.import_module("deriva.core.ermrest_catalog")
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
@@ -7,10 +7,10 @@ controlled vocabulary tables.

  from __future__ import annotations

- from typing import TYPE_CHECKING, Any, Callable
-
  # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
  import importlib
+ from typing import TYPE_CHECKING, Any, Callable
+
  _datapath = importlib.import_module("deriva.core.datapath")
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
  DataPathException = _datapath.DataPathException
@@ -7,10 +7,10 @@ and creating workflows.

  from __future__ import annotations

- from typing import TYPE_CHECKING, Any, Callable
-
  # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
  import importlib
+ from typing import TYPE_CHECKING, Any, Callable
+
  _deriva_core = importlib.import_module("deriva.core")
  format_exception = _deriva_core.format_exception

@@ -19,7 +19,7 @@ from deriva_ml.core.exceptions import DerivaMLException
  from deriva_ml.execution.workflow import Workflow

  if TYPE_CHECKING:
- from deriva_ml.interfaces import DerivaMLCatalog
+ pass


  class WorkflowMixin:
@@ -9,6 +9,7 @@ from deriva.core.utils.core_utils import tag as deriva_tags

  from deriva_ml.core.constants import RID
  from deriva_ml.interfaces import DatasetLike, DerivaMLCatalog
+ from deriva_ml.model.catalog import ASSET_COLUMNS

  try:

@@ -117,8 +118,7 @@ class CatalogGraph:
  ]

  # If this table is an asset table, then we need to output the files associated with the asset.
- asset_columns = {"Filename", "URL", "Length", "MD5", "Description"}
- if asset_columns.issubset({c.name for c in table.columns}):
+ if ASSET_COLUMNS.issubset({c.name for c in table.columns}):
  exports.append(
  {
  "processor": "fetch",
@@ -168,8 +168,7 @@ class CatalogGraph:
  ]

  # If this table is an asset table, then we need to output the files associated with the asset.
- asset_columns = {"Filename", "URL", "Length", "MD5", "Description"}
- if asset_columns.issubset({c.name for c in table.columns}):
+ if ASSET_COLUMNS.issubset({c.name for c in table.columns}):
  exports.append(
  {
  "source": {
@@ -39,12 +39,15 @@ from pathlib import Path
  # Local imports
  from pprint import pformat
  from tempfile import TemporaryDirectory
- from typing import Any, Generator, Iterable, Self
+ from typing import TYPE_CHECKING, Any, Generator, Iterable, Self
  from urllib.parse import urlparse

  # Deriva imports
  import deriva.core.utils.hash_utils as hash_utils

+ if TYPE_CHECKING:
+ from deriva_ml.execution.execution import Execution
+
  # Third-party imports
  import pandas as pd
  import requests
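The TYPE_CHECKING guard added above is the usual way to keep an annotation-only import out of the runtime import graph (here it avoids the dataset/execution circular import). A generic sketch of the pattern:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Only evaluated by static type checkers, never at runtime.
        from deriva_ml.execution.execution import Execution

    def describe(execution: Execution) -> str:
        return f"execution record {execution!r}"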
@@ -581,7 +584,7 @@ class Dataset:
  >>> ds = ml.lookup_dataset("4HM")
  >>> ds.display_markdown(show_children=True)
  """
- from IPython.display import display, Markdown
+ from IPython.display import Markdown, display

  display(Markdown(self.to_markdown(show_children, indent)))

@@ -1331,7 +1334,6 @@ class Dataset:
  ... print(f"Execution {exe.execution_rid}: {exe.status}")
  """
  # Import here to avoid circular dependency
- from deriva_ml.execution.execution import Execution

  pb = self._ml_instance.pathBuilder()
  dataset_execution_path = pb.schemas[self._ml_instance.ml_schema].Dataset_Execution
@@ -40,8 +40,6 @@ from dataclasses import dataclass, field
  from pathlib import Path
  from typing import TYPE_CHECKING, Any, Callable, Generator, Iterable, Self, cast

- import deriva.core.datapath as datapath
-
  # Third-party imports
  import pandas as pd

@@ -35,6 +35,8 @@ Here is the directory layout we support:
  file1.jsonl, file2.jsonl
  """

+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
+ import importlib
  import json
  import os
  from pathlib import Path
@@ -43,8 +45,6 @@ from typing import Any, Callable, Optional

  import regex as re

- # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
- import importlib
  _deriva_core = importlib.import_module("deriva.core")
  _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
  _hatrac_store = importlib.import_module("deriva.core.hatrac_store")
deriva_ml/demo_catalog.py CHANGED
@@ -20,7 +20,6 @@ from random import choice, randint, random
  from tempfile import TemporaryDirectory

  from deriva.core import BaseCLI, ErmrestCatalog
- from deriva.core.ermrest_model import Schema, Table
  from deriva.core.typed import BuiltinType, ColumnDef, SchemaDef, TableDef
  from pydantic import BaseModel, ConfigDict
  from requests.exceptions import HTTPError
@@ -4,27 +4,27 @@ from typing import TYPE_CHECKING
  from deriva_ml.execution.base_config import (
  BaseConfig,
  DerivaBaseConfig,
+ # Config metadata helpers
+ DescribedList,
  base_defaults,
  get_notebook_configuration,
+ load_configs,
  # New simplified API
  notebook_config,
- load_configs,
  run_notebook,
- # Config metadata helpers
- DescribedList,
  with_description,
  )
+ from deriva_ml.execution.execution_configuration import AssetRID, ExecutionConfiguration
+ from deriva_ml.execution.model_protocol import DerivaMLModel
  from deriva_ml.execution.multirun_config import (
  MultirunSpec,
- multirun_config,
+ get_all_multirun_configs,
  get_multirun_config,
  list_multirun_configs,
- get_all_multirun_configs,
+ multirun_config,
  )
- from deriva_ml.execution.execution_configuration import AssetRID, ExecutionConfiguration
+ from deriva_ml.execution.runner import create_model_config, reset_multirun_state, run_model
  from deriva_ml.execution.workflow import Workflow
- from deriva_ml.execution.runner import run_model, create_model_config, reset_multirun_state
- from deriva_ml.execution.model_protocol import DerivaMLModel

  if TYPE_CHECKING:
  from deriva_ml.execution.execution import Execution
@@ -44,9 +44,9 @@ import os
  import pkgutil
  from dataclasses import dataclass, field
  from pathlib import Path
- from typing import Any, TypeVar, TYPE_CHECKING
+ from typing import TYPE_CHECKING, Any, TypeVar

- from hydra_zen import builds, instantiate, launch, make_config, store
+ from hydra_zen import builds, instantiate, launch, store

  if TYPE_CHECKING:
  from deriva_ml import DerivaML