deriva-ml 1.16.0__tar.gz → 1.17.1__tar.gz

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (117)
  1. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/PKG-INFO +9 -7
  2. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/pyproject.toml +12 -11
  3. deriva_ml-1.17.1/src/.DS_Store +0 -0
  4. deriva_ml-1.17.1/src/deriva_ml/.DS_Store +0 -0
  5. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/__init__.py +0 -10
  6. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/base.py +18 -6
  7. deriva_ml-1.17.1/src/deriva_ml/dataset/__init__.py +12 -0
  8. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/aux_classes.py +2 -10
  9. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/dataset.py +5 -4
  10. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/dataset_bag.py +144 -151
  11. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/upload.py +6 -4
  12. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/demo_catalog.py +16 -2
  13. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/__init__.py +2 -1
  14. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/execution.py +5 -3
  15. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/execution_configuration.py +28 -9
  16. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/workflow.py +8 -0
  17. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/model/catalog.py +55 -50
  18. deriva_ml-1.17.1/src/deriva_ml/model/database.py +719 -0
  19. deriva_ml-1.17.1/src/deriva_ml/test.py +94 -0
  20. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/PKG-INFO +9 -7
  21. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/SOURCES.txt +3 -1
  22. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/requires.txt +7 -5
  23. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/test_file.py +2 -1
  24. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/uv.lock +596 -847
  25. deriva_ml-1.16.0/src/deriva_ml/dataset/__init__.py +0 -17
  26. deriva_ml-1.16.0/src/deriva_ml/model/database.py +0 -345
  27. deriva_ml-1.16.0/src/deriva_ml/model/sql_mapper.py +0 -44
  28. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.github/release-drafter.yml +0 -0
  29. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.github/workflows/publish-docs.yml +0 -0
  30. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.github/workflows/release.yml +0 -0
  31. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.gitignore +0 -0
  32. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/LICENSE +0 -0
  33. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/README.md +0 -0
  34. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/.DS_Store +0 -0
  35. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  36. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  37. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  38. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  39. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  40. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  41. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/ERD.png +0 -0
  42. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/Launcher.png +0 -0
  43. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/copy_minid.png +0 -0
  44. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/deriva-logo.png +0 -0
  45. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/deriva-ml.pdf +0 -0
  46. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/sharing-at-home.pdf +0 -0
  47. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/dataset.md +0 -0
  48. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/dataset_aux_classes.md +0 -0
  49. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/dataset_bag.md +0 -0
  50. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/deriva_definitions.md +0 -0
  51. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/deriva_ml_base.md +0 -0
  52. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/deriva_model.md +0 -0
  53. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/execution.md +0 -0
  54. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/execution_configuration.md +0 -0
  55. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/feature.md +0 -0
  56. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/upload.md +0 -0
  57. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/index.md +0 -0
  58. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/release-notes.md +0 -0
  59. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/datasets.md +0 -0
  60. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/deriva_ml_structure.md +0 -0
  61. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/execution-configuration.md +0 -0
  62. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/file-assets.md +0 -0
  63. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/identifiers.md +0 -0
  64. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/install.md +0 -0
  65. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/notebooks.md +0 -0
  66. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/overview.md +0 -0
  67. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/mkdocs.yml +0 -0
  68. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/setup.cfg +0 -0
  69. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/bump_version.py +0 -0
  70. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/__init__.py +0 -0
  71. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/config.py +0 -0
  72. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/constants.py +0 -0
  73. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/definitions.py +0 -0
  74. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/enums.py +0 -0
  75. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/ermrest.py +0 -0
  76. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/exceptions.py +0 -0
  77. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/filespec.py +0 -0
  78. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/history.py +0 -0
  79. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/environment.py +0 -0
  80. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/feature.py +0 -0
  81. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/install_kernel.py +0 -0
  82. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/model/__init__.py +0 -0
  83. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/protocols/dataset.py +0 -0
  84. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/run_notebook.py +0 -0
  85. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/__init__.py +0 -0
  86. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/annotations.py +0 -0
  87. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/check_schema.py +0 -0
  88. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/create_schema.py +0 -0
  89. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  90. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/policy.json +0 -0
  91. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  92. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  93. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  94. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/top_level.txt +0 -0
  95. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/__init__.py +0 -0
  96. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/conftest.py +0 -0
  97. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/__init__.py +0 -0
  98. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/test_basic_tables.py +0 -0
  99. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/test_vocabulary.py +0 -0
  100. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/__init__.py +0 -0
  101. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/demo-catalog-schema.json +0 -0
  102. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/deriva-ml-reference.json +0 -0
  103. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  104. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/test_dataset_version.py +0 -0
  105. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/test_datasets.py +0 -0
  106. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/test_download.py +0 -0
  107. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/__init__.py +0 -0
  108. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/test_execution.py +0 -0
  109. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/workflow-test.ipynb +0 -0
  110. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/workflow-test.py +0 -0
  111. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/feature/test_features.py +0 -0
  112. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/model/__init__.py +0 -0
  113. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/model/test_database.py +0 -0
  114. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/model/test_models.py +0 -0
  115. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/test-files/execution-parameters.json +0 -0
  116. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/test-files/notebook-parameters.json +0 -0
  117. {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/test_utils.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.16.0
3
+ Version: 1.17.1
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
- Requires-Python: >=3.10
6
+ Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: bump-my-version
@@ -12,16 +12,18 @@ Requires-Dist: deriva~=1.7.10
12
12
  Requires-Dist: deepdiff
13
13
  Requires-Dist: nbconvert
14
14
  Requires-Dist: pandas
15
- Requires-Dist: regex~=2024.7.24
15
+ Requires-Dist: pip-system-certs
16
16
  Requires-Dist: pydantic>=2.11
17
- Requires-Dist: semver>3.0.0
18
- Requires-Dist: setuptools>=64
19
- Requires-Dist: setuptools-scm>=8.0
20
- Requires-Dist: nbstripout
21
17
  Requires-Dist: papermill
22
18
  Requires-Dist: pandas-stubs==2.2.3.250527
23
19
  Requires-Dist: pyyaml
20
+ Requires-Dist: regex~=2024.7.24
21
+ Requires-Dist: semver>3.0.0
22
+ Requires-Dist: setuptools>=80
23
+ Requires-Dist: setuptools-scm>=8.0
24
+ Requires-Dist: nbstripout
24
25
  Requires-Dist: hydra_zen
26
+ Requires-Dist: SQLAlchemy
25
27
  Dynamic: license-file
26
28
 
27
29
  # DerivaML
@@ -6,7 +6,7 @@ authors = [
6
6
  ]
7
7
  description = "Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines"
8
8
  readme = "README.md"
9
- requires-python = ">=3.10"
9
+ requires-python = ">=3.11"
10
10
  dependencies = [
11
11
  "bump-my-version",
12
12
  "bdbag",
@@ -14,16 +14,18 @@ dependencies = [
14
14
  "deepdiff",
15
15
  "nbconvert",
16
16
  "pandas",
17
- "regex~=2024.7.24",
17
+ "pip-system-certs",
18
18
  "pydantic>=2.11",
19
- "semver>3.0.0",
20
- "setuptools>=64",
21
- "setuptools-scm>=8.0",
22
- "nbstripout",
23
19
  "papermill",
24
20
  "pandas-stubs==2.2.3.250527",
25
21
  "pyyaml",
22
+ "regex~=2024.7.24",
23
+ "semver>3.0.0",
24
+ "setuptools>=80",
25
+ "setuptools-scm>=8.0",
26
+ "nbstripout",
26
27
  "hydra_zen",
28
+ "SQLAlchemy"
27
29
  ]
28
30
 
29
31
  [project.scripts]
@@ -39,10 +41,11 @@ deriva-ml-check-catalog-schema = "deriva_ml.schema.check_schema:main"
39
41
  [project.optional-dependencies]
40
42
 
41
43
  [build-system]
42
- requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2", "wheel"]
44
+ requires = ["setuptools>=80", "setuptools_scm[toml]>=8", "wheel"]
43
45
  build-backend = "setuptools.build_meta"
44
46
 
45
47
  [tool.uv]
48
+ python-preference = "only-managed"
46
49
 
47
50
  [tool.uv.sources]
48
51
  #bdbag = {git = "https://github.com/fair-research/bdbag", branch = "master" }
@@ -77,13 +80,10 @@ pre_commit_hooks = []
77
80
  post_commit_hooks = []
78
81
 
79
82
  [tool.pytest]
80
- mock_use_standalone_module = true
81
-
82
- [tool.pytest.ini_options]
83
83
  testpaths = ["tests"]
84
84
  python_files = ["test_*.py"]
85
85
  #addopts = "-v --cov=deriva_ml --cov-report=term-missing --import-mode=importlib"
86
- addopts = "-v --import-mode=importlib"
86
+ addopts = ["-v", "--import-mode=importlib"]
87
87
 
88
88
 
89
89
  [tool.ruff]
@@ -114,6 +114,7 @@ dev = [
114
114
  "pytest>=8.4.1",
115
115
  "pytest-mock",
116
116
  "pytest-coverage>=0.0",
117
+ "pip-system-certs",
117
118
  "ruff"
118
119
  ]
119
120
  lint = [
Binary file
Binary file
@@ -25,9 +25,6 @@ from deriva_ml.core.exceptions import (
25
25
  DerivaMLInvalidTerm,
26
26
  DerivaMLTableTypeError,
27
27
  )
28
- from deriva_ml.dataset.aux_classes import DatasetConfig, DatasetConfigList, DatasetSpec, DatasetVersion
29
-
30
- from .execution import Execution, ExecutionConfiguration, Workflow
31
28
 
32
29
  # Type-checking only - avoid circular import at runtime
33
30
  if TYPE_CHECKING:
@@ -51,13 +48,6 @@ def __getattr__(name):
51
48
  __all__ = [
52
49
  "DerivaML", # Lazy-loaded
53
50
  "DerivaMLConfig",
54
- "DatasetConfig",
55
- "DatasetConfigList",
56
- "DatasetSpec",
57
- "DatasetVersion",
58
- "Execution",
59
- "ExecutionConfiguration",
60
- "Workflow",
61
51
  # Exceptions
62
52
  "DerivaMLException",
63
53
  "DerivaMLInvalidTerm",
@@ -19,7 +19,7 @@ import logging
19
19
  from datetime import datetime
20
20
  from itertools import chain
21
21
  from pathlib import Path
22
- from typing import Dict, Iterable, List, cast, TYPE_CHECKING, Any
22
+ from typing import Dict, Iterable, List, cast, TYPE_CHECKING, Any, Self
23
23
  from urllib.parse import urlsplit
24
24
 
25
25
 
@@ -28,13 +28,14 @@ import requests
28
28
  from pydantic import ConfigDict, validate_call
29
29
 
30
30
  # Deriva imports
31
- from deriva.core import DEFAULT_SESSION_CONFIG, format_exception, get_credential, urlquote, init_logging
31
+ from deriva.core import DEFAULT_SESSION_CONFIG, format_exception, get_credential, urlquote
32
32
 
33
33
  import deriva.core.datapath as datapath
34
34
  from deriva.core.datapath import DataPathException, _SchemaWrapper as SchemaWrapper
35
35
  from deriva.core.deriva_server import DerivaServer
36
36
  from deriva.core.ermrest_catalog import ResolveRidResult
37
37
  from deriva.core.ermrest_model import Key, Table
38
+ from deriva.core.utils.core_utils import DEFAULT_LOGGER_OVERRIDES
38
39
  from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
39
40
 
40
41
  from deriva_ml.core.exceptions import DerivaMLInvalidTerm
@@ -103,6 +104,10 @@ class DerivaML(Dataset):
103
104
  >>> ml.add_term('vocabulary_table', 'new_term', description='Description of term')
104
105
  """
105
106
 
107
+ @classmethod
108
+ def instantiate(cls, config: DerivaMLConfig) -> Self:
109
+ return cls(**config.model_dump())
110
+
106
111
  def __init__(
107
112
  self,
108
113
  hostname: str,
@@ -149,7 +154,6 @@ class DerivaML(Dataset):
149
154
  credentials=self.credential,
150
155
  session_config=self._get_session_config(),
151
156
  )
152
-
153
157
  try:
154
158
  if check_auth and server.get_authn_session():
155
159
  pass
@@ -158,7 +162,6 @@ class DerivaML(Dataset):
158
162
  "You are not authorized to access this catalog. "
159
163
  "Please check your credentials and make sure you have logged in."
160
164
  )
161
-
162
165
  self.catalog = server.connect_ermrest(catalog_id)
163
166
  self.model = DerivaModel(self.catalog.getCatalogModel(), domain_schema=domain_schema)
164
167
 
@@ -176,9 +179,13 @@ class DerivaML(Dataset):
176
179
  # Set up logging
177
180
  self._logger = logging.getLogger("deriva_ml")
178
181
  self._logger.setLevel(logging_level)
182
+ self._logging_level = logging_level
183
+ self._deriva_logging_level = deriva_logging_level
179
184
 
180
185
  # Configure deriva logging level
181
- init_logging(deriva_logging_level)
186
+ logger_config = DEFAULT_LOGGER_OVERRIDES
187
+ # allow for reconfiguration of module-specific logging levels
188
+ [logging.getLogger(name).setLevel(level) for name, level in logger_config.items()]
182
189
  logging.getLogger("bagit").setLevel(deriva_logging_level)
183
190
  logging.getLogger("bdbag").setLevel(deriva_logging_level)
184
191
 
@@ -1081,7 +1088,12 @@ class DerivaML(Dataset):
1081
1088
  return self._download_dataset_bag(
1082
1089
  dataset=dataset,
1083
1090
  execution_rid=execution_rid,
1084
- snapshot_catalog=DerivaML(self.host_name, self._version_snapshot(dataset)),
1091
+ snapshot_catalog=DerivaML(
1092
+ self.host_name,
1093
+ self._version_snapshot(dataset),
1094
+ logging_level=self._logging_level,
1095
+ deriva_logging_level=self._deriva_logging_level,
1096
+ ),
1085
1097
  )
1086
1098
 
1087
1099
  def _update_status(self, new_status: Status, status_detail: str, execution_rid: RID):
@@ -0,0 +1,12 @@
1
+ from .aux_classes import DatasetSpec, DatasetSpecConfig, DatasetVersion, VersionPart
2
+ from .dataset import Dataset
3
+ from .dataset_bag import DatasetBag
4
+
5
+ __all__ = [
6
+ "Dataset",
7
+ "DatasetSpec",
8
+ "DatasetSpecConfig",
9
+ "DatasetBag",
10
+ "DatasetVersion",
11
+ "VersionPart",
12
+ ]
@@ -212,18 +212,10 @@ class DatasetSpec(BaseModel):
212
212
  return version.to_dict()
213
213
 
214
214
 
215
+ # Interface for hydra-zen
215
216
  @hydrated_dataclass(DatasetSpec)
216
- class DatasetConfig:
217
+ class DatasetSpecConfig:
217
218
  rid: str
218
219
  version: str
219
220
  materialize: bool = True
220
221
  description: str = ""
221
-
222
- class DatasetList(BaseModel):
223
- datasets: list[DatasetSpec]
224
- description: str = ""
225
-
226
- @hydrated_dataclass(DatasetList)
227
- class DatasetConfigList:
228
- datasets: list[DatasetConfig]
229
- description: str = ""
@@ -31,6 +31,7 @@ from graphlib import TopologicalSorter
31
31
  from pathlib import Path
32
32
  from tempfile import TemporaryDirectory
33
33
  from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator
34
+ from urllib.parse import urlparse
34
35
 
35
36
  import deriva.core.utils.hash_utils as hash_utils
36
37
  import requests
@@ -1040,7 +1041,6 @@ class Dataset:
1040
1041
  envars={"RID": dataset.rid},
1041
1042
  )
1042
1043
  minid_page_url = exporter.export()[0] # Get the MINID launch page
1043
-
1044
1044
  except (
1045
1045
  DerivaDownloadError,
1046
1046
  DerivaDownloadConfigurationError,
@@ -1096,7 +1096,8 @@ class Dataset:
1096
1096
 
1097
1097
  # Check or create MINID
1098
1098
  minid_url = version_record.minid
1099
- if not minid_url:
1099
+ # If we either don't have a MINID, or we have a MINID, but we don't want to use it, generate a new one.
1100
+ if (not minid_url) or (not self._use_minid):
1100
1101
  if not create:
1101
1102
  raise DerivaMLException(f"Minid for dataset {rid} doesn't exist")
1102
1103
  if self._use_minid:
@@ -1106,7 +1107,6 @@ class Dataset:
1106
1107
  # Return based on MINID usage
1107
1108
  if self._use_minid:
1108
1109
  return self._fetch_minid_metadata(minid_url, dataset.version)
1109
-
1110
1110
  return DatasetMinid(
1111
1111
  dataset_version=dataset.version,
1112
1112
  RID=f"{rid}@{version_record.snapshot}",
@@ -1139,7 +1139,8 @@ class Dataset:
1139
1139
  with TemporaryDirectory() as tmp_dir:
1140
1140
  if self._use_minid:
1141
1141
  # Get bag from S3
1142
- archive_path = fetch_single_file(minid.bag_url, output_path=tmp_dir)
1142
+ bag_path = Path(tmp_dir) / Path(urlparse(minid.bag_url).path).name
1143
+ archive_path = fetch_single_file(minid.bag_url, output_path=bag_path)
1143
1144
  else:
1144
1145
  exporter = DerivaExport(host=self._model.catalog.deriva_server.server, output_dir=tmp_dir)
1145
1146
  archive_path = exporter.retrieve_file(minid.bag_url)