deriva-ml 1.16.0__tar.gz → 1.17.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/PKG-INFO +9 -7
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/pyproject.toml +12 -11
- deriva_ml-1.17.1/src/.DS_Store +0 -0
- deriva_ml-1.17.1/src/deriva_ml/.DS_Store +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/__init__.py +0 -10
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/base.py +18 -6
- deriva_ml-1.17.1/src/deriva_ml/dataset/__init__.py +12 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/aux_classes.py +2 -10
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/dataset.py +5 -4
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/dataset_bag.py +144 -151
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/upload.py +6 -4
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/demo_catalog.py +16 -2
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/__init__.py +2 -1
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/execution.py +5 -3
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/execution_configuration.py +28 -9
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/workflow.py +8 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/model/catalog.py +55 -50
- deriva_ml-1.17.1/src/deriva_ml/model/database.py +719 -0
- deriva_ml-1.17.1/src/deriva_ml/test.py +94 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/PKG-INFO +9 -7
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/SOURCES.txt +3 -1
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/requires.txt +7 -5
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/test_file.py +2 -1
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/uv.lock +596 -847
- deriva_ml-1.16.0/src/deriva_ml/dataset/__init__.py +0 -17
- deriva_ml-1.16.0/src/deriva_ml/model/database.py +0 -345
- deriva_ml-1.16.0/src/deriva_ml/model/sql_mapper.py +0 -44
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.github/release-drafter.yml +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.github/workflows/publish-docs.yml +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.github/workflows/release.yml +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/.gitignore +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/LICENSE +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/README.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/.DS_Store +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Features.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/ERD.png +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/Launcher.png +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/copy_minid.png +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/deriva-logo.png +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/deriva-ml.pdf +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/assets/sharing-at-home.pdf +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/dataset.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/dataset_aux_classes.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/dataset_bag.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/deriva_definitions.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/deriva_ml_base.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/deriva_model.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/execution.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/execution_configuration.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/feature.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/code-docs/upload.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/index.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/release-notes.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/datasets.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/deriva_ml_structure.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/execution-configuration.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/file-assets.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/identifiers.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/install.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/notebooks.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/docs/user-guide/overview.md +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/mkdocs.yml +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/setup.cfg +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/bump_version.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/config.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/constants.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/definitions.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/enums.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/ermrest.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/exceptions.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/core/filespec.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/dataset/history.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/execution/environment.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/feature.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/install_kernel.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/model/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/protocols/dataset.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/run_notebook.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/annotations.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/check_schema.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/create_schema.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/policy.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml/schema/table_comments_utils.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/entry_points.txt +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/conftest.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/test_basic_tables.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/core/test_vocabulary.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/demo-catalog-schema.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/deriva-ml-reference.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/eye-ai-catalog-schema.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/test_dataset_version.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/test_datasets.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/dataset/test_download.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/test_execution.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/workflow-test.ipynb +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/execution/workflow-test.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/feature/test_features.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/model/__init__.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/model/test_database.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/model/test_models.py +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/test-files/execution-parameters.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/test-files/notebook-parameters.json +0 -0
- {deriva_ml-1.16.0 → deriva_ml-1.17.1}/tests/test_utils.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.16.0
|
|
3
|
+
Version: 1.17.1
|
|
4
4
|
Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
|
-
Requires-Python: >=3.
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Requires-Dist: bump-my-version
|
|
@@ -12,16 +12,18 @@ Requires-Dist: deriva~=1.7.10
|
|
|
12
12
|
Requires-Dist: deepdiff
|
|
13
13
|
Requires-Dist: nbconvert
|
|
14
14
|
Requires-Dist: pandas
|
|
15
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: pip-system-certs
|
|
16
16
|
Requires-Dist: pydantic>=2.11
|
|
17
|
-
Requires-Dist: semver>3.0.0
|
|
18
|
-
Requires-Dist: setuptools>=64
|
|
19
|
-
Requires-Dist: setuptools-scm>=8.0
|
|
20
|
-
Requires-Dist: nbstripout
|
|
21
17
|
Requires-Dist: papermill
|
|
22
18
|
Requires-Dist: pandas-stubs==2.2.3.250527
|
|
23
19
|
Requires-Dist: pyyaml
|
|
20
|
+
Requires-Dist: regex~=2024.7.24
|
|
21
|
+
Requires-Dist: semver>3.0.0
|
|
22
|
+
Requires-Dist: setuptools>=80
|
|
23
|
+
Requires-Dist: setuptools-scm>=8.0
|
|
24
|
+
Requires-Dist: nbstripout
|
|
24
25
|
Requires-Dist: hydra_zen
|
|
26
|
+
Requires-Dist: SQLAlchemy
|
|
25
27
|
Dynamic: license-file
|
|
26
28
|
|
|
27
29
|
# DerivaML
|
|
@@ -6,7 +6,7 @@ authors = [
|
|
|
6
6
|
]
|
|
7
7
|
description = "Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines"
|
|
8
8
|
readme = "README.md"
|
|
9
|
-
requires-python = ">=3.
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"bump-my-version",
|
|
12
12
|
"bdbag",
|
|
@@ -14,16 +14,18 @@ dependencies = [
|
|
|
14
14
|
"deepdiff",
|
|
15
15
|
"nbconvert",
|
|
16
16
|
"pandas",
|
|
17
|
-
"
|
|
17
|
+
"pip-system-certs",
|
|
18
18
|
"pydantic>=2.11",
|
|
19
|
-
"semver>3.0.0",
|
|
20
|
-
"setuptools>=64",
|
|
21
|
-
"setuptools-scm>=8.0",
|
|
22
|
-
"nbstripout",
|
|
23
19
|
"papermill",
|
|
24
20
|
"pandas-stubs==2.2.3.250527",
|
|
25
21
|
"pyyaml",
|
|
22
|
+
"regex~=2024.7.24",
|
|
23
|
+
"semver>3.0.0",
|
|
24
|
+
"setuptools>=80",
|
|
25
|
+
"setuptools-scm>=8.0",
|
|
26
|
+
"nbstripout",
|
|
26
27
|
"hydra_zen",
|
|
28
|
+
"SQLAlchemy"
|
|
27
29
|
]
|
|
28
30
|
|
|
29
31
|
[project.scripts]
|
|
@@ -39,10 +41,11 @@ deriva-ml-check-catalog-schema = "deriva_ml.schema.check_schema:main"
|
|
|
39
41
|
[project.optional-dependencies]
|
|
40
42
|
|
|
41
43
|
[build-system]
|
|
42
|
-
requires = ["setuptools>=
|
|
44
|
+
requires = ["setuptools>=80", "setuptools_scm[toml]>=8", "wheel"]
|
|
43
45
|
build-backend = "setuptools.build_meta"
|
|
44
46
|
|
|
45
47
|
[tool.uv]
|
|
48
|
+
python-preference = "only-managed"
|
|
46
49
|
|
|
47
50
|
[tool.uv.sources]
|
|
48
51
|
#bdbag = {git = "https://github.com/fair-research/bdbag", branch = "master" }
|
|
@@ -77,13 +80,10 @@ pre_commit_hooks = []
|
|
|
77
80
|
post_commit_hooks = []
|
|
78
81
|
|
|
79
82
|
[tool.pytest]
|
|
80
|
-
mock_use_standalone_module = true
|
|
81
|
-
|
|
82
|
-
[tool.pytest.ini_options]
|
|
83
83
|
testpaths = ["tests"]
|
|
84
84
|
python_files = ["test_*.py"]
|
|
85
85
|
#addopts = "-v --cov=deriva_ml --cov-report=term-missing --import-mode=importlib"
|
|
86
|
-
addopts = "-v
|
|
86
|
+
addopts = ["-v", "--import-mode=importlib"]
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
[tool.ruff]
|
|
@@ -114,6 +114,7 @@ dev = [
|
|
|
114
114
|
"pytest>=8.4.1",
|
|
115
115
|
"pytest-mock",
|
|
116
116
|
"pytest-coverage>=0.0",
|
|
117
|
+
"pip-system-certs",
|
|
117
118
|
"ruff"
|
|
118
119
|
]
|
|
119
120
|
lint = [
|
|
Binary file
|
|
Binary file
|
|
@@ -25,9 +25,6 @@ from deriva_ml.core.exceptions import (
|
|
|
25
25
|
DerivaMLInvalidTerm,
|
|
26
26
|
DerivaMLTableTypeError,
|
|
27
27
|
)
|
|
28
|
-
from deriva_ml.dataset.aux_classes import DatasetConfig, DatasetConfigList, DatasetSpec, DatasetVersion
|
|
29
|
-
|
|
30
|
-
from .execution import Execution, ExecutionConfiguration, Workflow
|
|
31
28
|
|
|
32
29
|
# Type-checking only - avoid circular import at runtime
|
|
33
30
|
if TYPE_CHECKING:
|
|
@@ -51,13 +48,6 @@ def __getattr__(name):
|
|
|
51
48
|
__all__ = [
|
|
52
49
|
"DerivaML", # Lazy-loaded
|
|
53
50
|
"DerivaMLConfig",
|
|
54
|
-
"DatasetConfig",
|
|
55
|
-
"DatasetConfigList",
|
|
56
|
-
"DatasetSpec",
|
|
57
|
-
"DatasetVersion",
|
|
58
|
-
"Execution",
|
|
59
|
-
"ExecutionConfiguration",
|
|
60
|
-
"Workflow",
|
|
61
51
|
# Exceptions
|
|
62
52
|
"DerivaMLException",
|
|
63
53
|
"DerivaMLInvalidTerm",
|
|
@@ -19,7 +19,7 @@ import logging
|
|
|
19
19
|
from datetime import datetime
|
|
20
20
|
from itertools import chain
|
|
21
21
|
from pathlib import Path
|
|
22
|
-
from typing import Dict, Iterable, List, cast, TYPE_CHECKING, Any
|
|
22
|
+
from typing import Dict, Iterable, List, cast, TYPE_CHECKING, Any, Self
|
|
23
23
|
from urllib.parse import urlsplit
|
|
24
24
|
|
|
25
25
|
|
|
@@ -28,13 +28,14 @@ import requests
|
|
|
28
28
|
from pydantic import ConfigDict, validate_call
|
|
29
29
|
|
|
30
30
|
# Deriva imports
|
|
31
|
-
from deriva.core import DEFAULT_SESSION_CONFIG, format_exception, get_credential, urlquote
|
|
31
|
+
from deriva.core import DEFAULT_SESSION_CONFIG, format_exception, get_credential, urlquote
|
|
32
32
|
|
|
33
33
|
import deriva.core.datapath as datapath
|
|
34
34
|
from deriva.core.datapath import DataPathException, _SchemaWrapper as SchemaWrapper
|
|
35
35
|
from deriva.core.deriva_server import DerivaServer
|
|
36
36
|
from deriva.core.ermrest_catalog import ResolveRidResult
|
|
37
37
|
from deriva.core.ermrest_model import Key, Table
|
|
38
|
+
from deriva.core.utils.core_utils import DEFAULT_LOGGER_OVERRIDES
|
|
38
39
|
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
|
|
39
40
|
|
|
40
41
|
from deriva_ml.core.exceptions import DerivaMLInvalidTerm
|
|
@@ -103,6 +104,10 @@ class DerivaML(Dataset):
|
|
|
103
104
|
>>> ml.add_term('vocabulary_table', 'new_term', description='Description of term')
|
|
104
105
|
"""
|
|
105
106
|
|
|
107
|
+
@classmethod
|
|
108
|
+
def instantiate(cls, config: DerivaMLConfig) -> Self:
|
|
109
|
+
return cls(**config.model_dump())
|
|
110
|
+
|
|
106
111
|
def __init__(
|
|
107
112
|
self,
|
|
108
113
|
hostname: str,
|
|
@@ -149,7 +154,6 @@ class DerivaML(Dataset):
|
|
|
149
154
|
credentials=self.credential,
|
|
150
155
|
session_config=self._get_session_config(),
|
|
151
156
|
)
|
|
152
|
-
|
|
153
157
|
try:
|
|
154
158
|
if check_auth and server.get_authn_session():
|
|
155
159
|
pass
|
|
@@ -158,7 +162,6 @@ class DerivaML(Dataset):
|
|
|
158
162
|
"You are not authorized to access this catalog. "
|
|
159
163
|
"Please check your credentials and make sure you have logged in."
|
|
160
164
|
)
|
|
161
|
-
|
|
162
165
|
self.catalog = server.connect_ermrest(catalog_id)
|
|
163
166
|
self.model = DerivaModel(self.catalog.getCatalogModel(), domain_schema=domain_schema)
|
|
164
167
|
|
|
@@ -176,9 +179,13 @@ class DerivaML(Dataset):
|
|
|
176
179
|
# Set up logging
|
|
177
180
|
self._logger = logging.getLogger("deriva_ml")
|
|
178
181
|
self._logger.setLevel(logging_level)
|
|
182
|
+
self._logging_level = logging_level
|
|
183
|
+
self._deriva_logging_level = deriva_logging_level
|
|
179
184
|
|
|
180
185
|
# Configure deriva logging level
|
|
181
|
-
|
|
186
|
+
logger_config = DEFAULT_LOGGER_OVERRIDES
|
|
187
|
+
# allow for reconfiguration of module-specific logging levels
|
|
188
|
+
[logging.getLogger(name).setLevel(level) for name, level in logger_config.items()]
|
|
182
189
|
logging.getLogger("bagit").setLevel(deriva_logging_level)
|
|
183
190
|
logging.getLogger("bdbag").setLevel(deriva_logging_level)
|
|
184
191
|
|
|
@@ -1081,7 +1088,12 @@ class DerivaML(Dataset):
|
|
|
1081
1088
|
return self._download_dataset_bag(
|
|
1082
1089
|
dataset=dataset,
|
|
1083
1090
|
execution_rid=execution_rid,
|
|
1084
|
-
snapshot_catalog=DerivaML(
|
|
1091
|
+
snapshot_catalog=DerivaML(
|
|
1092
|
+
self.host_name,
|
|
1093
|
+
self._version_snapshot(dataset),
|
|
1094
|
+
logging_level=self._logging_level,
|
|
1095
|
+
deriva_logging_level=self._deriva_logging_level,
|
|
1096
|
+
),
|
|
1085
1097
|
)
|
|
1086
1098
|
|
|
1087
1099
|
def _update_status(self, new_status: Status, status_detail: str, execution_rid: RID):
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from .aux_classes import DatasetSpec, DatasetSpecConfig, DatasetVersion, VersionPart
|
|
2
|
+
from .dataset import Dataset
|
|
3
|
+
from .dataset_bag import DatasetBag
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"Dataset",
|
|
7
|
+
"DatasetSpec",
|
|
8
|
+
"DatasetSpecConfig",
|
|
9
|
+
"DatasetBag",
|
|
10
|
+
"DatasetVersion",
|
|
11
|
+
"VersionPart",
|
|
12
|
+
]
|
|
@@ -212,18 +212,10 @@ class DatasetSpec(BaseModel):
|
|
|
212
212
|
return version.to_dict()
|
|
213
213
|
|
|
214
214
|
|
|
215
|
+
# Interface for hydra-zen
|
|
215
216
|
@hydrated_dataclass(DatasetSpec)
|
|
216
|
-
class DatasetConfig:
|
|
217
|
+
class DatasetSpecConfig:
|
|
217
218
|
rid: str
|
|
218
219
|
version: str
|
|
219
220
|
materialize: bool = True
|
|
220
221
|
description: str = ""
|
|
221
|
-
|
|
222
|
-
class DatasetList(BaseModel):
|
|
223
|
-
datasets: list[DatasetSpec]
|
|
224
|
-
description: str = ""
|
|
225
|
-
|
|
226
|
-
@hydrated_dataclass(DatasetList)
|
|
227
|
-
class DatasetConfigList:
|
|
228
|
-
datasets: list[DatasetConfig]
|
|
229
|
-
description: str = ""
|
|
@@ -31,6 +31,7 @@ from graphlib import TopologicalSorter
|
|
|
31
31
|
from pathlib import Path
|
|
32
32
|
from tempfile import TemporaryDirectory
|
|
33
33
|
from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator
|
|
34
|
+
from urllib.parse import urlparse
|
|
34
35
|
|
|
35
36
|
import deriva.core.utils.hash_utils as hash_utils
|
|
36
37
|
import requests
|
|
@@ -1040,7 +1041,6 @@ class Dataset:
|
|
|
1040
1041
|
envars={"RID": dataset.rid},
|
|
1041
1042
|
)
|
|
1042
1043
|
minid_page_url = exporter.export()[0] # Get the MINID launch page
|
|
1043
|
-
|
|
1044
1044
|
except (
|
|
1045
1045
|
DerivaDownloadError,
|
|
1046
1046
|
DerivaDownloadConfigurationError,
|
|
@@ -1096,7 +1096,8 @@ class Dataset:
|
|
|
1096
1096
|
|
|
1097
1097
|
# Check or create MINID
|
|
1098
1098
|
minid_url = version_record.minid
|
|
1099
|
-
|
|
1099
|
+
# If we either don't have a MINID, or we have a MINID, but we don't want to use it, generate a new one.
|
|
1100
|
+
if (not minid_url) or (not self._use_minid):
|
|
1100
1101
|
if not create:
|
|
1101
1102
|
raise DerivaMLException(f"Minid for dataset {rid} doesn't exist")
|
|
1102
1103
|
if self._use_minid:
|
|
@@ -1106,7 +1107,6 @@ class Dataset:
|
|
|
1106
1107
|
# Return based on MINID usage
|
|
1107
1108
|
if self._use_minid:
|
|
1108
1109
|
return self._fetch_minid_metadata(minid_url, dataset.version)
|
|
1109
|
-
|
|
1110
1110
|
return DatasetMinid(
|
|
1111
1111
|
dataset_version=dataset.version,
|
|
1112
1112
|
RID=f"{rid}@{version_record.snapshot}",
|
|
@@ -1139,7 +1139,8 @@ class Dataset:
|
|
|
1139
1139
|
with TemporaryDirectory() as tmp_dir:
|
|
1140
1140
|
if self._use_minid:
|
|
1141
1141
|
# Get bag from S3
|
|
1142
|
-
|
|
1142
|
+
bag_path = Path(tmp_dir) / Path(urlparse(minid.bag_url).path).name
|
|
1143
|
+
archive_path = fetch_single_file(minid.bag_url, output_path=bag_path)
|
|
1143
1144
|
else:
|
|
1144
1145
|
exporter = DerivaExport(host=self._model.catalog.deriva_server.server, output_dir=tmp_dir)
|
|
1145
1146
|
archive_path = exporter.retrieve_file(minid.bag_url)
|