deriva-ml 1.14.32__tar.gz → 1.14.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/PKG-INFO +4 -5
- deriva_ml-1.14.34/README.md +11 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/pyproject.toml +4 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/base.py +1 -1
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/feature.py +1 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/run_notebook.py +4 -4
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/PKG-INFO +4 -5
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/conftest.py +11 -1
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/test_execution.py +17 -6
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/workflow-test.ipynb +12 -17
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/workflow-test.py +0 -1
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/feature/test_features.py +32 -16
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/test_utils.py +69 -1
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/uv.lock +608 -580
- deriva_ml-1.14.32/README.md +0 -12
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/.github/release-drafter.yml +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/.github/workflows/publish-docs.yml +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/.github/workflows/release.yml +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/.gitignore +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/LICENSE +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/.DS_Store +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Features.ipynb +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/assets/ERD.png +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/assets/Launcher.png +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/assets/copy_minid.png +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/assets/deriva-logo.png +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/assets/deriva-ml.pdf +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/assets/sharing-at-home.pdf +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/dataset.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/dataset_aux_classes.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/dataset_bag.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/deriva_definitions.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/deriva_ml_base.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/deriva_model.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/execution.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/execution_configuration.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/feature.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/code-docs/upload.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/index.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/release-notes.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/datasets.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/deriva_ml_structure.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/execution-configuration.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/file-assets.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/identifiers.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/install.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/notebooks.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/docs/user-guide/overview.md +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/mkdocs.yml +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/release.sh +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/setup.cfg +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/constants.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/definitions.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/enums.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/ermrest.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/exceptions.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/filespec.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/dataset/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/dataset/aux_classes.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/dataset/dataset.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/dataset/dataset_bag.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/dataset/history.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/dataset/upload.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/demo_catalog.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/execution/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/execution/environment.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/execution/execution.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/execution/execution_configuration.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/execution/workflow.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/model/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/model/catalog.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/model/database.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/model/sql_mapper.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/annotations.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/check_schema.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/create_schema.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/policy.json +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/schema/table_comments_utils.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/entry_points.txt +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/requires.txt +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/core/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/core/test_basic_tables.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/core/test_file.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/core/test_vocabulary.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/demo-catalog-schema.json +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/deriva-ml-reference.json +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/eye-ai-catalog-schema.json +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/test_dataset_export.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/test_dataset_version.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/test_datasets.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/dataset/test_download.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/model/__init__.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/model/test_database.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/model/test_models.py +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/test-files/execution-parameters.json +0 -0
- {deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/test-files/notebook-parameters.json +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.14.32
|
|
3
|
+
Version: 1.14.34
|
|
4
4
|
Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
|
|
|
25
25
|
using a deriva catalog.
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
GitHUB CLI be installed.
|
|
30
|
+
To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suit your requirements.
|
|
32
31
|
|
|
33
|
-
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
34
32
|
|
|
33
|
+
## References
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# DerivaML
|
|
2
|
+
Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
|
|
3
|
+
using a deriva catalog.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
|
|
7
|
+
|
|
8
|
+
To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suit your requirements.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
## References
|
|
@@ -64,6 +64,9 @@ setup_hooks = []
|
|
|
64
64
|
pre_commit_hooks = []
|
|
65
65
|
post_commit_hooks = []
|
|
66
66
|
|
|
67
|
+
[tool.pytest]
|
|
68
|
+
mock_use_standalone_module = true
|
|
69
|
+
|
|
67
70
|
[tool.pytest.ini_options]
|
|
68
71
|
testpaths = ["tests"]
|
|
69
72
|
python_files = ["test_*.py"]
|
|
@@ -97,6 +100,7 @@ dev = [
|
|
|
97
100
|
"mkdocstrings[python]",
|
|
98
101
|
"mkdocs-material",
|
|
99
102
|
"pytest>=8.4.1",
|
|
103
|
+
"pytest-mock",
|
|
100
104
|
"pytest-coverage>=0.0",
|
|
101
105
|
"ruff"
|
|
102
106
|
]
|
|
@@ -117,7 +117,7 @@ class DerivaML(Dataset):
|
|
|
117
117
|
cache_dir: str | Path | None = None,
|
|
118
118
|
working_dir: str | Path | None = None,
|
|
119
119
|
ml_schema: str = ML_SCHEMA,
|
|
120
|
-
logging_level=logging.
|
|
120
|
+
logging_level=logging.WARNING,
|
|
121
121
|
credential=None,
|
|
122
122
|
use_minid: bool = True,
|
|
123
123
|
):
|
|
@@ -95,7 +95,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
95
95
|
exit(1)
|
|
96
96
|
|
|
97
97
|
os.environ["DERIVA_HOST"] = args.host
|
|
98
|
-
os.environ["
|
|
98
|
+
os.environ["DERIVA_CATALOG"] = args.catalog
|
|
99
99
|
|
|
100
100
|
# Create a workflow instance for this specific version of the script.
|
|
101
101
|
# Return an existing workflow if one is found.
|
|
@@ -106,14 +106,14 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
106
106
|
return
|
|
107
107
|
else:
|
|
108
108
|
notebook_parameters = (
|
|
109
|
-
{
|
|
110
|
-
| {
|
|
109
|
+
{k: v["default"] for k, v in notebook_parameters.items()}
|
|
110
|
+
| {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
|
|
111
111
|
| parameters
|
|
112
112
|
)
|
|
113
113
|
print(f"Running notebook {notebook_file.name} with parameters:")
|
|
114
114
|
for param, value in notebook_parameters.items():
|
|
115
115
|
print(f" {param}:{value}")
|
|
116
|
-
self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
|
|
116
|
+
self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
|
|
117
117
|
|
|
118
118
|
def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
|
|
119
119
|
url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.14.32
|
|
3
|
+
Version: 1.14.34
|
|
4
4
|
Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
|
|
|
25
25
|
using a deriva catalog.
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
GitHUB CLI be installed.
|
|
30
|
+
To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suit your requirements.
|
|
32
31
|
|
|
33
|
-
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
34
32
|
|
|
33
|
+
## References
|
|
@@ -5,7 +5,7 @@ Pytest configuration and shared fixtures.
|
|
|
5
5
|
import os
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
8
|
-
from test_utils import MLCatalog, MLDatasetCatalog
|
|
8
|
+
from test_utils import MLCatalog, MLDatasetCatalog, create_jupyter_kernel, destroy_jupyter_kernel
|
|
9
9
|
|
|
10
10
|
from deriva_ml import DerivaML
|
|
11
11
|
from deriva_ml.demo_catalog import (
|
|
@@ -61,6 +61,16 @@ def dataset_test(catalog_with_datasets):
|
|
|
61
61
|
return catalog_with_datasets
|
|
62
62
|
|
|
63
63
|
|
|
64
|
+
@pytest.fixture(scope="function")
|
|
65
|
+
def notebook_test(deriva_catalog, tmp_path):
|
|
66
|
+
deriva_catalog.reset_demo_catalog()
|
|
67
|
+
create_jupyter_kernel("test_kernel", tmp_path)
|
|
68
|
+
yield DerivaML(deriva_catalog.hostname, deriva_catalog.catalog_id, use_minid=False, working_dir=tmp_path)
|
|
69
|
+
print("Resetting catalog... ", end="")
|
|
70
|
+
deriva_catalog.reset_demo_catalog()
|
|
71
|
+
destroy_jupyter_kernel("test_kernel")
|
|
72
|
+
|
|
73
|
+
|
|
64
74
|
@pytest.fixture(scope="function")
|
|
65
75
|
def test_ml_demo_catalog(ml_catalog, tmp_path):
|
|
66
76
|
# reset_demo_catalog(ml_catalog.catalog)
|
|
@@ -3,6 +3,7 @@ Tests for the execution module.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from tempfile import TemporaryDirectory
|
|
7
8
|
|
|
8
9
|
from deriva_ml import (
|
|
@@ -23,20 +24,23 @@ class TestWorkflow:
|
|
|
23
24
|
ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
|
|
24
25
|
ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
|
|
25
26
|
print("Running workflow-test.py ...")
|
|
27
|
+
workflow_script = Path(__file__).parent / "workflow-test.py"
|
|
28
|
+
|
|
26
29
|
workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
|
|
27
30
|
workflows = list(workflow_table.entities().fetch())
|
|
28
31
|
assert 0 == len(workflows)
|
|
29
32
|
result = subprocess.run(
|
|
30
33
|
[
|
|
31
34
|
"python",
|
|
32
|
-
|
|
35
|
+
workflow_script.as_posix(),
|
|
33
36
|
ml_instance.catalog.deriva_server.server,
|
|
34
37
|
ml_instance.catalog_id,
|
|
35
38
|
],
|
|
36
39
|
capture_output=True,
|
|
37
40
|
text=True,
|
|
38
41
|
)
|
|
39
|
-
|
|
42
|
+
print(result.stdout)
|
|
43
|
+
print(result.stderr)
|
|
40
44
|
workflows = list(workflow_table.entities().fetch())
|
|
41
45
|
assert 1 == len(workflows)
|
|
42
46
|
workflow_rid = workflows[0]["RID"]
|
|
@@ -50,18 +54,22 @@ class TestWorkflow:
|
|
|
50
54
|
result = subprocess.run(
|
|
51
55
|
[
|
|
52
56
|
"python",
|
|
53
|
-
|
|
57
|
+
workflow_script.as_posix(),
|
|
54
58
|
ml_instance.catalog.deriva_server.server,
|
|
55
59
|
ml_instance.catalog_id,
|
|
56
60
|
],
|
|
57
61
|
capture_output=True,
|
|
58
62
|
text=True,
|
|
59
63
|
)
|
|
64
|
+
print(result.stdout)
|
|
65
|
+
print(result.stderr)
|
|
60
66
|
new_workflow = result.stdout.strip()
|
|
61
67
|
assert new_workflow == workflow_rid
|
|
62
68
|
|
|
63
|
-
def test_workflow_creation_notebook(self,
|
|
64
|
-
ml_instance =
|
|
69
|
+
def test_workflow_creation_notebook(self, notebook_test):
|
|
70
|
+
ml_instance = notebook_test
|
|
71
|
+
|
|
72
|
+
notebook_path = Path(__file__).parent / "workflow-test.ipynb" # directory where this test lives
|
|
65
73
|
ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
|
|
66
74
|
ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
|
|
67
75
|
workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
|
|
@@ -69,14 +77,17 @@ class TestWorkflow:
|
|
|
69
77
|
assert 0 == len(workflows)
|
|
70
78
|
|
|
71
79
|
print("Running notebook...")
|
|
80
|
+
|
|
72
81
|
result = subprocess.run(
|
|
73
82
|
[
|
|
74
83
|
"deriva-ml-run-notebook",
|
|
75
|
-
|
|
84
|
+
notebook_path.as_posix(),
|
|
76
85
|
"--host",
|
|
77
86
|
ml_instance.catalog.deriva_server.server,
|
|
78
87
|
"--catalog",
|
|
79
88
|
ml_instance.catalog_id,
|
|
89
|
+
"--kernel",
|
|
90
|
+
"test_kernel",
|
|
80
91
|
"--log-output",
|
|
81
92
|
],
|
|
82
93
|
capture_output=True,
|
|
@@ -18,9 +18,7 @@
|
|
|
18
18
|
"outputs": [],
|
|
19
19
|
"source": [
|
|
20
20
|
"from deriva_ml import DerivaML, MLVocab as vc\n",
|
|
21
|
-
"import os
|
|
22
|
-
"import logging\n",
|
|
23
|
-
"logger = logging.getLogger()"
|
|
21
|
+
"import os"
|
|
24
22
|
]
|
|
25
23
|
},
|
|
26
24
|
{
|
|
@@ -42,8 +40,8 @@
|
|
|
42
40
|
},
|
|
43
41
|
"outputs": [],
|
|
44
42
|
"source": [
|
|
45
|
-
"
|
|
46
|
-
"
|
|
43
|
+
"host = None\n",
|
|
44
|
+
"catalog = None"
|
|
47
45
|
]
|
|
48
46
|
},
|
|
49
47
|
{
|
|
@@ -54,24 +52,21 @@
|
|
|
54
52
|
"outputs": [],
|
|
55
53
|
"source": [
|
|
56
54
|
"# Modify these to your desired server and catalog.\n",
|
|
57
|
-
"
|
|
58
|
-
"
|
|
55
|
+
"host = host or os.environ.get(\"DERIVA_HOST\")\n",
|
|
56
|
+
"catalog = catalog or os.environ.get(\"DERIVA_CATALOG\")\n",
|
|
59
57
|
"\n",
|
|
60
58
|
"# Change this line to call the domain specific class derived from DerivaML\n",
|
|
61
|
-
"ml_instance = DerivaML(
|
|
62
|
-
"logger.info(\"Got ML instance:\")\n",
|
|
59
|
+
"ml_instance = DerivaML(host, catalog)\n",
|
|
63
60
|
"\n",
|
|
64
61
|
"ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
|
|
65
62
|
"ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
|
|
66
|
-
"
|
|
63
|
+
"\n",
|
|
67
64
|
"api_workflow = ml_instance.create_workflow(\n",
|
|
68
|
-
"
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
")\n",
|
|
72
|
-
"
|
|
73
|
-
"rid = ml_instance.add_workflow(api_workflow)\n",
|
|
74
|
-
"logger.info(f\"RID {rid}\")\n"
|
|
65
|
+
" name=\"Test Workflow One\",\n",
|
|
66
|
+
" workflow_type=\"Test Workflow\",\n",
|
|
67
|
+
" description=\"A test operation\",\n",
|
|
68
|
+
" )\n",
|
|
69
|
+
"rid = ml_instance.add_workflow(api_workflow)"
|
|
75
70
|
]
|
|
76
71
|
}
|
|
77
72
|
],
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
Tests for feature functionality.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from unittest.mock import Mock
|
|
6
|
-
|
|
7
5
|
import pytest
|
|
6
|
+
from pydantic import ValidationError
|
|
8
7
|
|
|
9
8
|
from deriva_ml import (
|
|
10
9
|
BuiltinTypes,
|
|
@@ -21,14 +20,14 @@ from deriva_ml.feature import FeatureRecord
|
|
|
21
20
|
class TestFeatureRecord:
|
|
22
21
|
"""Test cases for the FeatureRecord base class."""
|
|
23
22
|
|
|
24
|
-
def test_feature_record_creation(self):
|
|
23
|
+
def test_feature_record_creation(self, mocker):
|
|
25
24
|
"""Test basic FeatureRecord creation."""
|
|
26
25
|
# Create a mock feature
|
|
27
|
-
mock_feature = Mock()
|
|
28
|
-
mock_feature.feature_columns = {Mock(name="value"), Mock(name="confidence")}
|
|
29
|
-
mock_feature.asset_columns = {Mock(name="image_file")}
|
|
30
|
-
mock_feature.term_columns = {Mock(name="category")}
|
|
31
|
-
mock_feature.value_columns = {Mock(name="score")}
|
|
26
|
+
mock_feature = mocker.Mock()
|
|
27
|
+
mock_feature.feature_columns = {mocker.Mock(name="value"), mocker.Mock(name="confidence")}
|
|
28
|
+
mock_feature.asset_columns = {mocker.Mock(name="image_file")}
|
|
29
|
+
mock_feature.term_columns = {mocker.Mock(name="category")}
|
|
30
|
+
mock_feature.value_columns = {mocker.Mock(name="score")}
|
|
32
31
|
|
|
33
32
|
# Create a test class that inherits from FeatureRecord
|
|
34
33
|
class TestFeature(FeatureRecord):
|
|
@@ -58,17 +57,17 @@ class TestFeatureRecord:
|
|
|
58
57
|
assert record.category == "good"
|
|
59
58
|
assert record.score == 0.8
|
|
60
59
|
|
|
61
|
-
def test_feature_record_column_methods(self):
|
|
60
|
+
def test_feature_record_column_methods(self, mocker):
|
|
62
61
|
"""Test the column access methods of FeatureRecord."""
|
|
63
62
|
# Create mock columns
|
|
64
|
-
value_col = Mock(name="value")
|
|
65
|
-
confidence_col = Mock(name="confidence")
|
|
66
|
-
asset_col = Mock(name="image_file")
|
|
67
|
-
term_col = Mock(name="category")
|
|
68
|
-
value_only_col = Mock(name="score")
|
|
63
|
+
value_col = mocker.Mock(name="value")
|
|
64
|
+
confidence_col = mocker.Mock(name="confidence")
|
|
65
|
+
asset_col = mocker.Mock(name="image_file")
|
|
66
|
+
term_col = mocker.Mock(name="category")
|
|
67
|
+
value_only_col = mocker.Mock(name="score")
|
|
69
68
|
|
|
70
69
|
# Create a mock feature
|
|
71
|
-
mock_feature = Mock()
|
|
70
|
+
mock_feature = mocker.Mock()
|
|
72
71
|
mock_feature.feature_columns = {value_col, confidence_col, asset_col, term_col, value_only_col}
|
|
73
72
|
mock_feature.asset_columns = {asset_col}
|
|
74
73
|
mock_feature.term_columns = {term_col}
|
|
@@ -141,6 +140,22 @@ class TestFeatures:
|
|
|
141
140
|
with pytest.raises(DerivaMLException):
|
|
142
141
|
ml_instance.lookup_feature("Subject", "SubjectHealth1")
|
|
143
142
|
|
|
143
|
+
def test_feature_record(self, dataset_test, tmp_path):
|
|
144
|
+
ml_instance = DerivaML(
|
|
145
|
+
dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
|
|
146
|
+
)
|
|
147
|
+
SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
|
|
148
|
+
print(SubjectHealthFeature.model_fields.keys())
|
|
149
|
+
|
|
150
|
+
print(SubjectHealthFeature.feature_columns())
|
|
151
|
+
|
|
152
|
+
with pytest.raises(ValidationError):
|
|
153
|
+
SubjectHealthFeature(Subject="SubjectRID", Health="Good", Scale=23, Foo="Bar")
|
|
154
|
+
print(SubjectHealthFeature.value_columns())
|
|
155
|
+
print(SubjectHealthFeature.term_columns())
|
|
156
|
+
print(SubjectHealthFeature.asset_columns())
|
|
157
|
+
print(SubjectHealthFeature.feature_columns())
|
|
158
|
+
|
|
144
159
|
def test_add_feature(self, dataset_test, tmp_path):
|
|
145
160
|
ml_instance = DerivaML(
|
|
146
161
|
dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
|
|
@@ -166,7 +181,8 @@ class TestFeatures:
|
|
|
166
181
|
|
|
167
182
|
with feature_execution.execute() as exe:
|
|
168
183
|
SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
|
|
169
|
-
|
|
184
|
+
print(SubjectHealthFeature.feature_columns())
|
|
185
|
+
exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], SubjectHealth="Sick", Scale=23)])
|
|
170
186
|
|
|
171
187
|
feature_execution.upload_execution_outputs()
|
|
172
188
|
features = list(ml_instance.list_feature_values("Subject", "Health"))
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
1
3
|
from tempfile import TemporaryDirectory
|
|
2
4
|
from urllib.parse import quote as urlquote
|
|
3
5
|
|
|
4
6
|
from demo_catalog import create_demo_features
|
|
5
7
|
from deriva.core.datapath import DataPathException
|
|
8
|
+
from ipykernel.kernelspec import install
|
|
9
|
+
from jupyter_client.kernelspec import KernelSpecManager
|
|
6
10
|
|
|
7
11
|
from deriva_ml import DerivaML
|
|
8
12
|
from deriva_ml.demo_catalog import (
|
|
@@ -42,7 +46,14 @@ class MLCatalog:
|
|
|
42
46
|
pb = self.catalog.getPathBuilder()
|
|
43
47
|
ml_path = pb.schemas["deriva-ml"]
|
|
44
48
|
domain_path = pb.schemas[self.domain_schema]
|
|
45
|
-
for t in [
|
|
49
|
+
for t in [
|
|
50
|
+
"Dataset_Execution",
|
|
51
|
+
"Dataset_Version",
|
|
52
|
+
"Dataset_Dataset",
|
|
53
|
+
"Execution",
|
|
54
|
+
"Workflow_Execution",
|
|
55
|
+
"Workflow",
|
|
56
|
+
]:
|
|
46
57
|
try:
|
|
47
58
|
ml_path.tables[t].path.delete()
|
|
48
59
|
except DataPathException:
|
|
@@ -110,3 +121,60 @@ class MLDatasetCatalog:
|
|
|
110
121
|
with TemporaryDirectory() as tmp_dir:
|
|
111
122
|
ml_instance = DerivaML(self.catalog.hostname, self.catalog.catalog_id, working_dir=tmp_dir, use_minid=False)
|
|
112
123
|
self.dataset_description: DatasetDescription = create_demo_datasets(ml_instance)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def create_jupyter_kernel(name: str, kernel_dir, display_name: str = None, user: bool = True) -> None:
|
|
127
|
+
"""
|
|
128
|
+
Create and install a Jupyter kernel spec using ipykernel.
|
|
129
|
+
|
|
130
|
+
Parameters
|
|
131
|
+
----------
|
|
132
|
+
name : str
|
|
133
|
+
The internal name of the kernel (used in `--kernel`).
|
|
134
|
+
display_name : str, optional
|
|
135
|
+
The label shown in Jupyter’s kernel chooser (defaults to name).
|
|
136
|
+
user : bool, default=True
|
|
137
|
+
If True, install for the current user only.
|
|
138
|
+
If False, requires admin rights (system-wide).
|
|
139
|
+
"""
|
|
140
|
+
if display_name is None:
|
|
141
|
+
display_name = name
|
|
142
|
+
|
|
143
|
+
os.environ["JUPYTER_PATH"] = f"{kernel_dir}/share/jupyter"
|
|
144
|
+
|
|
145
|
+
print(f"Installing Jupyter kernel '{name}' with display name '{display_name}'")
|
|
146
|
+
install(
|
|
147
|
+
kernel_name=name,
|
|
148
|
+
display_name=display_name,
|
|
149
|
+
prefix=kernel_dir, # ensures it uses the current environment
|
|
150
|
+
)
|
|
151
|
+
print("✅ Kernel installed successfully.")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def destroy_jupyter_kernel(name: str, user: bool = True) -> None:
|
|
155
|
+
"""
|
|
156
|
+
Remove a Jupyter kernel spec by name.
|
|
157
|
+
|
|
158
|
+
Parameters
|
|
159
|
+
----------
|
|
160
|
+
name : str
|
|
161
|
+
The internal kernel name (the same name used in create_jupyter_kernel).
|
|
162
|
+
user : bool, default=True
|
|
163
|
+
If True, remove from the user-level kernels directory.
|
|
164
|
+
If False, attempt system-wide removal (requires permissions).
|
|
165
|
+
"""
|
|
166
|
+
ksm = KernelSpecManager()
|
|
167
|
+
kernels = ksm.find_kernel_specs()
|
|
168
|
+
|
|
169
|
+
if name not in kernels:
|
|
170
|
+
print(f"❌ Kernel '{name}' not found.")
|
|
171
|
+
return
|
|
172
|
+
|
|
173
|
+
kernel_path = kernels[name]
|
|
174
|
+
print(f"Removing kernel '{name}' at {kernel_path}")
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
shutil.rmtree(kernel_path)
|
|
178
|
+
print(f"✅ Kernel '{name}' removed successfully.")
|
|
179
|
+
except Exception as e:
|
|
180
|
+
print(f"⚠️ Failed to remove kernel '{name}': {e}")
|