deriva-ml 1.14.33__tar.gz → 1.14.35__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/PKG-INFO +4 -5
- deriva_ml-1.14.35/README.md +11 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/pyproject.toml +8 -2
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/workflow.py +3 -5
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/feature.py +1 -0
- deriva_ml-1.14.35/src/deriva_ml/install_kernel.py +46 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/run_notebook.py +4 -4
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/PKG-INFO +4 -5
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/SOURCES.txt +1 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/entry_points.txt +1 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/conftest.py +11 -1
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/test_execution.py +17 -6
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/workflow-test.ipynb +12 -17
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/workflow-test.py +0 -1
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/feature/test_features.py +32 -16
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/test_utils.py +69 -1
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/uv.lock +673 -645
- deriva_ml-1.14.33/README.md +0 -12
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.github/release-drafter.yml +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.github/workflows/publish-docs.yml +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.github/workflows/release.yml +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.gitignore +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/LICENSE +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/.DS_Store +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Features.ipynb +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/ERD.png +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/Launcher.png +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/copy_minid.png +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/deriva-logo.png +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/deriva-ml.pdf +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/sharing-at-home.pdf +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/dataset.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/dataset_aux_classes.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/dataset_bag.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/deriva_definitions.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/deriva_ml_base.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/deriva_model.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/execution.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/execution_configuration.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/feature.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/upload.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/index.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/release-notes.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/datasets.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/deriva_ml_structure.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/execution-configuration.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/file-assets.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/identifiers.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/install.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/notebooks.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/overview.md +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/mkdocs.yml +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/release.sh +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/setup.cfg +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/base.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/constants.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/definitions.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/enums.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/ermrest.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/exceptions.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/filespec.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/aux_classes.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/dataset.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/dataset_bag.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/history.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/upload.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/demo_catalog.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/environment.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/execution.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/execution_configuration.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/catalog.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/database.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/sql_mapper.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/annotations.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/check_schema.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/create_schema.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/policy.json +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/table_comments_utils.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/requires.txt +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/test_basic_tables.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/test_file.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/test_vocabulary.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/demo-catalog-schema.json +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/deriva-ml-reference.json +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/eye-ai-catalog-schema.json +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_dataset_export.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_dataset_version.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_datasets.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_download.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/model/__init__.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/model/test_database.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/model/test_models.py +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/test-files/execution-parameters.json +0 -0
- {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/test-files/notebook-parameters.json +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.14.33
|
|
3
|
+
Version: 1.14.35
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
|
|
|
25
25
|
using a deriva catalog.
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
GitHUB CLI be installed.
|
|
30
|
+
To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template) and modify it to suit your requirements.
|
|
32
31
|
|
|
33
|
-
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
34
32
|
|
|
33
|
+
## References
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# DerivaML
|
|
2
|
+
Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
|
|
3
|
+
using a deriva catalog.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
|
|
7
|
+
|
|
8
|
+
To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template) and modify it to suit your requirements.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
## References
|
|
@@ -27,6 +27,8 @@ deriva-ml-table-comments-utils = "deriva_ml.schema_setup.table_comments_utils:ma
|
|
|
27
27
|
deriva-ml-create-schema = "deriva_ml.schema_setup.create_schema:main"
|
|
28
28
|
deriva-ml-alter-annotation = "deriva_ml.schema_setup.alter_annotation:main"
|
|
29
29
|
deriva-ml-run-notebook = "deriva_ml.run_notebook:main"
|
|
30
|
+
deriva-ml-install-kernel = "deriva_ml.install_kernel:main"
|
|
31
|
+
|
|
30
32
|
deriva-ml-check-catalog-schema = "deriva_ml.schema.check_schema:main"
|
|
31
33
|
|
|
32
34
|
[project.optional-dependencies]
|
|
@@ -64,6 +66,9 @@ setup_hooks = []
|
|
|
64
66
|
pre_commit_hooks = []
|
|
65
67
|
post_commit_hooks = []
|
|
66
68
|
|
|
69
|
+
[tool.pytest]
|
|
70
|
+
mock_use_standalone_module = true
|
|
71
|
+
|
|
67
72
|
[tool.pytest.ini_options]
|
|
68
73
|
testpaths = ["tests"]
|
|
69
74
|
python_files = ["test_*.py"]
|
|
@@ -74,8 +79,8 @@ addopts = "-v --import-mode=importlib"
|
|
|
74
79
|
[tool.ruff]
|
|
75
80
|
line-length = 120
|
|
76
81
|
target-version = "py310"
|
|
77
|
-
select = ["E", "F", "I", "PTH"]
|
|
78
|
-
ignore = []
|
|
82
|
+
lint.select = ["E", "F", "I", "PTH"]
|
|
83
|
+
lint.ignore = []
|
|
79
84
|
|
|
80
85
|
[tool.ruff.format]
|
|
81
86
|
# Like Black, use double quotes for strings.
|
|
@@ -97,6 +102,7 @@ dev = [
|
|
|
97
102
|
"mkdocstrings[python]",
|
|
98
103
|
"mkdocs-material",
|
|
99
104
|
"pytest>=8.4.1",
|
|
105
|
+
"pytest-mock",
|
|
100
106
|
"pytest-coverage>=0.0",
|
|
101
107
|
"ruff"
|
|
102
108
|
]
|
|
@@ -100,9 +100,6 @@ class Workflow(BaseModel):
|
|
|
100
100
|
- DERIVA_ML_WORKFLOW_CHECKSUM: Override the computed checksum
|
|
101
101
|
|
|
102
102
|
Args:
|
|
103
|
-
name: Human-readable name for the workflow.
|
|
104
|
-
workflow_type: Type of workflow (must be a vocabulary term).
|
|
105
|
-
description: Optional description of workflow purpose.
|
|
106
103
|
|
|
107
104
|
Returns:
|
|
108
105
|
Workflow: New workflow instance with detected Git information.
|
|
@@ -240,6 +237,7 @@ class Workflow(BaseModel):
|
|
|
240
237
|
"""
|
|
241
238
|
|
|
242
239
|
server, session = Workflow._get_notebook_session()
|
|
240
|
+
|
|
243
241
|
if server and session:
|
|
244
242
|
relative_path = session["notebook"]["path"]
|
|
245
243
|
# Join the notebook directory with the relative path
|
|
@@ -321,8 +319,8 @@ class Workflow(BaseModel):
|
|
|
321
319
|
# Being called from the command line interpreter.
|
|
322
320
|
filename = Path.cwd() / Path("REPL")
|
|
323
321
|
# Get the caller's filename, which is two up the stack from here.
|
|
324
|
-
|
|
325
|
-
|
|
322
|
+
else:
|
|
323
|
+
raise DerivaMLException("Looking for caller failed") # Stack is too shallow
|
|
326
324
|
return filename, is_notebook
|
|
327
325
|
|
|
328
326
|
@staticmethod
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# deriva_ml/install_kernel.py
|
|
2
|
+
import sys
|
|
3
|
+
import re
|
|
4
|
+
from importlib import metadata
|
|
5
|
+
from ipykernel.kernelspec import install as install_kernel
|
|
6
|
+
|
|
7
|
+
def _dist_name_for_this_package() -> str:
|
|
8
|
+
"""
|
|
9
|
+
Try to resolve the distribution name that provides this package.
|
|
10
|
+
Works in editable installs and wheels.
|
|
11
|
+
"""
|
|
12
|
+
# Top-level package name of this module (your_pkg)
|
|
13
|
+
top_pkg = __name__.split(".")[0]
|
|
14
|
+
|
|
15
|
+
# Map top-level packages -> distributions
|
|
16
|
+
pkg_to_dists = metadata.packages_distributions()
|
|
17
|
+
dists = pkg_to_dists.get(top_pkg) or []
|
|
18
|
+
|
|
19
|
+
# Fall back to project name in METADATA when mapping isn't available
|
|
20
|
+
dist_name = dists[0] if dists else metadata.metadata(top_pkg).get("Name", top_pkg)
|
|
21
|
+
return dist_name
|
|
22
|
+
|
|
23
|
+
def _normalize_kernel_name(name: str) -> str:
|
|
24
|
+
"""
|
|
25
|
+
Jupyter kernel directory names should be simple: lowercase, [-a-z0-9_].
|
|
26
|
+
"""
|
|
27
|
+
name = name.strip().lower()
|
|
28
|
+
name = re.sub(r"[^a-z0-9._-]+", "-", name)
|
|
29
|
+
return name
|
|
30
|
+
|
|
31
|
+
def main() -> None:
|
|
32
|
+
dist_name = _dist_name_for_this_package() # e.g., "deriva-model-template"
|
|
33
|
+
kernel_name = _normalize_kernel_name(dist_name) # e.g., "deriva-model-template"
|
|
34
|
+
display_name = f"Python ({dist_name})"
|
|
35
|
+
|
|
36
|
+
# Install into the current environment's prefix (e.g., .venv/share/jupyter/kernels/..)
|
|
37
|
+
install_kernel(
|
|
38
|
+
user=False, # write under sys.prefix (the active env)
|
|
39
|
+
kernel_name=kernel_name,
|
|
40
|
+
display_name=display_name,
|
|
41
|
+
prefix=sys.prefix,
|
|
42
|
+
)
|
|
43
|
+
print(f"Installed Jupyter kernel '{kernel_name}' with display name '{display_name}' under {sys.prefix!s}")
|
|
44
|
+
|
|
45
|
+
if __name__ == "__main__":
|
|
46
|
+
main()
|
|
@@ -95,7 +95,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
95
95
|
exit(1)
|
|
96
96
|
|
|
97
97
|
os.environ["DERIVA_HOST"] = args.host
|
|
98
|
-
os.environ["
|
|
98
|
+
os.environ["DERIVA_CATALOG"] = args.catalog
|
|
99
99
|
|
|
100
100
|
# Create a workflow instance for this specific version of the script.
|
|
101
101
|
# Return an existing workflow if one is found.
|
|
@@ -106,14 +106,14 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
106
106
|
return
|
|
107
107
|
else:
|
|
108
108
|
notebook_parameters = (
|
|
109
|
-
{
|
|
110
|
-
| {
|
|
109
|
+
{k: v["default"] for k, v in notebook_parameters.items()}
|
|
110
|
+
| {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
|
|
111
111
|
| parameters
|
|
112
112
|
)
|
|
113
113
|
print(f"Running notebook {notebook_file.name} with parameters:")
|
|
114
114
|
for param, value in notebook_parameters.items():
|
|
115
115
|
print(f" {param}:{value}")
|
|
116
|
-
self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
|
|
116
|
+
self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
|
|
117
117
|
|
|
118
118
|
def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
|
|
119
119
|
url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.14.33
|
|
3
|
+
Version: 1.14.35
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
|
|
|
25
25
|
using a deriva catalog.
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
GitHUB CLI be installed.
|
|
30
|
+
To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template) and modify it to suit your requirements.
|
|
32
31
|
|
|
33
|
-
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
34
32
|
|
|
33
|
+
## References
|
|
@@ -2,5 +2,6 @@
|
|
|
2
2
|
deriva-ml-alter-annotation = deriva_ml.schema_setup.alter_annotation:main
|
|
3
3
|
deriva-ml-check-catalog-schema = deriva_ml.schema.check_schema:main
|
|
4
4
|
deriva-ml-create-schema = deriva_ml.schema_setup.create_schema:main
|
|
5
|
+
deriva-ml-install-kernel = deriva_ml.install_kernel:main
|
|
5
6
|
deriva-ml-run-notebook = deriva_ml.run_notebook:main
|
|
6
7
|
deriva-ml-table-comments-utils = deriva_ml.schema_setup.table_comments_utils:main
|
|
@@ -5,7 +5,7 @@ Pytest configuration and shared fixtures.
|
|
|
5
5
|
import os
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
8
|
-
from test_utils import MLCatalog, MLDatasetCatalog
|
|
8
|
+
from test_utils import MLCatalog, MLDatasetCatalog, create_jupyter_kernel, destroy_jupyter_kernel
|
|
9
9
|
|
|
10
10
|
from deriva_ml import DerivaML
|
|
11
11
|
from deriva_ml.demo_catalog import (
|
|
@@ -61,6 +61,16 @@ def dataset_test(catalog_with_datasets):
|
|
|
61
61
|
return catalog_with_datasets
|
|
62
62
|
|
|
63
63
|
|
|
64
|
+
@pytest.fixture(scope="function")
|
|
65
|
+
def notebook_test(deriva_catalog, tmp_path):
|
|
66
|
+
deriva_catalog.reset_demo_catalog()
|
|
67
|
+
create_jupyter_kernel("test_kernel", tmp_path)
|
|
68
|
+
yield DerivaML(deriva_catalog.hostname, deriva_catalog.catalog_id, use_minid=False, working_dir=tmp_path)
|
|
69
|
+
print("Resetting catalog... ", end="")
|
|
70
|
+
deriva_catalog.reset_demo_catalog()
|
|
71
|
+
destroy_jupyter_kernel("test_kernel")
|
|
72
|
+
|
|
73
|
+
|
|
64
74
|
@pytest.fixture(scope="function")
|
|
65
75
|
def test_ml_demo_catalog(ml_catalog, tmp_path):
|
|
66
76
|
# reset_demo_catalog(ml_catalog.catalog)
|
|
@@ -3,6 +3,7 @@ Tests for the execution module.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import subprocess
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from tempfile import TemporaryDirectory
|
|
7
8
|
|
|
8
9
|
from deriva_ml import (
|
|
@@ -23,20 +24,23 @@ class TestWorkflow:
|
|
|
23
24
|
ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
|
|
24
25
|
ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
|
|
25
26
|
print("Running workflow-test.py ...")
|
|
27
|
+
workflow_script = Path(__file__).parent / "workflow-test.py"
|
|
28
|
+
|
|
26
29
|
workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
|
|
27
30
|
workflows = list(workflow_table.entities().fetch())
|
|
28
31
|
assert 0 == len(workflows)
|
|
29
32
|
result = subprocess.run(
|
|
30
33
|
[
|
|
31
34
|
"python",
|
|
32
|
-
|
|
35
|
+
workflow_script.as_posix(),
|
|
33
36
|
ml_instance.catalog.deriva_server.server,
|
|
34
37
|
ml_instance.catalog_id,
|
|
35
38
|
],
|
|
36
39
|
capture_output=True,
|
|
37
40
|
text=True,
|
|
38
41
|
)
|
|
39
|
-
|
|
42
|
+
print(result.stdout)
|
|
43
|
+
print(result.stderr)
|
|
40
44
|
workflows = list(workflow_table.entities().fetch())
|
|
41
45
|
assert 1 == len(workflows)
|
|
42
46
|
workflow_rid = workflows[0]["RID"]
|
|
@@ -50,18 +54,22 @@ class TestWorkflow:
|
|
|
50
54
|
result = subprocess.run(
|
|
51
55
|
[
|
|
52
56
|
"python",
|
|
53
|
-
|
|
57
|
+
workflow_script.as_posix(),
|
|
54
58
|
ml_instance.catalog.deriva_server.server,
|
|
55
59
|
ml_instance.catalog_id,
|
|
56
60
|
],
|
|
57
61
|
capture_output=True,
|
|
58
62
|
text=True,
|
|
59
63
|
)
|
|
64
|
+
print(result.stdout)
|
|
65
|
+
print(result.stderr)
|
|
60
66
|
new_workflow = result.stdout.strip()
|
|
61
67
|
assert new_workflow == workflow_rid
|
|
62
68
|
|
|
63
|
-
def test_workflow_creation_notebook(self,
|
|
64
|
-
ml_instance =
|
|
69
|
+
def test_workflow_creation_notebook(self, notebook_test):
|
|
70
|
+
ml_instance = notebook_test
|
|
71
|
+
|
|
72
|
+
notebook_path = Path(__file__).parent / "workflow-test.ipynb" # directory where this test lives
|
|
65
73
|
ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
|
|
66
74
|
ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
|
|
67
75
|
workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
|
|
@@ -69,14 +77,17 @@ class TestWorkflow:
|
|
|
69
77
|
assert 0 == len(workflows)
|
|
70
78
|
|
|
71
79
|
print("Running notebook...")
|
|
80
|
+
|
|
72
81
|
result = subprocess.run(
|
|
73
82
|
[
|
|
74
83
|
"deriva-ml-run-notebook",
|
|
75
|
-
|
|
84
|
+
notebook_path.as_posix(),
|
|
76
85
|
"--host",
|
|
77
86
|
ml_instance.catalog.deriva_server.server,
|
|
78
87
|
"--catalog",
|
|
79
88
|
ml_instance.catalog_id,
|
|
89
|
+
"--kernel",
|
|
90
|
+
"test_kernel",
|
|
80
91
|
"--log-output",
|
|
81
92
|
],
|
|
82
93
|
capture_output=True,
|
|
@@ -18,9 +18,7 @@
|
|
|
18
18
|
"outputs": [],
|
|
19
19
|
"source": [
|
|
20
20
|
"from deriva_ml import DerivaML, MLVocab as vc\n",
|
|
21
|
-
"import os
|
|
22
|
-
"import logging\n",
|
|
23
|
-
"logger = logging.getLogger()"
|
|
21
|
+
"import os"
|
|
24
22
|
]
|
|
25
23
|
},
|
|
26
24
|
{
|
|
@@ -42,8 +40,8 @@
|
|
|
42
40
|
},
|
|
43
41
|
"outputs": [],
|
|
44
42
|
"source": [
|
|
45
|
-
"
|
|
46
|
-
"
|
|
43
|
+
"host = None\n",
|
|
44
|
+
"catalog = None"
|
|
47
45
|
]
|
|
48
46
|
},
|
|
49
47
|
{
|
|
@@ -54,24 +52,21 @@
|
|
|
54
52
|
"outputs": [],
|
|
55
53
|
"source": [
|
|
56
54
|
"# Modify these to your desired server and catalog.\n",
|
|
57
|
-
"
|
|
58
|
-
"
|
|
55
|
+
"host = host or os.environ.get(\"DERIVA_HOST\")\n",
|
|
56
|
+
"catalog = catalog or os.environ.get(\"DERIVA_CATALOG\")\n",
|
|
59
57
|
"\n",
|
|
60
58
|
"# Change this line to call the domain specific class derived from DerivaML\n",
|
|
61
|
-
"ml_instance = DerivaML(
|
|
62
|
-
"logger.info(\"Got ML instance:\")\n",
|
|
59
|
+
"ml_instance = DerivaML(host, catalog)\n",
|
|
63
60
|
"\n",
|
|
64
61
|
"ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
|
|
65
62
|
"ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
|
|
66
|
-
"
|
|
63
|
+
"\n",
|
|
67
64
|
"api_workflow = ml_instance.create_workflow(\n",
|
|
68
|
-
"
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
")\n",
|
|
72
|
-
"
|
|
73
|
-
"rid = ml_instance.add_workflow(api_workflow)\n",
|
|
74
|
-
"logger.info(f\"RID {rid}\")\n"
|
|
65
|
+
" name=\"Test Workflow One\",\n",
|
|
66
|
+
" workflow_type=\"Test Workflow\",\n",
|
|
67
|
+
" description=\"A test operation\",\n",
|
|
68
|
+
" )\n",
|
|
69
|
+
"rid = ml_instance.add_workflow(api_workflow)"
|
|
75
70
|
]
|
|
76
71
|
}
|
|
77
72
|
],
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
Tests for feature functionality.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from unittest.mock import Mock
|
|
6
|
-
|
|
7
5
|
import pytest
|
|
6
|
+
from pydantic import ValidationError
|
|
8
7
|
|
|
9
8
|
from deriva_ml import (
|
|
10
9
|
BuiltinTypes,
|
|
@@ -21,14 +20,14 @@ from deriva_ml.feature import FeatureRecord
|
|
|
21
20
|
class TestFeatureRecord:
|
|
22
21
|
"""Test cases for the FeatureRecord base class."""
|
|
23
22
|
|
|
24
|
-
def test_feature_record_creation(self):
|
|
23
|
+
def test_feature_record_creation(self, mocker):
|
|
25
24
|
"""Test basic FeatureRecord creation."""
|
|
26
25
|
# Create a mock feature
|
|
27
|
-
mock_feature = Mock()
|
|
28
|
-
mock_feature.feature_columns = {Mock(name="value"), Mock(name="confidence")}
|
|
29
|
-
mock_feature.asset_columns = {Mock(name="image_file")}
|
|
30
|
-
mock_feature.term_columns = {Mock(name="category")}
|
|
31
|
-
mock_feature.value_columns = {Mock(name="score")}
|
|
26
|
+
mock_feature = mocker.Mock()
|
|
27
|
+
mock_feature.feature_columns = {mocker.Mock(name="value"), mocker.Mock(name="confidence")}
|
|
28
|
+
mock_feature.asset_columns = {mocker.Mock(name="image_file")}
|
|
29
|
+
mock_feature.term_columns = {mocker.Mock(name="category")}
|
|
30
|
+
mock_feature.value_columns = {mocker.Mock(name="score")}
|
|
32
31
|
|
|
33
32
|
# Create a test class that inherits from FeatureRecord
|
|
34
33
|
class TestFeature(FeatureRecord):
|
|
@@ -58,17 +57,17 @@ class TestFeatureRecord:
|
|
|
58
57
|
assert record.category == "good"
|
|
59
58
|
assert record.score == 0.8
|
|
60
59
|
|
|
61
|
-
def test_feature_record_column_methods(self):
|
|
60
|
+
def test_feature_record_column_methods(self, mocker):
|
|
62
61
|
"""Test the column access methods of FeatureRecord."""
|
|
63
62
|
# Create mock columns
|
|
64
|
-
value_col = Mock(name="value")
|
|
65
|
-
confidence_col = Mock(name="confidence")
|
|
66
|
-
asset_col = Mock(name="image_file")
|
|
67
|
-
term_col = Mock(name="category")
|
|
68
|
-
value_only_col = Mock(name="score")
|
|
63
|
+
value_col = mocker.Mock(name="value")
|
|
64
|
+
confidence_col = mocker.Mock(name="confidence")
|
|
65
|
+
asset_col = mocker.Mock(name="image_file")
|
|
66
|
+
term_col = mocker.Mock(name="category")
|
|
67
|
+
value_only_col = mocker.Mock(name="score")
|
|
69
68
|
|
|
70
69
|
# Create a mock feature
|
|
71
|
-
mock_feature = Mock()
|
|
70
|
+
mock_feature = mocker.Mock()
|
|
72
71
|
mock_feature.feature_columns = {value_col, confidence_col, asset_col, term_col, value_only_col}
|
|
73
72
|
mock_feature.asset_columns = {asset_col}
|
|
74
73
|
mock_feature.term_columns = {term_col}
|
|
@@ -141,6 +140,22 @@ class TestFeatures:
|
|
|
141
140
|
with pytest.raises(DerivaMLException):
|
|
142
141
|
ml_instance.lookup_feature("Subject", "SubjectHealth1")
|
|
143
142
|
|
|
143
|
+
def test_feature_record(self, dataset_test, tmp_path):
|
|
144
|
+
ml_instance = DerivaML(
|
|
145
|
+
dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
|
|
146
|
+
)
|
|
147
|
+
SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
|
|
148
|
+
print(SubjectHealthFeature.model_fields.keys())
|
|
149
|
+
|
|
150
|
+
print(SubjectHealthFeature.feature_columns())
|
|
151
|
+
|
|
152
|
+
with pytest.raises(ValidationError):
|
|
153
|
+
SubjectHealthFeature(Subject="SubjectRID", Health="Good", Scale=23, Foo="Bar")
|
|
154
|
+
print(SubjectHealthFeature.value_columns())
|
|
155
|
+
print(SubjectHealthFeature.term_columns())
|
|
156
|
+
print(SubjectHealthFeature.asset_columns())
|
|
157
|
+
print(SubjectHealthFeature.feature_columns())
|
|
158
|
+
|
|
144
159
|
def test_add_feature(self, dataset_test, tmp_path):
|
|
145
160
|
ml_instance = DerivaML(
|
|
146
161
|
dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
|
|
@@ -166,7 +181,8 @@ class TestFeatures:
|
|
|
166
181
|
|
|
167
182
|
with feature_execution.execute() as exe:
|
|
168
183
|
SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
|
|
169
|
-
|
|
184
|
+
print(SubjectHealthFeature.feature_columns())
|
|
185
|
+
exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], SubjectHealth="Sick", Scale=23)])
|
|
170
186
|
|
|
171
187
|
feature_execution.upload_execution_outputs()
|
|
172
188
|
features = list(ml_instance.list_feature_values("Subject", "Health"))
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
1
3
|
from tempfile import TemporaryDirectory
|
|
2
4
|
from urllib.parse import quote as urlquote
|
|
3
5
|
|
|
4
6
|
from demo_catalog import create_demo_features
|
|
5
7
|
from deriva.core.datapath import DataPathException
|
|
8
|
+
from ipykernel.kernelspec import install
|
|
9
|
+
from jupyter_client.kernelspec import KernelSpecManager
|
|
6
10
|
|
|
7
11
|
from deriva_ml import DerivaML
|
|
8
12
|
from deriva_ml.demo_catalog import (
|
|
@@ -42,7 +46,14 @@ class MLCatalog:
|
|
|
42
46
|
pb = self.catalog.getPathBuilder()
|
|
43
47
|
ml_path = pb.schemas["deriva-ml"]
|
|
44
48
|
domain_path = pb.schemas[self.domain_schema]
|
|
45
|
-
for t in [
|
|
49
|
+
for t in [
|
|
50
|
+
"Dataset_Execution",
|
|
51
|
+
"Dataset_Version",
|
|
52
|
+
"Dataset_Dataset",
|
|
53
|
+
"Execution",
|
|
54
|
+
"Workflow_Execution",
|
|
55
|
+
"Workflow",
|
|
56
|
+
]:
|
|
46
57
|
try:
|
|
47
58
|
ml_path.tables[t].path.delete()
|
|
48
59
|
except DataPathException:
|
|
@@ -110,3 +121,60 @@ class MLDatasetCatalog:
|
|
|
110
121
|
with TemporaryDirectory() as tmp_dir:
|
|
111
122
|
ml_instance = DerivaML(self.catalog.hostname, self.catalog.catalog_id, working_dir=tmp_dir, use_minid=False)
|
|
112
123
|
self.dataset_description: DatasetDescription = create_demo_datasets(ml_instance)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def create_jupyter_kernel(name: str, kernel_dir, display_name: str = None, user: bool = True) -> None:
|
|
127
|
+
"""
|
|
128
|
+
Create and install a Jupyter kernel spec using ipykernel.
|
|
129
|
+
|
|
130
|
+
Parameters
|
|
131
|
+
----------
|
|
132
|
+
name : str
|
|
133
|
+
The internal name of the kernel (used in `--kernel`).
|
|
134
|
+
display_name : str, optional
|
|
135
|
+
The label shown in Jupyter’s kernel chooser (defaults to name).
|
|
136
|
+
user : bool, default=True
|
|
137
|
+
If True, install for the current user only.
|
|
138
|
+
If False, requires admin rights (system-wide).
|
|
139
|
+
"""
|
|
140
|
+
if display_name is None:
|
|
141
|
+
display_name = name
|
|
142
|
+
|
|
143
|
+
os.environ["JUPYTER_PATH"] = f"{kernel_dir}/share/jupyter"
|
|
144
|
+
|
|
145
|
+
print(f"Installing Jupyter kernel '{name}' with display name '{display_name}'")
|
|
146
|
+
install(
|
|
147
|
+
kernel_name=name,
|
|
148
|
+
display_name=display_name,
|
|
149
|
+
prefix=kernel_dir, # ensures it uses the current environment
|
|
150
|
+
)
|
|
151
|
+
print("✅ Kernel installed successfully.")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def destroy_jupyter_kernel(name: str, user: bool = True) -> None:
|
|
155
|
+
"""
|
|
156
|
+
Remove a Jupyter kernel spec by name.
|
|
157
|
+
|
|
158
|
+
Parameters
|
|
159
|
+
----------
|
|
160
|
+
name : str
|
|
161
|
+
The internal kernel name (the same name used in create_jupyter_kernel).
|
|
162
|
+
user : bool, default=True
|
|
163
|
+
If True, remove from the user-level kernels directory.
|
|
164
|
+
If False, attempt system-wide removal (requires permissions).
|
|
165
|
+
"""
|
|
166
|
+
ksm = KernelSpecManager()
|
|
167
|
+
kernels = ksm.find_kernel_specs()
|
|
168
|
+
|
|
169
|
+
if name not in kernels:
|
|
170
|
+
print(f"❌ Kernel '{name}' not found.")
|
|
171
|
+
return
|
|
172
|
+
|
|
173
|
+
kernel_path = kernels[name]
|
|
174
|
+
print(f"Removing kernel '{name}' at {kernel_path}")
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
shutil.rmtree(kernel_path)
|
|
178
|
+
print(f"✅ Kernel '{name}' removed successfully.")
|
|
179
|
+
except Exception as e:
|
|
180
|
+
print(f"⚠️ Failed to remove kernel '{name}': {e}")
|